author     Jordan Crouse <jcrouse@codeaurora.org>    2016-02-25 09:16:24 -0700
committer  David Keitel <dkeitel@codeaurora.org>     2016-03-22 11:15:49 -0700
commit     5103db813f92bdb6c064631674e4ae5726be03f3
tree       c12b0fec0575125e59639631c9cdc190cbe770d1
parent     e64e0d283a6be977af3bfba4f9a559630a7836ee
msm: kgsl: Add Qualcomm GPU driver
Snapshot of the Qualcomm Adreno GPU driver (KGSL) as of msm-3.18
commit e70ad0cd5efd ("Promotion of kernel.lnx.3.18-151201.").
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
77 files changed, 53632 insertions, 0 deletions
diff --git a/Documentation/devicetree/bindings/gpu/adreno-busmon.txt b/Documentation/devicetree/bindings/gpu/adreno-busmon.txt new file mode 100644 index 000000000000..7bf2fe8274d0 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno-busmon.txt @@ -0,0 +1,16 @@ +Adreno bus monitor device + +kgsl-busmon is a psedo device that represents a devfreq bus bandwidth +governor. If this device is present then two different governors are used +for GPU DCVS and bus DCVS. + +Required properties: +- compatible: Must be "qcom,kgsl-busmon" +- label: Device name used for sysfs entry. + +Example: + + qcom,kgsl-busmon { + compatible = "qcom,kgsl-busmon"; + label = "kgsl-busmon"; + }; diff --git a/Documentation/devicetree/bindings/gpu/adreno-iommu.txt b/Documentation/devicetree/bindings/gpu/adreno-iommu.txt new file mode 100644 index 000000000000..de88a6eba7a5 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno-iommu.txt @@ -0,0 +1,88 @@ +Qualcomm Technologies, Inc. GPU IOMMU + +Required properties: + +Required properties: +- compatible : one of: + - "qcom,kgsl-smmu-v1" + - "qcom,kgsl-smmu-v2" + +- reg : Base address and size of the SMMU. + +- clocks : List of clocks to be used during SMMU register access. See + Documentation/devicetree/bindings/clock/clock-bindings.txt + for information about the format. For each clock specified + here, there must be a corresponding entry in clock-names + (see below). + +- clock-names : List of clock names corresponding to the clocks specified in + the "clocks" property (above). See + Documentation/devicetree/bindings/clock/clock-bindings.txt + for more info. +- qcom,protect : The GPU register region which must be protected by a CP + protected mode. On some targets this region must cover + the entire SMMU register space, on others there + is a separate aperture for CP to program context banks. + +Optional properties: +- qcom,micro-mmu-control : Some targets provide an implementation defined + register for blocking translation requests during GPU side + programming. This property specifies the offset of this + register within the iommu register space. +- qcom,retention : A boolean specifying if retention is supported on this target +- qcom,global_pt : A boolean specifying if global pagetable should be used. + When not set we use per process pagetables +- qcom,hyp_secure_alloc : A bool specifying if the hypervisor is used on this target + for secure buffer allocation +- qcom,secure_align_mask: A mask for determining how secure buffers need to + be aligned +- qcom,coherent-htw: A boolean specifying if coherent hardware table walks should + be enabled. + +- List of sub nodes, one for each of the translation context banks supported. + The driver uses the names of these nodes to determine how they are used, + currently supported names are: + - gfx3d_user : Used for the 'normal' GPU address space. + - gfx3d_secure : Used for the content protection address space. + Each sub node has the following required properties: + + - compatible : "qcom,smmu-kgsl-cb" + - iommus : Specifies the SID's used by this context bank, this needs to be + <kgsl_smmu SID> pair, kgsl_smmu is the string parsed by iommu + driver to match this context bank with the kgsl_smmu device + defined in iommu device tree. On targets where the msm iommu + driver is used rather than the arm smmu driver, this property + may be absent. + - qcom,gpu-offset : Offset into the GPU register space for accessing + this context bank. 
On some targets the iommu registers are not + part of the GPU's register space, and a separate register aperture + is used. Otherwise the same register offsets may be used for CPU + or GPU side programming. + +Example: + + msm_iommu: qcom,kgsl-iommu { + compatible = "qcom,kgsl-smmu-v2"; + reg = <0xb40000 0x20000>; + qcom,protect = <0x40000 0x20000>; + clocks = <&clock_mmss clk_gpu_ahb_clk>, + <&clock_gcc clk_gcc_mmss_bimc_gfx_clk>, + <&clock_mmss clk_mmss_mmagic_ahb_clk>, + <&clock_mmss clk_mmss_mmagic_cfg_ahb_clk>; + clock-names = "gpu_ahb_clk", "bimc_gfx_clk", "mmagic_ahb_clk", "mmagic_cfg_ahb_clk"; + qcom,secure_align_mask = <0xfff>; + qcom,retention; + qcom,global_pt; + + gfx3d_user: gfx3d_user { + compatible = "qcom,smmu-kgsl-cb"; + iommus = <&kgsl_smmu 0>, + <&kgsl_smmu 1>; + qcom,gpu-offset = <0x48000>; + }; + + gfx3d_secure: gfx3d_secure { + compatible = "qcom,smmu-kgsl-cb"; + iommus = <&kgsl_smmu 2>; + }; + }; diff --git a/Documentation/devicetree/bindings/gpu/adreno-pwrlevels.txt b/Documentation/devicetree/bindings/gpu/adreno-pwrlevels.txt new file mode 100644 index 000000000000..e5617d15a821 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno-pwrlevels.txt @@ -0,0 +1,25 @@ +Qualcomm GPU powerlevels + +Powerlevels are defined in sets by qcom,gpu-pwrlevels. Multiple sets (bins) +can be defined within qcom,gpu-pwrelvel-bins. Each powerlevel defines a +voltage, bus, and bandwitdh level. + +- qcom,gpu-pwrlevel-bins: Contains one or more qcom,gpu-pwrlevels sets + +Properties: +- compatible: Must be qcom,gpu-pwrlevel-bins +- qcom,gpu-pwrlevels: Defines a set of powerlevels + +Properties: +- qcom,speed-bin: Speed bin identifier for the set - must match + the value read from the hardware + +- qcom,gpu-pwrlevel: A single powerlevel + +Properties: +- reg: Index of the powerlevel (0 = highest perf) +- qcom,gpu-freq GPU frequency for the powerlevel (in Hz) +- qcom,bus-freq Index to a bus level (defined by the bus + settings) +- qcom,bus-min Minimum bus level to set for the power level +- qcom,bus-max maximum bus level to set for the power level diff --git a/Documentation/devicetree/bindings/gpu/adreno.txt b/Documentation/devicetree/bindings/gpu/adreno.txt new file mode 100644 index 000000000000..84cbc21705e9 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno.txt @@ -0,0 +1,222 @@ +Qualcomm GPU + +Qualcomm Adreno GPU + +Required properties: +- label: A string used as a descriptive name for the device. +- compatible: Must be "qcom,kgsl-3d0" and "qcom,kgsl-3d" +- reg: Specifies the register base address and size. The second interval + specifies the shader memory base address and size. +- reg-names: Resource names used for the physical address of device registers + and shader memory. "kgsl_3d0_reg_memory" gives the physical address + and length of device registers while "kgsl_3d0_shader_memory" gives + physical address and length of device shader memory. If + specified, "qfprom_memory" gives the range for the efuse + registers used for various configuration options. +- interrupts: Interrupt mapping for GPU IRQ. +- interrupt-names: String property to describe the name of the interrupt. +- qcom,id: An integer used as an identification number for the device. + +- clocks: List of phandle and clock specifier pairs, one pair + for each clock input to the device. +- clock-names: List of clock input name strings sorted in the same + order as the clocks property. 
+ Current values of clock-names are: + "src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk", + "alt_mem_iface_clk", "rbbmtimer_clk", "alwayson_clk" + "core_clk" and "iface_clk" are required and others are optional + +- qcom,base-leakage-coefficient: Dynamic leakage coefficient. +- qcom,lm-limit: Current limit for GPU limit management. + +Bus Scaling Data: +- qcom,msm-bus,name: String property to describe the name of the 3D graphics processor. +- qcom,msm-bus,num-cases: This is the the number of Bus Scaling use cases defined in the vectors property. +- qcom,msm-bus,active-only: A boolean flag indicating if it is active only. +- qcom,msm-bus,num-paths: This represents the number of paths in each Bus Scaling Usecase. +- qcom,msm-bus,vectors-KBps: A series of 4 cell properties, format of which is: + <src dst ab ib>, <src dst ab ib>, // For Bus Scaling Usecase 1 + <src dst ab ib>, <src dst ab ib>, // For Bus Scaling Usecase 2 + <.. .. .. ..>, <.. .. .. ..>; // For Bus Scaling Usecase n + This property is a series of all vectors for all Bus Scaling Usecases. + Each set of vectors for each usecase describes bandwidth votes for a combination + of src/dst ports. The driver will set the desired use case based on the selected + power level and the desired bandwidth vote will be registered for the port pairs. + Current values of src are: + 0 = MSM_BUS_MASTER_GRAPHICS_3D + 1 = MSM_BUS_MASTER_GRAPHICS_3D_PORT1 + 2 = MSM_BUS_MASTER_V_OCMEM_GFX3D + Current values of dst are: + 0 = MSM_BUS_SLAVE_EBI_CH0 + 1 = MSM_BUS_SLAVE_OCMEM + ab: Represents aggregated bandwidth. This value is 0 for Graphics. + ib: Represents instantaneous bandwidth. This value has a range <0 8000 MB/s> + +- qcom,ocmem-bus-client: Container for another set of bus scaling properties + qcom,msm-bus,name + qcom,msm-bus,num-cases + qcom,msm-bus,num-paths + qcom,msm-bus,vectors-KBps + to be used by ocmem msm bus scaling client. + +GDSC Oxili Regulators: +- regulator-names: List of regulator name strings sorted in power-on order +- vddcx-supply: Phandle for vddcx regulator device node. +- vdd-supply: Phandle for vdd regulator device node. + +IOMMU Data: +- iommu: Phandle for the KGSL IOMMU device node + +GPU Power levels: +- qcom,gpu-pwrlevel-bins: Container for sets of GPU power levels (see + adreno-pwrlevels.txt) + +DCVS Core info +- qcom,dcvs-core-info Container for the DCVS core info (see + dcvs-core-info.txt) + +Optional Properties: +- qcom,initial-powerlevel: This value indicates which qcom,gpu-pwrlevel should be used at start time + and when coming back out of resume +- qcom,bus-control: Boolean. Enables an independent bus vote from the gpu frequency +- qcom,bus-width: Bus width in number of bytes. This enables dynamic AB bus voting based on + bus width and actual bus transactions. +- qcom,gpubw-dev: a phandle to a device representing bus bandwidth requirements + (see devdw.txt) +- qcom,idle-timeout: This property represents the time in microseconds for idle timeout. +- qcom,deep-nap-timeout: This property represents the time in microseconds for entering deeper + power state. +- qcom,chipid: If it exists this property is used to replace + the chip identification read from the GPU hardware. + This is used to override faulty hardware readings. +- qcom,strtstp-sleepwake: Boolean. Enables use of GPU SLUMBER instead of SLEEP for power savings +- qcom,gx-retention: Boolean. 
Enables use of GX rail RETENTION voltage + +- qcom,pm-qos-active-latency: + Right after GPU wakes up from sleep, driver votes for + acceptable maximum latency to the pm-qos driver. This + voting demands that the system can not go into any + power save state *if* the latency to bring system back + into active state is more than this value. + Value is in microseconds. +- qcom,pm-qos-wakeup-latency: + Similar to the above. Driver votes against deep low + power modes right before GPU wakes up from sleep. +- qcom,force-32bit: + Force the GPU to use 32 bit data sizes even if + it is capable of doing 64 bit. + +- qcom,gpu-quirk-two-pass-use-wfi: + Signal the GPU to set Set TWOPASSUSEWFI bit in + A5XX_PC_DBG_ECO_CNTL (5XX only) + +The following properties are optional as collecting data via coresight might +not be supported for every chipset. The documentation for coresight +properties can be found in: +Documentation/devicetree/bindings/coresight/coresight.txt + +- coresight-id Unique integer identifier for the bus. +- coresight-name Unique descriptive name of the bus. +- coresight-nr-inports Number of input ports on the bus. +- coresight-outports List of output port numbers on the bus. +- coresight-child-list List of phandles pointing to the children of this + component. +- coresight-child-ports List of input port numbers of the children. + + +Example of A330 GPU in MSM8916: + +&soc { + msm_gpu: qcom,kgsl-3d0@01c00000 { + label = "kgsl-3d0"; + compatible = "qcom,kgsl-3d0", "qcom,kgsl-3d"; + reg = <0x01c00000 0x10000 + 0x01c20000 0x20000>; + reg-names = "kgsl_3d0_reg_memory" , "kgsl_3d0_shader_memory"; + interrupts = <0 33 0>; + interrupt-names = "kgsl_3d0_irq"; + qcom,id = <0>; + + qcom,chipid = <0x03000600>; + + qcom,initial-pwrlevel = <1>; + + /* Idle Timeout = HZ/12 */ + qcom,idle-timeout = <8>; + qcom,strtstp-sleepwake; + + clocks = <&clock_gcc clk_gcc_oxili_gfx3d_clk>, + <&clock_gcc clk_gcc_oxili_ahb_clk>, + <&clock_gcc clk_gcc_oxili_gmem_clk>, + <&clock_gcc clk_gcc_bimc_gfx_clk>, + <&clock_gcc clk_gcc_bimc_gpu_clk>; + clock-names = "core_clk", "iface_clk", "mem_clk", + "mem_iface_clk", "alt_mem_iface_clk"; + + /* Bus Scale Settings */ + qcom,msm-bus,name = "grp3d"; + qcom,msm-bus,num-cases = <4>; + qcom,msm-bus,num-paths = <1>; + qcom,msm-bus,vectors-KBps = + <26 512 0 0>, + <26 512 0 1600000>, + <26 512 0 3200000>, + <26 512 0 4264000>; + + /* GDSC oxili regulators */ + vdd-supply = <&gdsc_oxili_gx>; + + /* IOMMU Data */ + iommu = <&gfx_iommu>; + + /* Trace bus */ + coresight-id = <67>; + coresight-name = "coresight-gfx"; + coresight-nr-inports = <0>; + coresight-outports = <0>; + coresight-child-list = <&funnel_in0>; + coresight-child-ports = <5>; + + /* Power levels */ + qcom,gpu-pwrlevels-bins { + #address-cells = <1>; + #size-cells = <0>; + + qcom,gpu-pwrlevels-0 { + #address-cells = <1>; + #size-cells = <0>; + + qcom,speed-bin = <0>; + + qcom,gpu-pwrlevel@0 { + reg = <0>; + qcom,gpu-freq = <400000000>; + qcom,bus-freq = <3>; + qcom,io-fraction = <33>; + }; + + qcom,gpu-pwrlevel@1 { + reg = <1>; + qcom,gpu-freq = <310000000>; + qcom,bus-freq = <2>; + qcom,io-fraction = <66>; + }; + + qcom,gpu-pwrlevel@2 { + reg = <2>; + qcom,gpu-freq = <200000000>; + qcom,bus-freq = <1>; + qcom,io-fraction = <100>; + }; + + qcom,gpu-pwrlevel@3 { + reg = <3>; + qcom,gpu-freq = <27000000>; + qcom,bus-freq = <0>; + qcom,io-fraction = <0>; + }; + }; + }; + + }; +}; diff --git a/drivers/gpu/msm/Kconfig b/drivers/gpu/msm/Kconfig new file mode 100644 index 000000000000..aca3ab611214 --- /dev/null +++ 
b/drivers/gpu/msm/Kconfig @@ -0,0 +1,42 @@ +config MSM_KGSL + tristate "MSM 3D Graphics driver" + default n + depends on ARCH_MSM && !ARCH_MSM7X00A && !ARCH_MSM7X25 + select GENERIC_ALLOCATOR + select FW_LOADER + select PM_DEVFREQ + select DEVFREQ_GOV_SIMPLE_ONDEMAND + select DEVFREQ_GOV_PERFORMANCE + select DEVFREQ_GOV_MSM_ADRENO_TZ + select DEVFREQ_GOV_MSM_GPUBW_MON + select ONESHOT_SYNC if SYNC + ---help--- + 3D graphics driver. Required to use hardware accelerated + OpenGL ES 2.0 and 1.1. + +config MSM_KGSL_CFF_DUMP + bool "Enable KGSL Common File Format (CFF) Dump Feature [Use with caution]" + default n + depends on MSM_KGSL + select RELAY + ---help--- + This is an analysis and diagnostic feature only, and should only be + turned on during KGSL GPU diagnostics and will slow down the KGSL + performance sigificantly, hence *do not use in production builds*. + When enabled, CFF Dump is on at boot. It can be turned off at runtime + via 'echo 0 > /d/kgsl/cff_dump'. The log can be captured via + /d/kgsl-cff/cpu[0|1]. + +config MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP + bool "When selected will disable KGSL CFF Dump for context switches" + default n + depends on MSM_KGSL_CFF_DUMP + ---help--- + Dumping all the memory for every context switch can produce quite + huge log files, to reduce this, turn this feature on. + +config MSM_ADRENO_DEFAULT_GOVERNOR + string "devfreq governor for the adreno core" + default "msm-adreno-tz" if DEVFREQ_GOV_MSM_ADRENO_TZ + default "simple_ondemand" + depends on MSM_KGSL diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile new file mode 100644 index 000000000000..9ee90751b944 --- /dev/null +++ b/drivers/gpu/msm/Makefile @@ -0,0 +1,48 @@ +ccflags-y := -Iinclude/uapi/drm -Iinclude/drm -Idrivers/gpu/msm + +msm_kgsl_core-y = \ + kgsl.o \ + kgsl_trace.o \ + kgsl_cmdbatch.o \ + kgsl_ioctl.o \ + kgsl_sharedmem.o \ + kgsl_pwrctrl.o \ + kgsl_pwrscale.o \ + kgsl_mmu.o \ + kgsl_iommu.o \ + kgsl_snapshot.o \ + kgsl_events.o + +msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o +msm_kgsl_core-$(CONFIG_MSM_KGSL_CFF_DUMP) += kgsl_cffdump.o +msm_kgsl_core-$(CONFIG_SYNC) += kgsl_sync.o +msm_kgsl_core-$(CONFIG_COMPAT) += kgsl_compat.o + +msm_adreno-y += \ + adreno_ioctl.o \ + adreno_ringbuffer.o \ + adreno_drawctxt.o \ + adreno_dispatch.o \ + adreno_snapshot.o \ + adreno_coresight.o \ + adreno_trace.o \ + adreno_a3xx.o \ + adreno_a4xx.o \ + adreno_a5xx.o \ + adreno_a3xx_snapshot.o \ + adreno_a4xx_snapshot.o \ + adreno_a5xx_snapshot.o \ + adreno_sysfs.o \ + adreno.o \ + adreno_cp_parser.o \ + adreno_iommu.o \ + adreno_perfcounter.o + +msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o +msm_adreno-$(CONFIG_COMPAT) += adreno_compat.o + +msm_kgsl_core-objs = $(msm_kgsl_core-y) +msm_adreno-objs = $(msm_adreno-y) + +obj-$(CONFIG_MSM_KGSL) += msm_kgsl_core.o +obj-$(CONFIG_MSM_KGSL) += msm_adreno.o diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h new file mode 100644 index 000000000000..13d55bd4ae3b --- /dev/null +++ b/drivers/gpu/msm/a3xx_reg.h @@ -0,0 +1,891 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3XX_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* CP_EVENT_WRITE events */ +#define CACHE_FLUSH_TS 4 + +/* CP_INTERRUPT masks */ + +#define CP_INTERRUPT_IB2 0x20000000 +#define CP_INTERRUPT_IB1 0x40000000 +#define CP_INTERRUPT_RB 0x80000000 + +/* Register definitions */ + +#define A3XX_RBBM_HW_VERSION 0x000 +#define A3XX_RBBM_HW_RELEASE 0x001 +#define A3XX_RBBM_HW_CONFIGURATION 0x002 +#define A3XX_RBBM_CLOCK_CTL 0x010 +#define A3XX_RBBM_SP_HYST_CNT 0x012 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ME_SPLIT_STATUS 0x25 +#define A3XX_RBBM_AHB_PFP_SPLIT_STATUS 0x26 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x51 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x54 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x57 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x5A +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_PERFCTR_CTL 0x80 +#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81 +#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85 +#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86 +#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90 +#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91 +#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92 +#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93 +#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94 +#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95 +#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96 +#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97 +#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98 +#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99 +#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A +#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B +#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C +#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D +#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E +#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F +#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0 +#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1 +#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2 +#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3 +#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4 +#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5 +#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6 +#define 
A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7 +#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8 +#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9 +#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA +#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB +#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC +#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD +#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE +#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF +#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0 +#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1 +#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2 +#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3 +#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4 +#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5 +#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6 +#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7 +#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8 +#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9 +#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA +#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB +#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC +#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD +#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE +#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF +#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0 +#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1 +#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2 +#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3 +#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4 +#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5 +#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6 +#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7 +#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8 +#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9 +#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA +#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB +#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC +#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD +#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE +#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF +#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0 +#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1 +#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2 +#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3 +#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4 +#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5 +#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6 +#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7 +#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8 +#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9 +#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA +#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB +#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC +#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD +#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE +#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF +#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0 +#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1 +#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2 +#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3 +#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4 +#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5 + +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA +#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +#define A3XX_RBBM_DEBUG_BUS_CTL 0x111 +#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112 +#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B +#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C +#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D +#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E +#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F +#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120 +#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121 +#define A3XX_RBBM_EXT_TRACE_CMD 0x122 +#define A3XX_CP_RB_BASE 0x01C0 +#define A3XX_CP_RB_CNTL 0x01C1 +#define A3XX_CP_RB_RPTR_ADDR 0x01C3 +#define A3XX_CP_RB_RPTR 0x01C4 +#define A3XX_CP_RB_WPTR 0x01C5 +#define A3XX_CP_RB_RPTR_WR 0x01C7 +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define 
A3XX_CP_ROQ_ADDR 0x1CC +#define A3XX_CP_ROQ_DATA 0x1CD +#define A3XX_CP_MERCIU_ADDR 0x1D1 +#define A3XX_CP_MERCIU_DATA 0x1D2 +#define A3XX_CP_MERCIU_DATA2 0x1D3 +#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5 +#define A3XX_CP_MEQ_ADDR 0x1DA +#define A3XX_CP_MEQ_DATA 0x1DB +#define A3XX_CP_SCRATCH_UMSK 0x01DC +#define A3XX_CP_SCRATCH_ADDR 0x01DD +#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC +#define A3XX_CP_STATE_DEBUG_DATA 0x01ED +#define A3XX_CP_CNTL 0x01F4 +#define A3XX_CP_WFI_PEND_CTR 0x01F5 +#define A3XX_CP_ME_CNTL 0x01F6 +#define A3XX_CP_ME_STATUS 0x01F7 +#define A3XX_CP_ME_RAM_WADDR 0x01F8 +#define A3XX_CP_ME_RAM_RADDR 0x01F9 +#define A3XX_CP_ME_RAM_DATA 0x01FA +#define A3XX_CP_DEBUG 0x01FC + +#define A3XX_RBBM_PM_OVERRIDE2 0x039D + +#define A3XX_CP_PERFCOUNTER_SELECT 0x445 +#define A3XX_CP_IB1_BASE 0x0458 +#define A3XX_CP_IB1_BUFSZ 0x0459 +#define A3XX_CP_IB2_BASE 0x045A +#define A3XX_CP_IB2_BUFSZ 0x045B + +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_AHB_FAULT 0x54D +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_PROTECT_REG_1 0x461 +#define A3XX_CP_PROTECT_REG_2 0x462 +#define A3XX_CP_PROTECT_REG_3 0x463 +#define A3XX_CP_PROTECT_REG_4 0x464 +#define A3XX_CP_PROTECT_REG_5 0x465 +#define A3XX_CP_PROTECT_REG_6 0x466 +#define A3XX_CP_PROTECT_REG_7 0x467 +#define A3XX_CP_PROTECT_REG_8 0x468 +#define A3XX_CP_PROTECT_REG_9 0x469 +#define A3XX_CP_PROTECT_REG_A 0x46A +#define A3XX_CP_PROTECT_REG_B 0x46B +#define A3XX_CP_PROTECT_REG_C 0x46C +#define A3XX_CP_PROTECT_REG_D 0x46D +#define A3XX_CP_PROTECT_REG_E 0x46E +#define A3XX_CP_PROTECT_REG_F 0x46F +#define A3XX_CP_STAT 0x047F +#define A3XX_CP_SCRATCH_REG0 0x578 +#define A3XX_CP_SCRATCH_REG6 0x57E +#define A3XX_CP_SCRATCH_REG7 0x57F +#define A3XX_VSC_BIN_SIZE 0xC01 +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_CONFIG_0 0xC06 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_CONFIG_1 0xC09 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_CONFIG_2 0xC0C +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_CONFIG_3 0xC0F +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_CONFIG_4 0xC12 +#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_CONFIG_5 0xC15 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_CONFIG_6 0xC18 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_CONFIG_7 0xC1B +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48 +#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49 +#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A +#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B +#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81 +#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88 +#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89 +#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A +#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define 
A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_RB_GMEM_BASE_ADDR 0xCC0 +#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1 +#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6 +#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7 +#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0 +#define A3XX_SQ_GPR_MANAGEMENT 0x0D00 +#define A3XX_SQ_INST_STORE_MANAGMENT 0x0D02 +#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00 +#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01 +#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02 +#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03 +#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04 +#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05 +#define A3XX_TP0_CHICKEN 0x0E1E +#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44 +#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45 +#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61 +#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62 +#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64 +#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65 +#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82 +#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84 +#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85 +#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86 +#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87 +#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88 +#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1 +#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6 +#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4 +#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5 +#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6 +#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7 +#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8 +#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9 +#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA +#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB +#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04 +#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05 +#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06 +#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07 +#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08 +#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049 +#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A +#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_ALPHA_REFERENCE 
0x20C3 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BUF_BASE0 0x20C6 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_CONTROL1 0x20C8 +#define A3XX_RB_MRT_BUF_INFO1 0x20C9 +#define A3XX_RB_MRT_BUF_BASE1 0x20CA +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_CONTROL2 0x20CC +#define A3XX_RB_MRT_BUF_INFO2 0x20CD +#define A3XX_RB_MRT_BUF_BASE2 0x20CE +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_CONTROL3 0x20D0 +#define A3XX_RB_MRT_BUF_INFO3 0x20D1 +#define A3XX_RB_MRT_BUF_BASE3 0x20D2 +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_BLEND_GREEN 0x20E5 +#define A3XX_RB_BLEND_BLUE 0x20E6 +#define A3XX_RB_BLEND_ALPHA 0x20E7 +#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8 +#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9 +#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA +#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_BASE 0x20ED +#define A3XX_RB_COPY_DEST_PITCH 0x20EE +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_DEPTH_CLEAR 0x2101 +#define A3XX_RB_DEPTH_BUF_INFO 0x2102 +#define A3XX_RB_DEPTH_BUF_PITCH 0x2103 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_RB_STENCIL_CLEAR 0x2105 +#define A3XX_RB_STENCIL_BUF_INFO 0x2106 +#define A3XX_RB_STENCIL_BUF_PITCH 0x2107 +#define A3XX_RB_STENCIL_REF_MASK 0x2108 +#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109 +#define A3XX_RB_LRZ_VSC_CONTROL 0x210C +#define A3XX_RB_WINDOW_OFFSET 0x210E +#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110 +#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111 +#define A3XX_RB_Z_CLAMP_MIN 0x2114 +#define A3XX_RB_Z_CLAMP_MAX 0x2115 +#define A3XX_PC_VSTREAM_CONTROL 0x21E4 +#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA +#define A3XX_PC_PRIM_VTX_CNTL 0x21EC +#define A3XX_PC_RESTART_INDEX 0x21ED +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_CONTROL_1_REG 0x2201 +#define A3XX_HLSQ_CONTROL_2_REG 0x2202 +#define A3XX_HLSQ_CONTROL_3_REG 0x2203 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_FS_CONTROL_REG 0x2205 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D +#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E +#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F +#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210 +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_CONTROL_0 0x2240 +#define A3XX_VFD_INDEX_MIN 0x2242 +#define A3XX_VFD_INDEX_MAX 0x2243 +#define A3XX_VFD_FETCH_INSTR_0_0 0x2246 +#define A3XX_VFD_FETCH_INSTR_0_4 0x224E +#define A3XX_VFD_FETCH_INSTR_1_0 0x2247 +#define A3XX_VFD_FETCH_INSTR_1_1 0x2249 +#define A3XX_VFD_FETCH_INSTR_1_2 0x224B +#define A3XX_VFD_FETCH_INSTR_1_3 0x224D +#define A3XX_VFD_FETCH_INSTR_1_4 0x224F +#define A3XX_VFD_FETCH_INSTR_1_5 0x2251 +#define A3XX_VFD_FETCH_INSTR_1_6 0x2253 +#define A3XX_VFD_FETCH_INSTR_1_7 0x2255 +#define A3XX_VFD_FETCH_INSTR_1_8 0x2257 +#define A3XX_VFD_FETCH_INSTR_1_9 0x2259 +#define A3XX_VFD_FETCH_INSTR_1_A 0x225B +#define A3XX_VFD_FETCH_INSTR_1_B 0x225D +#define 
A3XX_VFD_FETCH_INSTR_1_C 0x225F +#define A3XX_VFD_FETCH_INSTR_1_D 0x2261 +#define A3XX_VFD_FETCH_INSTR_1_E 0x2263 +#define A3XX_VFD_FETCH_INSTR_1_F 0x2265 +#define A3XX_VFD_DECODE_INSTR_0 0x2266 +#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E +#define A3XX_VPC_ATTR 0x2280 +#define A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x228B +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 +#define A3XX_SP_VS_OUT_REG_0 0x22C7 +#define A3XX_SP_VS_OUT_REG_1 0x22C8 +#define A3XX_SP_VS_OUT_REG_2 0x22C9 +#define A3XX_SP_VS_OUT_REG_3 0x22CA +#define A3XX_SP_VS_OUT_REG_4 0x22CB +#define A3XX_SP_VS_OUT_REG_5 0x22CC +#define A3XX_SP_VS_OUT_REG_6 0x22CD +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1 +#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2 +#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_OBJ_START_REG 0x22D5 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_OBJ_START_REG 0x22E3 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_MRT_REG_1 0x22F1 +#define A3XX_SP_FS_MRT_REG_2 0x22F2 +#define A3XX_SP_FS_MRT_REG_3 0x22F3 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_PA_SC_AA_CONFIG 0x2301 +#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340 +#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342 +#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343 +#define A3XX_VBIF_CLKON 0x3001 +#define A3XX_VBIF_FIXED_SORT_EN 0x300C +#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D +#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E +#define A3XX_VBIF_ABIT_SORT 0x301C +#define A3XX_VBIF_ABIT_SORT_CONF 0x301D +#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C +#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D +#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030 +#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031 +#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034 +#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035 +#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036 +#define A3XX_VBIF_ARB_CTL 0x303C +#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x3058 +#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E +#define A3XX_VBIF_OUT_AXI_AOOO 0x305F +#define A3XX_VBIF_PERF_CNT_EN 0x3070 +#define A3XX_VBIF_PERF_CNT_CLR 0x3071 +#define A3XX_VBIF_PERF_CNT_SEL 0x3072 +#define A3XX_VBIF_PERF_CNT0_LO 0x3073 +#define A3XX_VBIF_PERF_CNT0_HI 0x3074 +#define A3XX_VBIF_PERF_CNT1_LO 0x3075 +#define A3XX_VBIF_PERF_CNT1_HI 0x3076 +#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077 +#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078 +#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079 +#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a +#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b +#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c + +#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F 
+ +#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081 + +/* VBIF register offsets for A306 */ +#define A3XX_VBIF2_XIN_HALT_CTRL0 0x3081 +#define A3XX_VBIF2_XIN_HALT_CTRL0_MASK 0x7 + +#define A3XX_VBIF2_XIN_HALT_CTRL1 0x3082 + +#define A3XX_VBIF2_PERF_CNT_EN0 0x30c0 +#define A3XX_VBIF2_PERF_CNT_EN1 0x30c1 +#define A3XX_VBIF2_PERF_CNT_EN2 0x30c2 +#define A3XX_VBIF2_PERF_CNT_EN3 0x30c3 +#define A3XX_VBIF2_PERF_CNT_CLR0 0x30c8 +#define A3XX_VBIF2_PERF_CNT_CLR1 0x30c9 +#define A3XX_VBIF2_PERF_CNT_CLR2 0x30ca +#define A3XX_VBIF2_PERF_CNT_CLR3 0x30cb +#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0 +#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1 +#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2 +#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3 +#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8 +#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9 +#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da +#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db +#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0 +#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1 +#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2 +#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3 + +#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100 +#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101 +#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102 +#define A3XX_VBIF2_PERF_PWR_CNT_CLR0 0x3108 +#define A3XX_VBIF2_PERF_PWR_CNT_CLR1 0x3109 +#define A3XX_VBIF2_PERF_PWR_CNT_CLR2 0x310A +#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a + +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800 +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801 + +/* Various flags used by the context switch code */ + +#define SP_MULTI 0 +#define SP_BUFFER_MODE 1 +#define SP_TWO_VTX_QUADS 0 +#define SP_PIXEL_BASED 0 +#define SP_R8G8B8A8_UNORM 8 +#define SP_FOUR_PIX_QUADS 1 + +#define HLSQ_DIRECT 0 +#define HLSQ_BLOCK_ID_SP_VS 4 +#define HLSQ_SP_VS_INSTR 0 +#define HLSQ_SP_FS_INSTR 0 +#define HLSQ_BLOCK_ID_SP_FS 6 +#define HLSQ_TWO_PIX_QUADS 0 +#define HLSQ_TWO_VTX_QUADS 0 +#define HLSQ_BLOCK_ID_TP_TEX 2 +#define HLSQ_TP_TEX_SAMPLERS 0 +#define HLSQ_TP_TEX_MEMOBJ 1 +#define HLSQ_BLOCK_ID_TP_MIPMAP 3 +#define HLSQ_TP_MIPMAP_BASE 1 +#define HLSQ_FOUR_PIX_QUADS 1 + +#define RB_FACTOR_ONE 1 +#define RB_BLEND_OP_ADD 0 +#define RB_FACTOR_ZERO 0 +#define RB_DITHER_DISABLE 0 +#define RB_DITHER_ALWAYS 1 +#define RB_FRAG_NEVER 0 +#define RB_ENDIAN_NONE 0 +#define RB_R8G8B8A8_UNORM 8 +#define RB_RESOLVE_PASS 2 +#define RB_CLEAR_MODE_RESOLVE 1 +#define RB_TILINGMODE_LINEAR 0 +#define RB_REF_NEVER 0 +#define RB_FRAG_LESS 1 +#define RB_REF_ALWAYS 7 +#define RB_STENCIL_KEEP 0 +#define RB_RENDERING_PASS 0 +#define RB_TILINGMODE_32X32 2 + +#define PC_DRAW_TRIANGLES 2 +#define PC_DI_PT_RECTLIST 8 +#define PC_DI_SRC_SEL_AUTO_INDEX 2 +#define PC_DI_INDEX_SIZE_16_BIT 0 +#define PC_DI_IGNORE_VISIBILITY 0 +#define PC_DI_PT_TRILIST 4 +#define PC_DI_SRC_SEL_IMMEDIATE 1 +#define PC_DI_INDEX_SIZE_32_BIT 1 + +#define UCHE_ENTIRE_CACHE 1 +#define UCHE_OP_INVALIDATE 1 + +/* + * The following are bit field shifts within some of the registers defined + * above. 
These are used in the context switch code in conjunction with the + * _SET macro + */ + +#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16 +#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12 +#define GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 21 +#define GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 19 +#define GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 20 +#define GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 17 +#define GRAS_CL_VPORT_XSCALE_VPORT_XSCALE 0 +#define GRAS_CL_VPORT_YSCALE_VPORT_YSCALE 0 +#define GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE 0 +#define GRAS_SC_CONTROL_RASTER_MODE 12 +#define GRAS_SC_CONTROL_RENDER_MODE 4 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_X 0 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_Y 16 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_X 0 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_Y 16 +#define GRAS_SU_CTRLMODE_LINEHALFWIDTH 03 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY 0 +#define HLSQ_CTRL0REG_CHUNKDISABLE 26 +#define HLSQ_CTRL0REG_CONSTSWITCHMODE 27 +#define HLSQ_CTRL0REG_FSSUPERTHREADENABLE 6 +#define HLSQ_CTRL0REG_FSTHREADSIZE 4 +#define HLSQ_CTRL0REG_LAZYUPDATEDISABLE 28 +#define HLSQ_CTRL0REG_RESERVED2 10 +#define HLSQ_CTRL0REG_SPCONSTFULLUPDATE 29 +#define HLSQ_CTRL0REG_SPSHADERRESTART 9 +#define HLSQ_CTRL0REG_TPFULLUPDATE 30 +#define HLSQ_CTRL1REG_RESERVED1 9 +#define HLSQ_CTRL1REG_VSSUPERTHREADENABLE 8 +#define HLSQ_CTRL1REG_VSTHREADSIZE 6 +#define HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD 26 +#define HLSQ_FSCTRLREG_FSCONSTLENGTH 0 +#define HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET 12 +#define HLSQ_FSCTRLREG_FSINSTRLENGTH 24 +#define HLSQ_VSCTRLREG_VSINSTRLENGTH 24 +#define PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE 8 +#define PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE 5 +#define PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST 25 +#define PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC 0 +#define PC_DRAW_INITIATOR_PRIM_TYPE 0 +#define PC_DRAW_INITIATOR_SOURCE_SELECT 6 +#define PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE 9 +#define PC_DRAW_INITIATOR_INDEX_SIZE 0x0B +#define PC_DRAW_INITIATOR_SMALL_INDEX 0x0D +#define PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x0E +#define RB_COPYCONTROL_COPY_GMEM_BASE 14 +#define RB_COPYCONTROL_RESOLVE_CLEAR_MODE 4 +#define RB_COPYDESTBASE_COPY_DEST_BASE 4 +#define RB_COPYDESTINFO_COPY_COMPONENT_ENABLE 14 +#define RB_COPYDESTINFO_COPY_DEST_ENDIAN 18 +#define RB_COPYDESTINFO_COPY_DEST_FORMAT 2 +#define RB_COPYDESTINFO_COPY_DEST_TILE 0 +#define RB_COPYDESTPITCH_COPY_DEST_PITCH 0 +#define RB_DEPTHCONTROL_Z_TEST_FUNC 4 +#define RB_MODECONTROL_RENDER_MODE 8 +#define RB_MODECONTROL_MARB_CACHE_SPLIT_MODE 15 +#define RB_MODECONTROL_PACKER_TIMER_ENABLE 16 +#define RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE 21 +#define RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR 24 +#define RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR 16 +#define RB_MRTBLENDCONTROL_CLAMP_ENABLE 29 +#define RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE 5 +#define RB_MRTBLENDCONTROL_RGB_DEST_FACTOR 8 +#define RB_MRTBLENDCONTROL_RGB_SRC_FACTOR 0 +#define RB_MRTBUFBASE_COLOR_BUF_BASE 4 +#define RB_MRTBUFINFO_COLOR_BUF_PITCH 17 +#define RB_MRTBUFINFO_COLOR_FORMAT 0 +#define RB_MRTBUFINFO_COLOR_TILE_MODE 6 +#define RB_MRTCONTROL_COMPONENT_ENABLE 24 +#define RB_MRTCONTROL_DITHER_MODE 12 +#define RB_MRTCONTROL_READ_DEST_ENABLE 3 +#define RB_MRTCONTROL_ROP_CODE 8 +#define RB_MSAACONTROL_MSAA_DISABLE 10 +#define RB_MSAACONTROL_SAMPLE_MASK 16 +#define RB_RENDERCONTROL_ALPHA_TEST_FUNC 24 +#define RB_RENDERCONTROL_BIN_WIDTH 4 +#define RB_RENDERCONTROL_DISABLE_COLOR_PIPE 12 +#define RB_STENCILCONTROL_STENCIL_FAIL 11 +#define RB_STENCILCONTROL_STENCIL_FAIL_BF 
23 +#define RB_STENCILCONTROL_STENCIL_FUNC 8 +#define RB_STENCILCONTROL_STENCIL_FUNC_BF 20 +#define RB_STENCILCONTROL_STENCIL_ZFAIL 17 +#define RB_STENCILCONTROL_STENCIL_ZFAIL_BF 29 +#define RB_STENCILCONTROL_STENCIL_ZPASS 14 +#define RB_STENCILCONTROL_STENCIL_ZPASS_BF 26 +#define SP_FSCTRLREG0_FSFULLREGFOOTPRINT 10 +#define SP_FSCTRLREG0_FSHALFREGFOOTPRINT 4 +#define SP_FSCTRLREG0_FSICACHEINVALID 2 +#define SP_FSCTRLREG0_FSINOUTREGOVERLAP 18 +#define SP_FSCTRLREG0_FSINSTRBUFFERMODE 1 +#define SP_FSCTRLREG0_FSLENGTH 24 +#define SP_FSCTRLREG0_FSSUPERTHREADMODE 21 +#define SP_FSCTRLREG0_FSTHREADMODE 0 +#define SP_FSCTRLREG0_FSTHREADSIZE 20 +#define SP_FSCTRLREG0_PIXLODENABLE 22 +#define SP_FSCTRLREG1_FSCONSTLENGTH 0 +#define SP_FSCTRLREG1_FSINITIALOUTSTANDING 20 +#define SP_FSCTRLREG1_HALFPRECVAROFFSET 24 +#define SP_FSMRTREG_REGID 0 +#define SP_FSMRTREG_PRECISION 8 +#define SP_FSOUTREG_PAD0 2 +#define SP_IMAGEOUTPUTREG_MRTFORMAT 0 +#define SP_IMAGEOUTPUTREG_DEPTHOUTMODE 3 +#define SP_IMAGEOUTPUTREG_PAD0 6 +#define SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET 16 +#define SP_OBJOFFSETREG_SHADEROBJOFFSETINIC 25 +#define SP_SHADERLENGTH_LEN 0 +#define SP_SPCTRLREG_CONSTMODE 18 +#define SP_SPCTRLREG_LOMODE 22 +#define SP_SPCTRLREG_SLEEPMODE 20 +#define SP_VSCTRLREG0_VSFULLREGFOOTPRINT 10 +#define SP_VSCTRLREG0_VSICACHEINVALID 2 +#define SP_VSCTRLREG0_VSINSTRBUFFERMODE 1 +#define SP_VSCTRLREG0_VSLENGTH 24 +#define SP_VSCTRLREG0_VSSUPERTHREADMODE 21 +#define SP_VSCTRLREG0_VSTHREADMODE 0 +#define SP_VSCTRLREG0_VSTHREADSIZE 20 +#define SP_VSCTRLREG1_VSINITIALOUTSTANDING 24 +#define SP_VSOUTREG_COMPMASK0 9 +#define SP_VSPARAMREG_POSREGID 0 +#define SP_VSPARAMREG_PSIZEREGID 8 +#define SP_VSPARAMREG_TOTALVSOUTVAR 20 +#define SP_VSVPCDSTREG_OUTLOC0 0 +#define TPL1_TPTEXOFFSETREG_BASETABLEPTR 16 +#define TPL1_TPTEXOFFSETREG_MEMOBJOFFSET 8 +#define TPL1_TPTEXOFFSETREG_SAMPLEROFFSET 0 +#define UCHE_INVALIDATE1REG_OPCODE 0x1C +#define UCHE_INVALIDATE1REG_ALLORPORTION 0x1F +#define VFD_BASEADDR_BASEADDR 0 +#define VFD_CTRLREG0_PACKETSIZE 18 +#define VFD_CTRLREG0_STRMDECINSTRCNT 22 +#define VFD_CTRLREG0_STRMFETCHINSTRCNT 27 +#define VFD_CTRLREG0_TOTALATTRTOVS 0 +#define VFD_CTRLREG1_MAXSTORAGE 0 +#define VFD_CTRLREG1_REGID4INST 24 +#define VFD_CTRLREG1_REGID4VTX 16 +#define VFD_DECODEINSTRUCTIONS_CONSTFILL 4 +#define VFD_DECODEINSTRUCTIONS_FORMAT 6 +#define VFD_DECODEINSTRUCTIONS_LASTCOMPVALID 29 +#define VFD_DECODEINSTRUCTIONS_REGID 12 +#define VFD_DECODEINSTRUCTIONS_SHIFTCNT 24 +#define VFD_DECODEINSTRUCTIONS_SWITCHNEXT 30 +#define VFD_DECODEINSTRUCTIONS_WRITEMASK 0 +#define VFD_FETCHINSTRUCTIONS_BUFSTRIDE 7 +#define VFD_FETCHINSTRUCTIONS_FETCHSIZE 0 +#define VFD_FETCHINSTRUCTIONS_INDEXDECODE 18 +#define VFD_FETCHINSTRUCTIONS_STEPRATE 24 +#define VFD_FETCHINSTRUCTIONS_SWITCHNEXT 17 +#define VFD_THREADINGTHRESHOLD_REGID_VTXCNT 8 +#define VFD_THREADINGTHRESHOLD_REGID_THRESHOLD 0 +#define VFD_THREADINGTHRESHOLD_RESERVED6 4 +#define VPC_VPCATTR_LMSIZE 28 +#define VPC_VPCATTR_THRHDASSIGN 12 +#define VPC_VPCATTR_TOTALATTR 0 +#define VPC_VPCPACK_NUMFPNONPOSVAR 8 +#define VPC_VPCPACK_NUMNONPOSVSVAR 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT08 0 +#define VPC_VPCVARPSREPLMODE_COMPONENT09 2 +#define VPC_VPCVARPSREPLMODE_COMPONENT0A 4 +#define VPC_VPCVARPSREPLMODE_COMPONENT0B 6 +#define VPC_VPCVARPSREPLMODE_COMPONENT0C 8 +#define VPC_VPCVARPSREPLMODE_COMPONENT0D 10 +#define VPC_VPCVARPSREPLMODE_COMPONENT0E 12 +#define VPC_VPCVARPSREPLMODE_COMPONENT0F 14 +#define VPC_VPCVARPSREPLMODE_COMPONENT10 16 +#define 
VPC_VPCVARPSREPLMODE_COMPONENT11 18 +#define VPC_VPCVARPSREPLMODE_COMPONENT12 20 +#define VPC_VPCVARPSREPLMODE_COMPONENT13 22 +#define VPC_VPCVARPSREPLMODE_COMPONENT14 24 +#define VPC_VPCVARPSREPLMODE_COMPONENT15 26 +#define VPC_VPCVARPSREPLMODE_COMPONENT16 28 +#define VPC_VPCVARPSREPLMODE_COMPONENT17 30 + +/* RBBM Debug bus block IDs */ +#define RBBM_BLOCK_ID_NONE 0x0 +#define RBBM_BLOCK_ID_CP 0x1 +#define RBBM_BLOCK_ID_RBBM 0x2 +#define RBBM_BLOCK_ID_VBIF 0x3 +#define RBBM_BLOCK_ID_HLSQ 0x4 +#define RBBM_BLOCK_ID_UCHE 0x5 +#define RBBM_BLOCK_ID_PC 0x8 +#define RBBM_BLOCK_ID_VFD 0x9 +#define RBBM_BLOCK_ID_VPC 0xa +#define RBBM_BLOCK_ID_TSE 0xb +#define RBBM_BLOCK_ID_RAS 0xc +#define RBBM_BLOCK_ID_VSC 0xd +#define RBBM_BLOCK_ID_SP_0 0x10 +#define RBBM_BLOCK_ID_SP_1 0x11 +#define RBBM_BLOCK_ID_SP_2 0x12 +#define RBBM_BLOCK_ID_SP_3 0x13 +#define RBBM_BLOCK_ID_TPL1_0 0x18 +#define RBBM_BLOCK_ID_TPL1_1 0x19 +#define RBBM_BLOCK_ID_TPL1_2 0x1a +#define RBBM_BLOCK_ID_TPL1_3 0x1b +#define RBBM_BLOCK_ID_RB_0 0x20 +#define RBBM_BLOCK_ID_RB_1 0x21 +#define RBBM_BLOCK_ID_RB_2 0x22 +#define RBBM_BLOCK_ID_RB_3 0x23 +#define RBBM_BLOCK_ID_MARB_0 0x28 +#define RBBM_BLOCK_ID_MARB_1 0x29 +#define RBBM_BLOCK_ID_MARB_2 0x2a +#define RBBM_BLOCK_ID_MARB_3 0x2b + +/* RBBM_CLOCK_CTL default value */ +#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA +#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF +#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF + +#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000 +#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x05515455 +#define A310_RBBM_GPR0_CTL_DEFAULT 0x000000AA + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define SP_ALU_ACTIVE_CYCLES 0x1D +#define SP0_ICL1_MISSES 0x1A +#define SP_FS_CFLOW_INSTRUCTIONS 0x0C + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define TSE_INPUT_PRIM_NUM 0x0 + +/* VBIF countables */ +#define VBIF_AXI_TOTAL_BEATS 85 +#define VBIF_DDR_TOTAL_CYCLES 110 + +/* VBIF Recoverable HALT bit value */ +#define VBIF_RECOVERABLE_HALT_CTRL 0x1 + +/* + * CP DEBUG settings for A3XX core: + * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control + * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF + */ +#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25)) + + +#endif diff --git a/drivers/gpu/msm/a4xx_reg.h b/drivers/gpu/msm/a4xx_reg.h new file mode 100644 index 000000000000..78db8dd2da40 --- /dev/null +++ b/drivers/gpu/msm/a4xx_reg.h @@ -0,0 +1,924 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _A4XX_REG_H +#define _A4XX_REG_H + +/* A4XX interrupt bits */ +#define A4XX_INT_RBBM_GPU_IDLE 0 +#define A4XX_INT_RBBM_AHB_ERROR 1 +#define A4XX_INT_RBBM_REG_TIMEOUT 2 +#define A4XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A4XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A4XX_INT_RBBM_ETS_MS_TIMEOUT 5 +#define A4XX_INT_RBBM_ASYNC_OVERFLOW 6 +#define A4XX_INT_RBBM_GPC_ERR 7 +#define A4XX_INT_CP_SW 8 +#define A4XX_INT_CP_OPCODE_ERROR 9 +#define A4XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A4XX_INT_CP_HW_FAULT 11 +#define A4XX_INT_CP_DMA 12 +#define A4XX_INT_CP_IB2_INT 13 +#define A4XX_INT_CP_IB1_INT 14 +#define A4XX_INT_CP_RB_INT 15 +#define A4XX_INT_CP_REG_PROTECT_FAULT 16 +#define A4XX_INT_CP_RB_DONE_TS 17 +#define A4XX_INT_CP_VS_DONE_TS 18 +#define A4XX_INT_CP_PS_DONE_TS 19 +#define A4XX_INT_CACHE_FLUSH_TS 20 +#define A4XX_INT_CP_AHB_ERROR_HALT 21 +#define A4XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A4XX_INT_MISC_HANG_DETECT 24 +#define A4XX_INT_UCHE_OOB_ACCESS 25 +#define A4XX_INT_RBBM_DPM_CALC_ERR 28 +#define A4XX_INT_RBBM_DPM_EPOCH_ERR 29 +#define A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR 30 +#define A4XX_INT_RBBM_DPM_THERMAL_RED_ERR 31 + +/* RB registers */ +#define A4XX_RB_GMEM_BASE_ADDR 0xcc0 + +#define A4XX_RB_PERFCTR_RB_SEL_0 0xcc7 +#define A4XX_RB_PERFCTR_RB_SEL_1 0xcc8 +#define A4XX_RB_PERFCTR_RB_SEL_2 0xcc9 +#define A4XX_RB_PERFCTR_RB_SEL_3 0xcca +#define A4XX_RB_PERFCTR_RB_SEL_4 0xccb +#define A4XX_RB_PERFCTR_RB_SEL_5 0xccc +#define A4XX_RB_PERFCTR_RB_SEL_6 0xccd +#define A4XX_RB_PERFCTR_RB_SEL_7 0xcce + +enum a4xx_rb_perfctr_rb_sel { + RB_VALID_SAMPLES = 0x25, + RB_Z_FAIL = 0x28, + RB_S_FAIL = 0x29, +}; + +/* RBBM registers */ +#define A4XX_RBBM_CLOCK_CTL_TP0 0x4 +#define A4XX_RBBM_CLOCK_CTL_TP1 0x5 +#define A4XX_RBBM_CLOCK_CTL_TP2 0x6 +#define A4XX_RBBM_CLOCK_CTL_TP3 0x7 +#define A4XX_RBBM_CLOCK_CTL2_TP0 0x8 +#define A4XX_RBBM_CLOCK_CTL2_TP1 0x9 +#define A4XX_RBBM_CLOCK_CTL2_TP2 0xA +#define A4XX_RBBM_CLOCK_CTL2_TP3 0xB +#define A4XX_RBBM_CLOCK_HYST_TP0 0xC +#define A4XX_RBBM_CLOCK_HYST_TP1 0xD +#define A4XX_RBBM_CLOCK_HYST_TP2 0xE +#define A4XX_RBBM_CLOCK_HYST_TP3 0xF +#define A4XX_RBBM_CLOCK_DELAY_TP0 0x10 +#define A4XX_RBBM_CLOCK_DELAY_TP1 0x11 +#define A4XX_RBBM_CLOCK_DELAY_TP2 0x12 +#define A4XX_RBBM_CLOCK_DELAY_TP3 0x13 +#define A4XX_RBBM_CLOCK_CTL_UCHE 0x14 +#define A4XX_RBBM_CLOCK_CTL2_UCHE 0x15 +#define A4XX_RBBM_CLOCK_CTL3_UCHE 0x16 +#define A4XX_RBBM_CLOCK_CTL4_UCHE 0x17 +#define A4XX_RBBM_CLOCK_HYST_UCHE 0x18 +#define A4XX_RBBM_CLOCK_DELAY_UCHE 0x19 +#define A4XX_RBBM_CLOCK_MODE_GPC 0x1a +#define A4XX_RBBM_CLOCK_DELAY_GPC 0x1b +#define A4XX_RBBM_CLOCK_HYST_GPC 0x1c +#define A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x1d +#define A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x1e +#define A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x1f +#define A4XX_RBBM_CLOCK_CTL 0x20 +#define A4XX_RBBM_SP_HYST_CNT 0x21 +#define A4XX_RBBM_SW_RESET_CMD 0x22 +#define A4XX_RBBM_AHB_CTL0 0x23 +#define A4XX_RBBM_AHB_CTL1 0x24 +#define A4XX_RBBM_AHB_CMD 0x25 +#define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x2b +#define A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x2f +#define A4XX_RBBM_INT_CLEAR_CMD 0x36 +#define A4XX_RBBM_INT_0_MASK 0x37 +#define A4XX_RBBM_ALWAYSON_COUNTER_CNTL 0x3d +#define A4XX_RBBM_RBBM_CTL 0x3e +#define A4XX_RBBM_CLOCK_CTL2 0x42 +#define A4XX_RBBM_BLOCK_SW_RESET_CMD 0x45 +#define A4XX_RBBM_EXT_TRACE_BUS_CTL 0x49 +#define A4XX_RBBM_CFG_DEBBUS_SEL_A 0x4a +#define A4XX_RBBM_CFG_DEBBUS_SEL_B 0x4b +#define A4XX_RBBM_CFG_DEBBUS_SEL_C 0x4c +#define A4XX_RBBM_CFG_DEBBUS_SEL_D 0x4d +#define 
A4XX_RBBM_CFG_DEBBUS_SEL_PING_INDEX_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_SEL_PING_BLK_SEL_SHIFT 8 +#define A4XX_RBBM_CFG_DEBBUS_SEL_PONG_INDEX_SHIFT 16 +#define A4XX_RBBM_CFG_DEBBUS_SEL_PONG_BLK_SEL_SHIFT 24 + +#define A4XX_RBBM_CFG_DEBBUS_CTLT 0x4e +#define A4XX_RBBM_CFG_DEBBUS_CTLT_ENT_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_CTLT_GRANU_SHIFT 12 +#define A4XX_RBBM_CFG_DEBBUS_CTLT_SEGT_SHIFT 28 + +#define A4XX_RBBM_CFG_DEBBUS_CTLM 0x4f +#define A4XX_RBBM_CFG_DEBBUS_CTLT_ENABLE_SHIFT 24 + +#define A4XX_RBBM_CFG_DEBBUS_OPL 0x50 +#define A4XX_RBBM_CFG_DEBBUS_OPE 0x51 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_0 0x52 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_1 0x53 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_2 0x54 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_3 0x55 + +#define A4XX_RBBM_CFG_DEBBUS_MASKL_0 0x56 +#define A4XX_RBBM_CFG_DEBBUS_MASKL_1 0x57 +#define A4XX_RBBM_CFG_DEBBUS_MASKL_2 0x58 +#define A4XX_RBBM_CFG_DEBBUS_MASKL_3 0x59 + + +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0 0x5a +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL0_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL1_SHIFT 4 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL2_SHIFT 8 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL3_SHIFT 12 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL4_SHIFT 16 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL5_SHIFT 20 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL6_SHIFT 24 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL7_SHIFT 28 + +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1 0x5b +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL8_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL9_SHIFT 4 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL10_SHIFT 8 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL11_SHIFT 12 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL12_SHIFT 16 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL13_SHIFT 20 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL14_SHIFT 24 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL15_SHIFT 28 + +#define A4XX_RBBM_CFG_DEBBUS_IVTE_0 0x5c +#define A4XX_RBBM_CFG_DEBBUS_IVTE_1 0x5d +#define A4XX_RBBM_CFG_DEBBUS_IVTE_2 0x5e +#define A4XX_RBBM_CFG_DEBBUS_IVTE_3 0x5f +#define A4XX_RBBM_CFG_DEBBUS_MASKE_0 0x60 +#define A4XX_RBBM_CFG_DEBBUS_MASKE_1 0x61 +#define A4XX_RBBM_CFG_DEBBUS_MASKE_2 0x62 +#define A4XX_RBBM_CFG_DEBBUS_MASKE_3 0x63 +#define A4XX_RBBM_CFG_DEBBUS_NIBBLEE 0x64 +#define A4XX_RBBM_CFG_DEBBUS_PTRC0 0x65 +#define A4XX_RBBM_CFG_DEBBUS_PTRC1 0x66 +#define A4XX_RBBM_CFG_DEBBUS_LOADREG 0x67 +#define A4XX_RBBM_CLOCK_CTL_SP0 0x68 +#define A4XX_RBBM_CLOCK_CTL_SP1 0x69 +#define A4XX_RBBM_CLOCK_CTL_SP2 0x6A +#define A4XX_RBBM_CLOCK_CTL_SP3 0x6B +#define A4XX_RBBM_CLOCK_CTL2_SP0 0x6C +#define A4XX_RBBM_CLOCK_CTL2_SP1 0x6D +#define A4XX_RBBM_CLOCK_CTL2_SP2 0x6E +#define A4XX_RBBM_CLOCK_CTL2_SP3 0x6F +#define A4XX_RBBM_CLOCK_HYST_SP0 0x70 +#define A4XX_RBBM_CLOCK_HYST_SP1 0x71 +#define A4XX_RBBM_CLOCK_HYST_SP2 0x72 +#define A4XX_RBBM_CLOCK_HYST_SP3 0x73 +#define A4XX_RBBM_CLOCK_DELAY_SP0 0x74 +#define A4XX_RBBM_CLOCK_DELAY_SP1 0x75 +#define A4XX_RBBM_CLOCK_DELAY_SP2 0x76 +#define A4XX_RBBM_CLOCK_DELAY_SP3 0x77 +#define A4XX_RBBM_CLOCK_CTL_RB0 0x78 +#define A4XX_RBBM_CLOCK_CTL_RB1 0x79 +#define A4XX_RBBM_CLOCK_CTL_RB2 0x7A +#define A4XX_RBBM_CLOCK_CTL_RB3 0x7B +#define A4XX_RBBM_CLOCK_CTL2_RB0 0x7C +#define A4XX_RBBM_CLOCK_CTL2_RB1 0x7D +#define A4XX_RBBM_CLOCK_CTL2_RB2 0x7E +#define A4XX_RBBM_CLOCK_CTL2_RB3 0x7F +#define A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x80 +#define A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x81 +#define A4XX_RBBM_CLOCK_CTL_MARB_CCU0 0x82 +#define A4XX_RBBM_CLOCK_CTL_MARB_CCU1 0x83 +#define A4XX_RBBM_CLOCK_CTL_MARB_CCU2 0x84 +#define 
A4XX_RBBM_CLOCK_CTL_MARB_CCU3 0x85 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU0 0x86 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU1 0x87 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU2 0x88 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU3 0x89 +#define A4XX_RBBM_CLOCK_CTL_HLSQ 0x8a +#define A4XX_RBBM_CLOCK_HYST_HLSQ 0x8b + +#define A4XX_RBBM_CLOCK_DELAY_HLSQ 0x8c +#define A4XX_CGC_HLSQ_TP_EARLY_CYC_MASK 0x00700000 +#define A4XX_CGC_HLSQ_TP_EARLY_CYC_SHIFT 20 + +#define A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x8d +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_0 0x8e +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_1 0x8f +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_2 0x90 +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_3 0x91 + +#define A4XX_RBBM_CFG_DEBBUS_IDX 0x93 +#define A4XX_RBBM_CFG_DEBBUS_CLRC 0x94 +#define A4XX_RBBM_CFG_DEBBUS_LOADIVT 0x95 + +#define A4XX_RBBM_POWER_CNTL_IP 0x98 +#define A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x99 +#define A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x9a +#define A4XX_RBBM_PERFCTR_CP_0_LO 0x9c +#define A4XX_RBBM_PERFCTR_CP_0_HI 0x9d +#define A4XX_RBBM_PERFCTR_CP_1_LO 0x9e +#define A4XX_RBBM_PERFCTR_CP_1_HI 0x9f +#define A4XX_RBBM_PERFCTR_CP_2_LO 0xa0 +#define A4XX_RBBM_PERFCTR_CP_2_HI 0xa1 +#define A4XX_RBBM_PERFCTR_CP_3_LO 0xa2 +#define A4XX_RBBM_PERFCTR_CP_3_HI 0xa3 +#define A4XX_RBBM_PERFCTR_CP_4_LO 0xa4 +#define A4XX_RBBM_PERFCTR_CP_4_HI 0xa5 +#define A4XX_RBBM_PERFCTR_CP_5_LO 0xa6 +#define A4XX_RBBM_PERFCTR_CP_5_HI 0xa7 +#define A4XX_RBBM_PERFCTR_CP_6_LO 0xa8 +#define A4XX_RBBM_PERFCTR_CP_6_HI 0xa9 +#define A4XX_RBBM_PERFCTR_CP_7_LO 0xaa +#define A4XX_RBBM_PERFCTR_CP_7_HI 0xab +#define A4XX_RBBM_PERFCTR_RBBM_0_LO 0xac +#define A4XX_RBBM_PERFCTR_RBBM_0_HI 0xad +#define A4XX_RBBM_PERFCTR_RBBM_1_LO 0xae +#define A4XX_RBBM_PERFCTR_RBBM_1_HI 0xaf +#define A4XX_RBBM_PERFCTR_RBBM_2_LO 0xb0 +#define A4XX_RBBM_PERFCTR_RBBM_2_HI 0xb1 +#define A4XX_RBBM_PERFCTR_RBBM_3_LO 0xb2 +#define A4XX_RBBM_PERFCTR_RBBM_3_HI 0xb3 +#define A4XX_RBBM_PERFCTR_PC_0_LO 0xb4 +#define A4XX_RBBM_PERFCTR_PC_0_HI 0xb5 +#define A4XX_RBBM_PERFCTR_PC_1_LO 0xb6 +#define A4XX_RBBM_PERFCTR_PC_1_HI 0xb7 +#define A4XX_RBBM_PERFCTR_PC_2_LO 0xb8 +#define A4XX_RBBM_PERFCTR_PC_2_HI 0xb9 +#define A4XX_RBBM_PERFCTR_PC_3_LO 0xba +#define A4XX_RBBM_PERFCTR_PC_3_HI 0xbb +#define A4XX_RBBM_PERFCTR_PC_4_LO 0xbc +#define A4XX_RBBM_PERFCTR_PC_4_HI 0xbd +#define A4XX_RBBM_PERFCTR_PC_5_LO 0xbe +#define A4XX_RBBM_PERFCTR_PC_5_HI 0xbf +#define A4XX_RBBM_PERFCTR_PC_6_LO 0xc0 +#define A4XX_RBBM_PERFCTR_PC_6_HI 0xc1 +#define A4XX_RBBM_PERFCTR_PC_7_LO 0xc2 +#define A4XX_RBBM_PERFCTR_PC_7_HI 0xc3 +#define A4XX_RBBM_PERFCTR_VFD_0_LO 0xc4 +#define A4XX_RBBM_PERFCTR_VFD_0_HI 0xc5 +#define A4XX_RBBM_PERFCTR_VFD_1_LO 0xc6 +#define A4XX_RBBM_PERFCTR_VFD_1_HI 0xc7 +#define A4XX_RBBM_PERFCTR_VFD_2_LO 0xc8 +#define A4XX_RBBM_PERFCTR_VFD_2_HI 0xc9 +#define A4XX_RBBM_PERFCTR_VFD_3_LO 0xca +#define A4XX_RBBM_PERFCTR_VFD_3_HI 0xcb +#define A4XX_RBBM_PERFCTR_VFD_4_LO 0xcc +#define A4XX_RBBM_PERFCTR_VFD_4_HI 0xcd +#define A4XX_RBBM_PERFCTR_VFD_5_LO 0xce +#define A4XX_RBBM_PERFCTR_VFD_5_HI 0xcf +#define A4XX_RBBM_PERFCTR_VFD_6_LO 0xd0 +#define A4XX_RBBM_PERFCTR_VFD_6_HI 0xd1 +#define A4XX_RBBM_PERFCTR_VFD_7_LO 0xd2 +#define A4XX_RBBM_PERFCTR_VFD_7_HI 0xd3 +#define A4XX_RBBM_PERFCTR_HLSQ_0_LO 0xd4 +#define A4XX_RBBM_PERFCTR_HLSQ_0_HI 0xd5 +#define A4XX_RBBM_PERFCTR_HLSQ_1_LO 0xd6 +#define A4XX_RBBM_PERFCTR_HLSQ_1_HI 0xd7 +#define A4XX_RBBM_PERFCTR_HLSQ_2_LO 0xd8 +#define A4XX_RBBM_PERFCTR_HLSQ_2_HI 0xd9 +#define A4XX_RBBM_PERFCTR_HLSQ_3_LO 0xda +#define 
A4XX_RBBM_PERFCTR_HLSQ_3_HI 0xdb +#define A4XX_RBBM_PERFCTR_HLSQ_4_LO 0xdc +#define A4XX_RBBM_PERFCTR_HLSQ_4_HI 0xdd +#define A4XX_RBBM_PERFCTR_HLSQ_5_LO 0xde +#define A4XX_RBBM_PERFCTR_HLSQ_5_HI 0xdf +#define A4XX_RBBM_PERFCTR_HLSQ_6_LO 0xe0 +#define A4XX_RBBM_PERFCTR_HLSQ_6_HI 0xe1 +#define A4XX_RBBM_PERFCTR_HLSQ_7_LO 0xe2 +#define A4XX_RBBM_PERFCTR_HLSQ_7_HI 0xe3 +#define A4XX_RBBM_PERFCTR_VPC_0_LO 0xe4 +#define A4XX_RBBM_PERFCTR_VPC_0_HI 0xe5 +#define A4XX_RBBM_PERFCTR_VPC_1_LO 0xe6 +#define A4XX_RBBM_PERFCTR_VPC_1_HI 0xe7 +#define A4XX_RBBM_PERFCTR_VPC_2_LO 0xe8 +#define A4XX_RBBM_PERFCTR_VPC_2_HI 0xe9 +#define A4XX_RBBM_PERFCTR_VPC_3_LO 0xea +#define A4XX_RBBM_PERFCTR_VPC_3_HI 0xeb +#define A4XX_RBBM_PERFCTR_CCU_0_LO 0xec +#define A4XX_RBBM_PERFCTR_CCU_0_HI 0xed +#define A4XX_RBBM_PERFCTR_CCU_1_LO 0xee +#define A4XX_RBBM_PERFCTR_CCU_1_HI 0xef +#define A4XX_RBBM_PERFCTR_CCU_2_LO 0xf0 +#define A4XX_RBBM_PERFCTR_CCU_2_HI 0xf1 +#define A4XX_RBBM_PERFCTR_CCU_3_LO 0xf2 +#define A4XX_RBBM_PERFCTR_CCU_3_HI 0xf3 +#define A4XX_RBBM_PERFCTR_TSE_0_LO 0xf4 +#define A4XX_RBBM_PERFCTR_TSE_0_HI 0xf5 +#define A4XX_RBBM_PERFCTR_TSE_1_LO 0xf6 +#define A4XX_RBBM_PERFCTR_TSE_1_HI 0xf7 +#define A4XX_RBBM_PERFCTR_TSE_2_LO 0xf8 +#define A4XX_RBBM_PERFCTR_TSE_2_HI 0xf9 +#define A4XX_RBBM_PERFCTR_TSE_3_LO 0xfa +#define A4XX_RBBM_PERFCTR_TSE_3_HI 0xfb +#define A4XX_RBBM_PERFCTR_RAS_0_LO 0xfc +#define A4XX_RBBM_PERFCTR_RAS_0_HI 0xfd +#define A4XX_RBBM_PERFCTR_RAS_1_LO 0xfe +#define A4XX_RBBM_PERFCTR_RAS_1_HI 0xff +#define A4XX_RBBM_PERFCTR_RAS_2_LO 0x100 +#define A4XX_RBBM_PERFCTR_RAS_2_HI 0x101 +#define A4XX_RBBM_PERFCTR_RAS_3_LO 0x102 +#define A4XX_RBBM_PERFCTR_RAS_3_HI 0x103 +#define A4XX_RBBM_PERFCTR_UCHE_0_LO 0x104 +#define A4XX_RBBM_PERFCTR_UCHE_0_HI 0x105 +#define A4XX_RBBM_PERFCTR_UCHE_1_LO 0x106 +#define A4XX_RBBM_PERFCTR_UCHE_1_HI 0x107 +#define A4XX_RBBM_PERFCTR_UCHE_2_LO 0x108 +#define A4XX_RBBM_PERFCTR_UCHE_2_HI 0x109 +#define A4XX_RBBM_PERFCTR_UCHE_3_LO 0x10a +#define A4XX_RBBM_PERFCTR_UCHE_3_HI 0x10b +#define A4XX_RBBM_PERFCTR_UCHE_4_LO 0x10c +#define A4XX_RBBM_PERFCTR_UCHE_4_HI 0x10d +#define A4XX_RBBM_PERFCTR_UCHE_5_LO 0x10e +#define A4XX_RBBM_PERFCTR_UCHE_5_HI 0x10f +#define A4XX_RBBM_PERFCTR_UCHE_6_LO 0x110 +#define A4XX_RBBM_PERFCTR_UCHE_6_HI 0x111 +#define A4XX_RBBM_PERFCTR_UCHE_7_LO 0x112 +#define A4XX_RBBM_PERFCTR_UCHE_7_HI 0x113 +#define A4XX_RBBM_PERFCTR_TP_0_LO 0x114 +#define A4XX_RBBM_PERFCTR_TP_0_HI 0x115 +#define A4XX_RBBM_PERFCTR_TP_1_LO 0x116 +#define A4XX_RBBM_PERFCTR_TP_1_HI 0x117 +#define A4XX_RBBM_PERFCTR_TP_2_LO 0x118 +#define A4XX_RBBM_PERFCTR_TP_2_HI 0x119 +#define A4XX_RBBM_PERFCTR_TP_3_LO 0x11a +#define A4XX_RBBM_PERFCTR_TP_3_HI 0x11b +#define A4XX_RBBM_PERFCTR_TP_4_LO 0x11c +#define A4XX_RBBM_PERFCTR_TP_4_HI 0x11d +#define A4XX_RBBM_PERFCTR_TP_5_LO 0x11e +#define A4XX_RBBM_PERFCTR_TP_5_HI 0x11f +#define A4XX_RBBM_PERFCTR_TP_6_LO 0x120 +#define A4XX_RBBM_PERFCTR_TP_6_HI 0x121 +#define A4XX_RBBM_PERFCTR_TP_7_LO 0x122 +#define A4XX_RBBM_PERFCTR_TP_7_HI 0x123 +#define A4XX_RBBM_PERFCTR_SP_0_LO 0x124 +#define A4XX_RBBM_PERFCTR_SP_0_HI 0x125 +#define A4XX_RBBM_PERFCTR_SP_1_LO 0x126 +#define A4XX_RBBM_PERFCTR_SP_1_HI 0x127 +#define A4XX_RBBM_PERFCTR_SP_2_LO 0x128 +#define A4XX_RBBM_PERFCTR_SP_2_HI 0x129 +#define A4XX_RBBM_PERFCTR_SP_3_LO 0x12a +#define A4XX_RBBM_PERFCTR_SP_3_HI 0x12b +#define A4XX_RBBM_PERFCTR_SP_4_LO 0x12c +#define A4XX_RBBM_PERFCTR_SP_4_HI 0x12d +#define A4XX_RBBM_PERFCTR_SP_5_LO 0x12e +#define A4XX_RBBM_PERFCTR_SP_5_HI 0x12f +#define 
A4XX_RBBM_PERFCTR_SP_6_LO 0x130 +#define A4XX_RBBM_PERFCTR_SP_6_HI 0x131 +#define A4XX_RBBM_PERFCTR_SP_7_LO 0x132 +#define A4XX_RBBM_PERFCTR_SP_7_HI 0x133 +#define A4XX_RBBM_PERFCTR_SP_8_LO 0x134 +#define A4XX_RBBM_PERFCTR_SP_8_HI 0x135 +#define A4XX_RBBM_PERFCTR_SP_9_LO 0x136 +#define A4XX_RBBM_PERFCTR_SP_9_HI 0x137 +#define A4XX_RBBM_PERFCTR_SP_10_LO 0x138 +#define A4XX_RBBM_PERFCTR_SP_10_HI 0x139 +#define A4XX_RBBM_PERFCTR_SP_11_LO 0x13a +#define A4XX_RBBM_PERFCTR_SP_11_HI 0x13b +#define A4XX_RBBM_PERFCTR_RB_0_LO 0x13c +#define A4XX_RBBM_PERFCTR_RB_0_HI 0x13d +#define A4XX_RBBM_PERFCTR_RB_1_LO 0x13e +#define A4XX_RBBM_PERFCTR_RB_1_HI 0x13f +#define A4XX_RBBM_PERFCTR_RB_2_LO 0x140 +#define A4XX_RBBM_PERFCTR_RB_2_HI 0x141 +#define A4XX_RBBM_PERFCTR_RB_3_LO 0x142 +#define A4XX_RBBM_PERFCTR_RB_3_HI 0x143 +#define A4XX_RBBM_PERFCTR_RB_4_LO 0x144 +#define A4XX_RBBM_PERFCTR_RB_4_HI 0x145 +#define A4XX_RBBM_PERFCTR_RB_5_LO 0x146 +#define A4XX_RBBM_PERFCTR_RB_5_HI 0x147 +#define A4XX_RBBM_PERFCTR_RB_6_LO 0x148 +#define A4XX_RBBM_PERFCTR_RB_6_HI 0x149 +#define A4XX_RBBM_PERFCTR_RB_7_LO 0x14a +#define A4XX_RBBM_PERFCTR_RB_7_HI 0x14b +#define A4XX_RBBM_PERFCTR_VSC_0_LO 0x14c +#define A4XX_RBBM_PERFCTR_VSC_0_HI 0x14d +#define A4XX_RBBM_PERFCTR_VSC_1_LO 0x14e +#define A4XX_RBBM_PERFCTR_VSC_1_HI 0x14f +#define A4XX_RBBM_PERFCTR_PWR_0_LO 0x166 +#define A4XX_RBBM_PERFCTR_PWR_0_HI 0x167 +#define A4XX_RBBM_PERFCTR_PWR_1_LO 0x168 +#define A4XX_RBBM_PERFCTR_PWR_1_HI 0x169 +#define A4XX_RBBM_ALWAYSON_COUNTER_LO 0x16e +#define A4XX_RBBM_ALWAYSON_COUNTER_HI 0x16f +#define A4XX_RBBM_PERFCTR_CTL 0x170 +#define A4XX_RBBM_PERFCTR_LOAD_CMD0 0x171 +#define A4XX_RBBM_PERFCTR_LOAD_CMD1 0x172 +#define A4XX_RBBM_PERFCTR_LOAD_CMD2 0x173 +#define A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x174 +#define A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x175 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x176 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x177 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x178 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x179 +#define A4XX_RBBM_GPU_BUSY_MASKED 0x17a +#define A4XX_RBBM_INT_0_STATUS 0x17d +#define A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x18c +#define A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x18d +#define A4XX_RBBM_AHB_ERROR_STATUS 0x18f +#define A4XX_RBBM_STATUS 0x191 +#define A4XX_RBBM_CFG_COUNTER0 0x1a2 +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF0 0x1a9 +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF1 0x1aa +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF2 0x1ab +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF3 0x1ac +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4 0x1ad +#define A4XX_RBBM_CFG_DEBBUS_MISR0 0x1ae +#define A4XX_RBBM_CFG_DEBBUS_MISR1 0x1af +#define A4XX_RBBM_POWER_STATUS 0x1b0 +#define A4XX_RBBM_PPD_V2_SP_PWR_WEIGHTS 0x1b2 +#define A4XX_RBBM_PPD_V2_SP_RB_EPOCH_TH 0x1b3 +#define A4XX_RBBM_PPD_V2_TP_CONFIG 0x1b4 +#define A4XX_RBBM_PPD_RAMP_V2_CONTROL 0x1b5 +#define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x1b8 +#define A4XX_RBBM_PPD_CTRL 0x1b9 +#define A4XX_RBBM_PPD_EPOCH_INTRA_TH_1 0x1ba +#define A4XX_RBBM_PPD_EPOCH_INTRA_TH_2 0x1bb +#define A4XX_RBBM_PPD_EPOCH_INTER_TH_HIGH_CLEAR_THR 0x1bc +#define A4XX_RBBM_PPD_EPOCH_INTER_TH_LOW 0x1bd +/* SECVID registers */ +#define A4XX_RBBM_SECVID_TRUST_CONFIG 0xf000 +#define A4XX_RBBM_SECVID_TRUST_CONTROL 0xf400 +#define A4XX_RBBM_SECVID_TSB_TRUSTED_BASE 0xf800 +#define A4XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf801 +#define A4XX_RBBM_SECVID_TSB_CONTROL 0xf802 + +/* CP registers */ +#define A4XX_CP_RB_BASE 0x200 +#define A4XX_CP_RB_CNTL 0x201 +#define A4XX_CP_RB_RPTR_ADDR 0x203 +#define A4XX_CP_RB_RPTR 0x204 +#define A4XX_CP_RB_WPTR 0x205 
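The RBBM blocks above expose each performance counter as a LO/HI register pair, with a per-group select register (for example A4XX_RBBM_PERFCTR_RBBM_SEL_0) choosing the countable. A minimal sketch of that pattern, assuming the driver's generic kgsl_regread()/kgsl_regwrite() helpers and a valid struct kgsl_device; the function name is only illustrative:

/*
 * Illustrative sketch, not part of the snapshot: route a countable into
 * RBBM counter 0 and read it back as a 64-bit value from its LO/HI pair.
 * Assumes the kgsl register helpers declared in the driver's own headers.
 */
static u64 example_read_rbbm_counter0(struct kgsl_device *device,
		unsigned int countable)
{
	unsigned int lo, hi;

	/* Select what RBBM counter 0 should count */
	kgsl_regwrite(device, A4XX_RBBM_PERFCTR_RBBM_SEL_0, countable);

	/* Each counter value is split across a LO/HI register pair */
	kgsl_regread(device, A4XX_RBBM_PERFCTR_RBBM_0_LO, &lo);
	kgsl_regread(device, A4XX_RBBM_PERFCTR_RBBM_0_HI, &hi);

	return ((u64)hi << 32) | lo;
}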
+#define A4XX_CP_IB1_BASE 0x206 +#define A4XX_CP_IB1_BUFSZ 0x207 +#define A4XX_CP_IB2_BASE 0x208 +#define A4XX_CP_IB2_BUFSZ 0x209 +#define A4XX_CP_ROQ_ADDR 0x21C +#define A4XX_CP_ROQ_DATA 0x21D +#define A4XX_CP_MEQ_ADDR 0x21E +#define A4XX_CP_MEQ_DATA 0x21F +#define A4XX_CP_MERCIU_ADDR 0x220 +#define A4XX_CP_MERCIU_DATA 0x221 +#define A4XX_CP_MERCIU_DATA2 0x222 +#define A4XX_CP_PFP_UCODE_ADDR 0x223 +#define A4XX_CP_PFP_UCODE_DATA 0x224 +#define A4XX_CP_ME_RAM_WADDR 0x225 +#define A4XX_CP_ME_RAM_RADDR 0x226 +#define A4XX_CP_ME_RAM_DATA 0x227 +#define A4XX_CP_SCRATCH_UMASK 0x228 +#define A4XX_CP_SCRATCH_ADDR 0x229 + +#define A4XX_CP_PREEMPT 0x22a +/* PREEMPT register bit shifts */ +#define A4XX_CP_PREEMPT_STOP_SHIFT 0 +#define A4XX_CP_PREEMPT_RESUME_SHIFT 1 + +#define A4XX_CP_PREEMPT_DISABLE 0x22b +#define A4XX_CP_CNTL 0x22c +#define A4XX_CP_ME_CNTL 0x22d +#define A4XX_CP_DEBUG 0x22e +#define A4XX_CP_STATE_DEBUG_INDEX 0x22f +#define A4XX_CP_STATE_DEBUG_DATA 0x230 +#define A4XX_CP_POWER_COLLAPSE_CNTL 0x234 +/* + * CP debug settings for A4xx cores + * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF + */ +#define A4XX_CP_DEBUG_DEFAULT (1 << 25) + +#define A4XX_CP_PROTECT_REG_0 0x240 +#define A4XX_CP_PROTECT_CTRL 0x250 +#define A4XX_CP_PROTECT_REG_10 0x251 + +#define A4XX_CP_ME_STATUS 0x4d1 +#define A4XX_CP_CNTL 0x22c +#define A4XX_CP_WFI_PEND_CTR 0x4d2 +#define A4XX_CP_PREEMPT_DEBUG 0x4d6 +#define A4XX_CP_HW_FAULT 0x4d8 +#define A4XX_CP_PROTECT_STATUS 0x4da +#define A4XX_CP_PERFCTR_CP_SEL_0 0x500 +#define A4XX_CP_PERFCTR_CP_SEL_1 0x501 +#define A4XX_CP_PERFCTR_CP_SEL_2 0x502 +#define A4XX_CP_PERFCTR_CP_SEL_3 0x503 +#define A4XX_CP_PERFCTR_CP_SEL_4 0x504 +#define A4XX_CP_PERFCTR_CP_SEL_5 0x505 +#define A4XX_CP_PERFCTR_CP_SEL_6 0x506 +#define A4XX_CP_PERFCTR_CP_SEL_7 0x507 + +#define A4XX_CP_SCRATCH_REG0 0x578 +#define A4XX_CP_SCRATCH_REG6 0x57e +#define A4XX_CP_SCRATCH_REG7 0x57f +#define A4XX_CP_SCRATCH_REG8 0x580 +#define A4XX_CP_SCRATCH_REG9 0x581 +#define A4XX_CP_SCRATCH_REG10 0x582 +#define A4XX_CP_SCRATCH_REG11 0x583 +#define A4XX_CP_SCRATCH_REG12 0x584 +#define A4XX_CP_SCRATCH_REG13 0x585 +#define A4XX_CP_SCRATCH_REG14 0x586 +#define A4XX_CP_SCRATCH_REG15 0x587 +#define A4XX_CP_SCRATCH_REG16 0x588 +#define A4XX_CP_SCRATCH_REG17 0x589 +#define A4XX_CP_SCRATCH_REG18 0x58a +#define A4XX_CP_SCRATCH_REG23 0x58f + +/* SP registers */ +#define A4XX_SP_SP_CTRL 0x22C0 +#define A4XX_SP_INSTR_CACHE_CTRL 0x22c1 +#define A4XX_SP_VS_OBJ_START 0x22e1 +#define A4XX_SP_VS_PVT_MEM_ADDR 0x22e3 +#define A4XX_SP_FS_CTRL_1 0x22e9 +#define A4XX_SP_FS_OBJ_START 0x22eb +#define A4XX_SP_FS_PVT_MEM_ADDR 0x22ed +#define A4XX_SP_CS_CTRL_0 0x2300 +#define A4XX_SP_CS_OBJ_OFFSET 0x2301 +#define A4XX_SP_CS_OBJ_START 0x2302 +#define A4XX_SP_CS_PVT_MEM_PARAM 0x2303 +#define A4XX_SP_CS_PVT_MEM_ADDR 0x2304 +#define A4XX_SP_CS_PVT_MEM_SIZE 0x2305 +#define A4XX_SP_CS_LENGTH 0x2306 +#define A4XX_SP_MODE_CONTROL 0xec3 +#define A4XX_SP_PERFCTR_SP_SEL_0 0xec4 +#define A4XX_SP_PERFCTR_SP_SEL_1 0xec5 +#define A4XX_SP_PERFCTR_SP_SEL_2 0xec6 +#define A4XX_SP_PERFCTR_SP_SEL_3 0xec7 +#define A4XX_SP_PERFCTR_SP_SEL_4 0xec8 +#define A4XX_SP_PERFCTR_SP_SEL_5 0xec9 +#define A4XX_SP_PERFCTR_SP_SEL_6 0xeca +#define A4XX_SP_PERFCTR_SP_SEL_7 0xecb +#define A4XX_SP_PERFCTR_SP_SEL_8 0xecc +#define A4XX_SP_PERFCTR_SP_SEL_9 0xecd +#define A4XX_SP_PERFCTR_SP_SEL_10 0xece +#define A4XX_SP_PERFCTR_SP_SEL_11 0xecf +#define A4XX_SP_VS_PVT_MEM_ADDR 0x22e3 +#define A4XX_SP_FS_PVT_MEM_ADDR 0x22ed +#define A4XX_SP_VS_OBJ_START 0x22e1 
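Several of the CP definitions above are bit positions or shifts rather than ready-made masks, for example the PREEMPT stop/resume shifts and the MIU_128BIT_WRITE_ENABLE bit folded into A4XX_CP_DEBUG_DEFAULT. A hedged sketch of how such values are composed and written, again assuming the kgsl_regwrite() helper; this is not the driver's actual start-up or preemption sequence:

/*
 * Illustrative only: A4XX_CP_DEBUG_DEFAULT already encodes bit 25
 * (MIU_128BIT_WRITE_ENABLE); the preempt write simply shows the shift idiom.
 */
static void example_cp_writes(struct kgsl_device *device)
{
	kgsl_regwrite(device, A4XX_CP_DEBUG, A4XX_CP_DEBUG_DEFAULT);
	kgsl_regwrite(device, A4XX_CP_PREEMPT,
			1 << A4XX_CP_PREEMPT_STOP_SHIFT);
}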
+#define A4XX_SP_FS_OBJ_START 0x22eb + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define A4XX_SP_ALU_ACTIVE_CYCLES 0x1D +#define A4XX_SP0_ICL1_MISSES 0x1A +#define A4XX_SP_FS_CFLOW_INSTRUCTIONS 0x0C + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define A4XX_TSE_INPUT_PRIM_NUM 0x0 + +enum a4xx_sp_perfctr_sp_sel { + SP_FS_STAGE_BARY_INSTRUCTIONS = 0x10, +}; + +/* VPC registers */ +#define A4XX_VPC_DEBUG_RAM_SEL 0xe60 +#define A4XX_VPC_DEBUG_RAM_READ 0xe61 +#define A4XX_VPC_PERFCTR_VPC_SEL_0 0xe65 +#define A4XX_VPC_PERFCTR_VPC_SEL_1 0xe66 +#define A4XX_VPC_PERFCTR_VPC_SEL_2 0xe67 +#define A4XX_VPC_PERFCTR_VPC_SEL_3 0xe68 + +/* UCHE register */ +#define UCHE_TRAP_BASE_LO 0xe83 +#define UCHE_TRAP_BASE_HI 0xe84 +#define A4XX_UCHE_INVALIDATE0 0xe8a +#define A4XX_UCHE_INVALIDATE1 0xe8b +#define A4XX_UCHE_CACHE_WAYS_VFD 0xe8c + +/* VSC registers */ +#define A4XX_VSC_SIZE_ADDRESS 0xc01 +#define A4XX_VSC_PIPE_DATA_ADDRESS_0 0xc10 +#define A4XX_VSC_PIPE_DATA_ADDRESS_1 0xc11 +#define A4XX_VSC_PIPE_DATA_ADDRESS_2 0xc12 +#define A4XX_VSC_PIPE_DATA_ADDRESS_3 0xc13 +#define A4XX_VSC_PIPE_DATA_ADDRESS_4 0xc14 +#define A4XX_VSC_PIPE_DATA_ADDRESS_5 0xc15 +#define A4XX_VSC_PIPE_DATA_ADDRESS_6 0xc16 +#define A4XX_VSC_PIPE_DATA_ADDRESS_7 0xc17 +#define A4XX_VSC_PIPE_DATA_LENGTH_0 0xc18 +#define A4XX_VSC_PIPE_DATA_LENGTH_1 0xc19 +#define A4XX_VSC_PIPE_DATA_LENGTH_2 0xc1a +#define A4XX_VSC_PIPE_DATA_LENGTH_3 0xc1b +#define A4XX_VSC_PIPE_DATA_LENGTH_4 0xc1c +#define A4XX_VSC_PIPE_DATA_LENGTH_5 0xc1d +#define A4XX_VSC_PIPE_DATA_LENGTH_6 0xc1e +#define A4XX_VSC_PIPE_DATA_LENGTH_7 0xc1f +#define A4XX_VSC_PERFCTR_VSC_SEL_0 0xc50 +#define A4XX_VSC_PERFCTR_VSC_SEL_1 0xc51 + +/* VFD registers */ +#define A4XX_VFD_CONTROL_0 0x2200 +#define A4XX_VFD_FETCH_INSTR_0_0 0x220a +#define A4XX_VFD_FETCH_INSTR_1_31 0x2287 +#define A4XX_VFD_PERFCTR_VFD_SEL_0 0xe43 +#define A4XX_VFD_PERFCTR_VFD_SEL_1 0xe44 +#define A4XX_VFD_PERFCTR_VFD_SEL_2 0xe45 +#define A4XX_VFD_PERFCTR_VFD_SEL_3 0xe46 +#define A4XX_VFD_PERFCTR_VFD_SEL_4 0xe47 +#define A4XX_VFD_PERFCTR_VFD_SEL_5 0xe48 +#define A4XX_VFD_PERFCTR_VFD_SEL_6 0xe49 +#define A4XX_VFD_PERFCTR_VFD_SEL_7 0xe4a +#define A4XX_VFD_FETCH_INSTR_1_0 0x220b +#define A4XX_VFD_FETCH_INSTR_1_1 0x220f +#define A4XX_VFD_FETCH_INSTR_1_2 0x2213 +#define A4XX_VFD_FETCH_INSTR_1_3 0x2217 +#define A4XX_VFD_FETCH_INSTR_1_4 0x221b +#define A4XX_VFD_FETCH_INSTR_1_5 0x221f +#define A4XX_VFD_FETCH_INSTR_1_6 0x2223 +#define A4XX_VFD_FETCH_INSTR_1_7 0x2227 +#define A4XX_VFD_FETCH_INSTR_1_8 0x222b +#define A4XX_VFD_FETCH_INSTR_1_9 0x222f +#define A4XX_VFD_FETCH_INSTR_1_10 0x2233 +#define A4XX_VFD_FETCH_INSTR_1_11 0x2237 +#define A4XX_VFD_FETCH_INSTR_1_12 0x223b +#define A4XX_VFD_FETCH_INSTR_1_13 0x223f +#define A4XX_VFD_FETCH_INSTR_1_14 0x2243 +#define A4XX_VFD_FETCH_INSTR_1_15 0x2247 +#define A4XX_VFD_FETCH_INSTR_1_16 0x224b +#define A4XX_VFD_FETCH_INSTR_1_17 0x224f +#define A4XX_VFD_FETCH_INSTR_1_18 0x2253 +#define A4XX_VFD_FETCH_INSTR_1_19 0x2257 +#define A4XX_VFD_FETCH_INSTR_1_20 0x225b +#define A4XX_VFD_FETCH_INSTR_1_21 0x225f +#define A4XX_VFD_FETCH_INSTR_1_22 0x2263 +#define A4XX_VFD_FETCH_INSTR_1_23 0x2267 +#define A4XX_VFD_FETCH_INSTR_1_24 0x226b +#define A4XX_VFD_FETCH_INSTR_1_25 0x226f +#define A4XX_VFD_FETCH_INSTR_1_26 0x2273 +#define A4XX_VFD_FETCH_INSTR_1_27 0x2277 +#define A4XX_VFD_FETCH_INSTR_1_28 0x227b +#define A4XX_VFD_FETCH_INSTR_1_29 0x227f +#define A4XX_VFD_FETCH_INSTR_1_30 0x2283 +#define A4XX_VFD_FETCH_INSTR_1_31 0x2287 + + +enum a4xx_vfd_perfctr_vfd_sel { + VFD_VPC_BYPASS_TRANS = 0x2, + 
VFD_UPPER_SHADER_FIBERS = 0xb, + VFD_LOWER_SHADER_FIBERS = 0xc, +}; + +/* VBIF registers */ +#define A4XX_VBIF_VERSION 0x3000 +#define A4XX_VBIF_CLKON 0x3001 +#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 + +#define A4XX_VBIF_ABIT_SORT 0x301c +#define A4XX_VBIF_ABIT_SORT_CONF 0x301d +#define A4XX_VBIF_GATE_OFF_WRREQ_EN 0x302a +#define A4XX_VBIF_IN_RD_LIM_CONF0 0x302c +#define A4XX_VBIF_IN_RD_LIM_CONF1 0x302d +#define A4XX_VBIF_IN_WR_LIM_CONF0 0x3030 +#define A4XX_VBIF_IN_WR_LIM_CONF1 0x3031 +#define A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 + +#define A4XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A4XX_VBIF_XIN_HALT_CTRL0_MASK 0x1F +#define A405_VBIF_XIN_HALT_CTRL0_MASK 0x3 + +#define A4XX_VBIF_XIN_HALT_CTRL1 0x3081 + +#define A4XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 + +#define A4XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A4XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0 + +#define A4XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A4XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0xF +#define A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A4XX_VBIF_TEST_BUS_OUT 0x308c + +#define A4XX_VBIF_PERF_CNT_EN0 0x30c0 +#define A4XX_VBIF_PERF_CNT_EN1 0x30c1 +#define A4XX_VBIF_PERF_CNT_EN2 0x30c2 +#define A4XX_VBIF_PERF_CNT_EN3 0x30c3 +#define A4XX_VBIF_PERF_CNT_CLR0 0x30c8 +#define A4XX_VBIF_PERF_CNT_CLR1 0x30c9 +#define A4XX_VBIF_PERF_CNT_CLR2 0x30ca +#define A4XX_VBIF_PERF_CNT_CLR3 0x30cb +#define A4XX_VBIF_PERF_CNT_SEL0 0x30d0 +#define A4XX_VBIF_PERF_CNT_SEL1 0x30d1 +#define A4XX_VBIF_PERF_CNT_SEL2 0x30d2 +#define A4XX_VBIF_PERF_CNT_SEL3 0x30d3 +#define A4XX_VBIF_PERF_CNT_LOW0 0x30d8 +#define A4XX_VBIF_PERF_CNT_LOW1 0x30d9 +#define A4XX_VBIF_PERF_CNT_LOW2 0x30da +#define A4XX_VBIF_PERF_CNT_LOW3 0x30db +#define A4XX_VBIF_PERF_CNT_HIGH0 0x30e0 +#define A4XX_VBIF_PERF_CNT_HIGH1 0x30e1 +#define A4XX_VBIF_PERF_CNT_HIGH2 0x30e2 +#define A4XX_VBIF_PERF_CNT_HIGH3 0x30e3 + +#define A4XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A4XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A4XX_VBIF_PERF_PWR_CNT_EN2 0x3102 +#define A4XX_VBIF_PERF_PWR_CNT_EN3 0x3103 +#define A4XX_VBIF_PERF_PWR_CNT_CLR0 0x3108 +#define A4XX_VBIF_PERF_PWR_CNT_CLR1 0x3109 +#define A4XX_VBIF_PERF_PWR_CNT_CLR2 0x310A +#define A4XX_VBIF_PERF_PWR_CNT_CLR3 0x310B +#define A4XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A4XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A4XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 +#define A4XX_VBIF_PERF_PWR_CNT_LOW3 0x3113 +#define A4XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A4XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A4XX_VBIF_PERF_PWR_CNT_HIGH2 0x311a +#define A4XX_VBIF_PERF_PWR_CNT_HIGH3 0x311b + +/* Bit flags for RBBM_CTL */ +#define A4XX_RBBM_RBBM_CTL_RESET_PWR_CTR0 0x00000001 +#define A4XX_RBBM_RBBM_CTL_RESET_PWR_CTR1 0x00000002 +#define A4XX_RBBM_RBBM_CTL_ENABLE_PWR_CTR0 0x00000010 +#define A4XX_RBBM_RBBM_CTL_ENABLE_PWR_CTR1 0x00000020 + +/* GRAS registers */ +#define A4XX_GRAS_PERFCTR_TSE_SEL_0 0xc88 +#define A4XX_GRAS_PERFCTR_TSE_SEL_1 0xc89 +#define A4XX_GRAS_PERFCTR_TSE_SEL_2 0xc8a +#define A4XX_GRAS_PERFCTR_TSE_SEL_3 0xc8b +#define A4XX_GRAS_PERFCTR_RAS_SEL_0 0xc8c +#define A4XX_GRAS_PERFCTR_RAS_SEL_1 0xc8d +#define A4XX_GRAS_PERFCTR_RAS_SEL_2 0xc8e +#define A4XX_GRAS_PERFCTR_RAS_SEL_3 0xc8f + +/* PC registers */ +#define A4XX_PC_PERFCTR_PC_SEL_0 0xd10 +#define 
A4XX_PC_PERFCTR_PC_SEL_1 0xd11 +#define A4XX_PC_PERFCTR_PC_SEL_2 0xd12 +#define A4XX_PC_PERFCTR_PC_SEL_3 0xd13 +#define A4XX_PC_PERFCTR_PC_SEL_4 0xd14 +#define A4XX_PC_PERFCTR_PC_SEL_5 0xd15 +#define A4XX_PC_PERFCTR_PC_SEL_6 0xd16 +#define A4XX_PC_PERFCTR_PC_SEL_7 0xd17 + +enum a4xx_pc_perfctr_pc_sel { + PC_INSTANCES = 0x1, + PC_VERTEX_HITS = 0x8, + PC_GENERATED_FIBERS = 0x12, + PC_GENERATED_WAVES = 0x13, +}; + +/* HLSQ registers */ +#define A4XX_HLSQ_TIMEOUT_THRESHOLD 0xe00 +#define A4XX_HLSQ_STATE_RESTORE_TRIGGER 0xe01 +#define A4XX_HLSQ_MODE_CONTROL 0xe05 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xe06 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xe07 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xe08 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xe09 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xe0a +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xe0b +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xe0c +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xe0d +#define A4XX_HLSQ_SPTP_RDSEL 0xe30 +#define A4xx_HLSQ_CONTROL_0 0x23c0 +#define A4xx_HLSQ_CONTROL_1 0x23c1 +#define A4xx_HLSQ_CONTROL_2 0x23c2 +#define A4xx_HLSQ_CONTROL_3 0x23c3 +#define A4xx_HLSQ_CONTROL_4 0x23c4 +#define A4XX_HLSQ_CS_CONTROL 0x23ca +#define A4XX_HLSQ_CL_NDRANGE_0 0x23cd +#define A4XX_HLSQ_CL_NDRANGE_1 0x23ce +#define A4XX_HLSQ_CL_NDRANGE_2 0x23cf +#define A4XX_HLSQ_CL_NDRANGE_3 0x23d0 +#define A4XX_HLSQ_CL_NDRANGE_4 0x23d1 +#define A4XX_HLSQ_CL_NDRANGE_5 0x23d2 +#define A4XX_HLSQ_CL_NDRANGE_6 0x23d3 +#define A4XX_HLSQ_CL_CONTROL_0 0x23d4 +#define A4XX_HLSQ_CL_CONTROL_1 0x23d5 +#define A4XX_HLSQ_CL_KERNEL_CONST 0x23d6 +#define A4XX_HLSQ_CL_KERNEL_GROUP_X 0x23d7 +#define A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x23d8 +#define A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x23d9 +#define A4XX_HLSQ_CL_WG_OFFSET 0x23da +#define A4XX_HLSQ_UPDATE_CONTROL 0x23db + +enum a4xx_hlsq_perfctr_hlsq_sel { + HLSQ_SP_VS_STAGE_CONSTANT = 0x0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 0x1, + HLSQ_SP_FS_STAGE_CONSTANT = 0x2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 0x3, + HLSQ_FS_STAGE_16_WAVES = 0x8, + HLSQ_FS_STAGE_32_WAVES = 0x9, + HLSQ_FS_STAGE_64_WAVES = 0xa, + HLSQ_VS_STAGE_16_WAVES = 0xb, + HLSQ_VS_STAGE_32_WAVES = 0xc, +}; + +/* CCU registers */ +#define A4XX_RB_PERFCTR_CCU_SEL_0 0xccf +#define A4XX_RB_PERFCTR_CCU_SEL_1 0xcd0 +#define A4XX_RB_PERFCTR_CCU_SEL_2 0xcd1 +#define A4XX_RB_PERFCTR_CCU_SEL_3 0xcd2 + +enum a4xx_cu_perfctr_ccu_sel { + CCU_VBIF_STALL = 0x1, + CCU_VBIF_LATENCY_CYCLES = 0x4, + CCU_VBIF_LATENCY_SAMPLES = 0x5, + CCU_Z_READ = 0x13, + CCU_Z_WRITE = 0x14, + CCU_C_READ = 0x15, + CCU_C_WRITE = 0x16, +}; + +/* UCHE registers */ +#define A4XX_UCHE_PERFCTR_UCHE_SEL_0 0xe8e +#define A4XX_UCHE_PERFCTR_UCHE_SEL_1 0xe8f +#define A4XX_UCHE_PERFCTR_UCHE_SEL_2 0xe90 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_3 0xe91 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_4 0xe92 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_5 0xe93 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_6 0xe94 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_7 0xe95 + +/* TPL1 registers */ +enum a4xx_uche_perfctr_uche_sel { + UCHE_READ_REQUESTS_MARB = 0x8, + UCHE_READ_REQUESTS_SP = 0x9, + UCHE_WRITE_REQUESTS_MARB = 0xa, + UCHE_WRITE_REQUESTS_SP = 0xb, + UCHE_WRITE_REQUESTS_VPC = 0x14, +}; + +/* TPL1 registers */ +#define A4XX_TPL1_TP_MODE_CONTROL 0xf03 +#define A4XX_TPL1_PERFCTR_TP_SEL_0 0xf04 +#define A4XX_TPL1_PERFCTR_TP_SEL_1 0xf05 +#define A4XX_TPL1_PERFCTR_TP_SEL_2 0xf06 +#define A4XX_TPL1_PERFCTR_TP_SEL_3 0xf07 +#define A4XX_TPL1_PERFCTR_TP_SEL_4 0xf08 +#define A4XX_TPL1_PERFCTR_TP_SEL_5 0xf09 +#define A4XX_TPL1_PERFCTR_TP_SEL_6 0xf0a +#define A4XX_TPL1_PERFCTR_TP_SEL_7 0xf0b +#define 
A4XX_TPL1_TP_TEX_TSIZE_1 0x23a0 +#define A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x23A4 +#define A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x23A5 +#define A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x23A6 + +enum a4xx_tpl1_perfctr_tp_sel { + TP_OUTPUT_TEXELS_POINT = 0x2, + TP_OUTPUT_TEXELS_BILINEAR = 0x3, + TP_OUTPUT_TEXELS_MIP = 0x4, + TP_OUTPUT_TEXELS_ANISO = 0x5, + TP_OUTPUT_TEXELS_OPS16 = 0x6, + TP_OUTPUT_TEXELS_OPS32 = 0x7, + TP_ZERO_LOD = 0xe, + TP_LATENCY = 0x12, + TP_LATENCY_TRANS = 0x13, +}; + +/* Enum for debug bus */ +enum a4xx_rbbm_debbus_id { + A4XX_RBBM_DEBBUS_CP_ID = 0x1, + A4XX_RBBM_DEBBUS_RBBM_ID = 0x2, + A4XX_RBBM_DEBBUS_VBIF_ID = 0x3, + A4XX_RBBM_DEBBUS_HLSQ_ID = 0x4, + A4XX_RBBM_DEBBUS_UCHE_ID = 0x5, + A4XX_RBBM_DEBBUS_DPM_ID = 0x6, + A4XX_RBBM_DEBBUS_TESS_ID = 0x7, + A4XX_RBBM_DEBBUS_PC_ID = 0x8, + A4XX_RBBM_DEBBUS_VFD_ID = 0x9, + A4XX_RBBM_DEBBUS_VPC_ID = 0xa, + A4XX_RBBM_DEBBUS_TSE_ID = 0xb, + A4XX_RBBM_DEBBUS_RAS_ID = 0xc, + A4XX_RBBM_DEBBUS_VSC_ID = 0xd, + A4XX_RBBM_DEBBUS_COM_ID = 0xe, + A4XX_RBBM_DEBBUS_DCOM_ID = 0xf, + A4XX_RBBM_DEBBUS_SP_0_ID = 0x10, + A4XX_RBBM_DEBBUS_SP_1_ID = 0x11, + A4XX_RBBM_DEBBUS_SP_2_ID = 0x12, + A4XX_RBBM_DEBBUS_SP_3_ID = 0x13, + A4XX_RBBM_DEBBUS_TPL1_0_ID = 0x18, + A4XX_RBBM_DEBBUS_TPL1_1_ID = 0x19, + A4XX_RBBM_DEBBUS_TPL1_2_ID = 0x1a, + A4XX_RBBM_DEBBUS_TPL1_3_ID = 0x1b, + A4XX_RBBM_DEBBUS_RB_0_ID = 0x20, + A4XX_RBBM_DEBBUS_RB_1_ID = 0x21, + A4XX_RBBM_DEBBUS_RB_2_ID = 0x22, + A4XX_RBBM_DEBBUS_RB_3_ID = 0x23, + A4XX_RBBM_DEBBUS_MARB_0_ID = 0x28, + A4XX_RBBM_DEBBUS_MARB_1_ID = 0x29, + A4XX_RBBM_DEBBUS_MARB_2_ID = 0x2a, + A4XX_RBBM_DEBBUS_MARB_3_ID = 0x2b, + A4XX_RBBM_DEBBUS_CCU_0_ID = 0x30, + A4XX_RBBM_DEBBUS_CCU_1_ID = 0x31, + A4XX_RBBM_DEBBUS_CCU_2_ID = 0x32, + A4XX_RBBM_DEBBUS_CCU_3_ID = 0x33 +}; + +#define A4XX_NUM_AXI_ARB_BLOCKS 2 +#define A4XX_NUM_XIN_BLOCKS 5 + +#endif /* _A4XX_REG_H */ diff --git a/drivers/gpu/msm/a5xx_reg.h b/drivers/gpu/msm/a5xx_reg.h new file mode 100644 index 000000000000..cdaa7f513b9d --- /dev/null +++ b/drivers/gpu/msm/a5xx_reg.h @@ -0,0 +1,897 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _A5XX_REG_H +#define _A5XX_REG_H + +/* A5XX interrupt bits */ +#define A5XX_INT_RBBM_GPU_IDLE 0 +#define A5XX_INT_RBBM_AHB_ERROR 1 +#define A5XX_INT_RBBM_TRANSFER_TIMEOUT 2 +#define A5XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A5XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A5XX_INT_RBBM_ETS_MS_TIMEOUT 5 +#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6 +#define A5XX_INT_RBBM_GPC_ERROR 7 +#define A5XX_INT_CP_SW 8 +#define A5XX_INT_CP_HW_ERROR 9 +#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS 10 +#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS 11 +#define A5XX_INT_CP_CCU_RESOLVE_TS 12 +#define A5XX_INT_CP_IB2 13 +#define A5XX_INT_CP_IB1 14 +#define A5XX_INT_CP_RB 15 +#define A5XX_INT_CP_UNUSED_1 16 +#define A5XX_INT_CP_RB_DONE_TS 17 +#define A5XX_INT_CP_WT_DONE_TS 18 +#define A5XX_INT_UNKNOWN_1 19 +#define A5XX_INT_CP_CACHE_FLUSH_TS 20 +#define A5XX_INT_UNUSED_2 21 +#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A5XX_INT_MISC_HANG_DETECT 23 +#define A5XX_INT_UCHE_OOB_ACCESS 24 +#define A5XX_INT_UCHE_TRAP_INTR 25 +#define A5XX_INT_DEBBUS_INTR_0 26 +#define A5XX_INT_DEBBUS_INTR_1 27 +#define A5XX_INT_GPMU_VOLTAGE_DROOP 28 +#define A5XX_INT_GPMU_FIRMWARE 29 +#define A5XX_INT_ISDB_CPU_IRQ 30 +#define A5XX_INT_ISDB_UNDER_DEBUG 31 + +/* CP Interrupt bits */ +#define A5XX_CP_OPCODE_ERROR 0 +#define A5XX_CP_RESERVED_BIT_ERROR 1 +#define A5XX_CP_HW_FAULT_ERROR 2 +#define A5XX_CP_DMA_ERROR 3 +#define A5XX_CP_REGISTER_PROTECTION_ERROR 4 +#define A5XX_CP_AHB_ERROR 5 + +/* CP registers */ +#define A5XX_CP_RB_BASE 0x800 +#define A5XX_CP_RB_BASE_HI 0x801 +#define A5XX_CP_RB_CNTL 0x802 +#define A5XX_CP_RB_RPTR 0x806 +#define A5XX_CP_RB_WPTR 0x807 +#define A5XX_CP_PFP_STAT_ADDR 0x808 +#define A5XX_CP_PFP_STAT_DATA 0x809 +#define A5XX_CP_DRAW_STATE_ADDR 0x80B +#define A5XX_CP_DRAW_STATE_DATA 0x80C +#define A5XX_CP_CRASH_SCRIPT_BASE_LO 0x817 +#define A5XX_CP_CRASH_SCRIPT_BASE_HI 0x818 +#define A5XX_CP_CRASH_DUMP_CNTL 0x819 +#define A5XX_CP_ME_STAT_ADDR 0x81A +#define A5XX_CP_ROQ_THRESHOLDS_1 0x81F +#define A5XX_CP_ROQ_THRESHOLDS_2 0x820 +#define A5XX_CP_ROQ_DBG_ADDR 0x821 +#define A5XX_CP_ROQ_DBG_DATA 0x822 +#define A5XX_CP_MEQ_DBG_ADDR 0x823 +#define A5XX_CP_MEQ_DBG_DATA 0x824 +#define A5XX_CP_MEQ_THRESHOLDS 0x825 +#define A5XX_CP_MERCIU_SIZE 0x826 +#define A5XX_CP_MERCIU_DBG_ADDR 0x827 +#define A5XX_CP_MERCIU_DBG_DATA_1 0x828 +#define A5XX_CP_MERCIU_DBG_DATA_2 0x829 +#define A5XX_CP_PFP_UCODE_DBG_ADDR 0x82A +#define A5XX_CP_PFP_UCODE_DBG_DATA 0x82B +#define A5XX_CP_ME_UCODE_DBG_ADDR 0x82F +#define A5XX_CP_ME_UCODE_DBG_DATA 0x830 +#define A5XX_CP_CNTL 0x831 +#define A5XX_CP_ME_CNTL 0x832 +#define A5XX_CP_CHICKEN_DBG 0x833 +#define A5XX_CP_PFP_INSTR_BASE_LO 0x835 +#define A5XX_CP_PFP_INSTR_BASE_HI 0x836 +#define A5XX_CP_PM4_INSTR_BASE_LO 0x838 +#define A5XX_CP_PM4_INSTR_BASE_HI 0x839 +#define A5XX_CP_CONTEXT_SWITCH_CNTL 0x83B +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x83C +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x83D +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x83E +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x83F +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x840 +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x841 +#define A5XX_CP_ADDR_MODE_CNTL 0x860 +#define A5XX_CP_ME_STAT_DATA 0xB14 +#define A5XX_CP_WFI_PEND_CTR 0xB15 +#define A5XX_CP_INTERRUPT_STATUS 0xB18 +#define A5XX_CP_HW_FAULT 0xB1A +#define A5XX_CP_PROTECT_STATUS 0xB1C +#define A5XX_CP_IB1_BASE 0xB1F +#define A5XX_CP_IB1_BASE_HI 0xB20 +#define A5XX_CP_IB1_BUFSZ 0xB21 +#define A5XX_CP_IB2_BASE 0xB22 +#define A5XX_CP_IB2_BASE_HI 0xB23 +#define 
A5XX_CP_IB2_BUFSZ 0xB24 +#define A5XX_CP_PROTECT_REG_0 0x880 +#define A5XX_CP_PROTECT_CNTL 0x8A0 +#define A5XX_CP_AHB_FAULT 0xB1B +#define A5XX_CP_PERFCTR_CP_SEL_0 0xBB0 +#define A5XX_CP_PERFCTR_CP_SEL_1 0xBB1 +#define A5XX_CP_PERFCTR_CP_SEL_2 0xBB2 +#define A5XX_CP_PERFCTR_CP_SEL_3 0xBB3 +#define A5XX_CP_PERFCTR_CP_SEL_4 0xBB4 +#define A5XX_CP_PERFCTR_CP_SEL_5 0xBB5 +#define A5XX_CP_PERFCTR_CP_SEL_6 0xBB6 +#define A5XX_CP_PERFCTR_CP_SEL_7 0xBB7 + +#define A5XX_VSC_ADDR_MODE_CNTL 0xBC1 + +/* CP Power Counter Registers Select */ +#define A5XX_CP_POWERCTR_CP_SEL_0 0xBBA +#define A5XX_CP_POWERCTR_CP_SEL_1 0xBBB +#define A5XX_CP_POWERCTR_CP_SEL_2 0xBBC +#define A5XX_CP_POWERCTR_CP_SEL_3 0xBBD + +/* CP_EVENT_WRITE events */ +#define A5XX_CACHE_FLUSH_TS 0x4 + +/* RBBM registers */ +#define A5XX_RBBM_CFG_DBGBUS_SEL_A 0x4 +#define A5XX_RBBM_CFG_DBGBUS_SEL_B 0x5 +#define A5XX_RBBM_CFG_DBGBUS_SEL_C 0x6 +#define A5XX_RBBM_CFG_DBGBUS_SEL_D 0x7 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PONG_INDEX_SHIFT 0x10 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PONG_BLK_SEL_SHIFT 0x18 + +#define A5XX_RBBM_CFG_DBGBUS_CNTLT 0x8 +#define A5XX_RBBM_CFG_DBGBUS_CNTLM 0x9 +#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x18 +#define A5XX_RBBM_CFG_DBGBUS_OPL 0xA +#define A5XX_RBBM_CFG_DBGBUS_OPE 0xB +#define A5XX_RBBM_CFG_DBGBUS_IVTL_0 0xC +#define A5XX_RBBM_CFG_DBGBUS_IVTL_1 0xD +#define A5XX_RBBM_CFG_DBGBUS_IVTL_2 0xE +#define A5XX_RBBM_CFG_DBGBUS_IVTL_3 0xF +#define A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x10 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x11 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x12 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x13 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x14 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x15 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x16 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x17 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x18 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x19 +#define A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x1A +#define A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x1B +#define A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x1C +#define A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x1D +#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x1E +#define A5XX_RBBM_CFG_DBGBUS_PTRC0 0x1F +#define A5XX_RBBM_CFG_DBGBUS_PTRC1 0x20 +#define A5XX_RBBM_CFG_DBGBUS_LOADREG 0x21 +#define A5XX_RBBM_CFG_DBGBUS_IDX 0x22 +#define A5XX_RBBM_CFG_DBGBUS_CLRC 0x23 +#define A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x24 +#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x2F +#define A5XX_RBBM_INT_CLEAR_CMD 0x37 +#define A5XX_RBBM_INT_0_MASK 0x38 +#define A5XX_RBBM_AHB_DBG_CNTL 0x3F +#define A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x41 +#define A5XX_RBBM_SW_RESET_CMD 0x43 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD 0x45 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x46 +#define A5XX_RBBM_DBG_LO_HI_GPIO 0x48 +#define A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x49 +#define A5XX_RBBM_CLOCK_CNTL_TP0 0x4A +#define A5XX_RBBM_CLOCK_CNTL_TP1 0x4B +#define A5XX_RBBM_CLOCK_CNTL_TP2 0x4C +#define A5XX_RBBM_CLOCK_CNTL_TP3 0x4D +#define A5XX_RBBM_CLOCK_CNTL2_TP0 0x4E +#define A5XX_RBBM_CLOCK_CNTL2_TP1 0x4F +#define A5XX_RBBM_CLOCK_CNTL2_TP2 0x50 +#define A5XX_RBBM_CLOCK_CNTL2_TP3 0x51 +#define A5XX_RBBM_CLOCK_CNTL3_TP0 0x52 +#define A5XX_RBBM_CLOCK_CNTL3_TP1 0x53 +#define A5XX_RBBM_CLOCK_CNTL3_TP2 0x54 +#define A5XX_RBBM_CLOCK_CNTL3_TP3 0x55 +#define A5XX_RBBM_READ_AHB_THROUGH_DBG 0x59 +#define A5XX_RBBM_CLOCK_CNTL_UCHE 0x5A +#define A5XX_RBBM_CLOCK_CNTL2_UCHE 0x5B +#define A5XX_RBBM_CLOCK_CNTL3_UCHE 0x5C +#define A5XX_RBBM_CLOCK_CNTL4_UCHE 0x5D +#define 
A5XX_RBBM_CLOCK_HYST_UCHE 0x5E +#define A5XX_RBBM_CLOCK_DELAY_UCHE 0x5F +#define A5XX_RBBM_CLOCK_MODE_GPC 0x60 +#define A5XX_RBBM_CLOCK_DELAY_GPC 0x61 +#define A5XX_RBBM_CLOCK_HYST_GPC 0x62 +#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x63 +#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x64 +#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x65 +#define A5XX_RBBM_CLOCK_DELAY_HLSQ 0x66 +#define A5XX_RBBM_CLOCK_CNTL 0x67 +#define A5XX_RBBM_CLOCK_CNTL_SP0 0x68 +#define A5XX_RBBM_CLOCK_CNTL_SP1 0x69 +#define A5XX_RBBM_CLOCK_CNTL_SP2 0x6A +#define A5XX_RBBM_CLOCK_CNTL_SP3 0x6B +#define A5XX_RBBM_CLOCK_CNTL2_SP0 0x6C +#define A5XX_RBBM_CLOCK_CNTL2_SP1 0x6D +#define A5XX_RBBM_CLOCK_CNTL2_SP2 0x6E +#define A5XX_RBBM_CLOCK_CNTL2_SP3 0x6F +#define A5XX_RBBM_CLOCK_HYST_SP0 0x70 +#define A5XX_RBBM_CLOCK_HYST_SP1 0x71 +#define A5XX_RBBM_CLOCK_HYST_SP2 0x72 +#define A5XX_RBBM_CLOCK_HYST_SP3 0x73 +#define A5XX_RBBM_CLOCK_DELAY_SP0 0x74 +#define A5XX_RBBM_CLOCK_DELAY_SP1 0x75 +#define A5XX_RBBM_CLOCK_DELAY_SP2 0x76 +#define A5XX_RBBM_CLOCK_DELAY_SP3 0x77 +#define A5XX_RBBM_CLOCK_CNTL_RB0 0x78 +#define A5XX_RBBM_CLOCK_CNTL_RB1 0x79 +#define A5XX_RBBM_CLOCK_CNTL_RB2 0x7a +#define A5XX_RBBM_CLOCK_CNTL_RB3 0x7B +#define A5XX_RBBM_CLOCK_CNTL2_RB0 0x7C +#define A5XX_RBBM_CLOCK_CNTL2_RB1 0x7D +#define A5XX_RBBM_CLOCK_CNTL2_RB2 0x7E +#define A5XX_RBBM_CLOCK_CNTL2_RB3 0x7F +#define A5XX_RBBM_CLOCK_HYST_RAC 0x80 +#define A5XX_RBBM_CLOCK_DELAY_RAC 0x81 +#define A5XX_RBBM_CLOCK_CNTL_CCU0 0x82 +#define A5XX_RBBM_CLOCK_CNTL_CCU1 0x83 +#define A5XX_RBBM_CLOCK_CNTL_CCU2 0x84 +#define A5XX_RBBM_CLOCK_CNTL_CCU3 0x85 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x86 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x87 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x88 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x89 +#define A5XX_RBBM_CLOCK_CNTL_RAC 0x8A +#define A5XX_RBBM_CLOCK_CNTL2_RAC 0x8B +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x8C +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x8D +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x8E +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x8F +#define A5XX_RBBM_CLOCK_HYST_VFD 0x90 +#define A5XX_RBBM_CLOCK_MODE_VFD 0x91 +#define A5XX_RBBM_CLOCK_DELAY_VFD 0x92 +#define A5XX_RBBM_AHB_CNTL0 0x93 +#define A5XX_RBBM_AHB_CNTL1 0x94 +#define A5XX_RBBM_AHB_CNTL2 0x95 +#define A5XX_RBBM_AHB_CMD 0x96 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x9C +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x9D +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x9E +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x9F +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0xA0 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0xA1 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0xA2 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0xA3 +#define A5XX_RBBM_CLOCK_DELAY_TP0 0xA4 +#define A5XX_RBBM_CLOCK_DELAY_TP1 0xA5 +#define A5XX_RBBM_CLOCK_DELAY_TP2 0xA6 +#define A5XX_RBBM_CLOCK_DELAY_TP3 0xA7 +#define A5XX_RBBM_CLOCK_DELAY2_TP0 0xA8 +#define A5XX_RBBM_CLOCK_DELAY2_TP1 0xA9 +#define A5XX_RBBM_CLOCK_DELAY2_TP2 0xAA +#define A5XX_RBBM_CLOCK_DELAY2_TP3 0xAB +#define A5XX_RBBM_CLOCK_DELAY3_TP0 0xAC +#define A5XX_RBBM_CLOCK_DELAY3_TP1 0xAD +#define A5XX_RBBM_CLOCK_DELAY3_TP2 0xAE +#define A5XX_RBBM_CLOCK_DELAY3_TP3 0xAF +#define A5XX_RBBM_CLOCK_HYST_TP0 0xB0 +#define A5XX_RBBM_CLOCK_HYST_TP1 0xB1 +#define A5XX_RBBM_CLOCK_HYST_TP2 0xB2 +#define A5XX_RBBM_CLOCK_HYST_TP3 0xB3 +#define A5XX_RBBM_CLOCK_HYST2_TP0 0xB4 +#define A5XX_RBBM_CLOCK_HYST2_TP1 0xB5 +#define A5XX_RBBM_CLOCK_HYST2_TP2 0xB6 +#define A5XX_RBBM_CLOCK_HYST2_TP3 0xB7 +#define A5XX_RBBM_CLOCK_HYST3_TP0 0xB8 +#define 
A5XX_RBBM_CLOCK_HYST3_TP1 0xB9 +#define A5XX_RBBM_CLOCK_HYST3_TP2 0xBA +#define A5XX_RBBM_CLOCK_HYST3_TP3 0xBB +#define A5XX_RBBM_PERFCTR_CP_0_LO 0x3A0 +#define A5XX_RBBM_PERFCTR_CP_0_HI 0x3A1 +#define A5XX_RBBM_PERFCTR_CP_1_LO 0x3A2 +#define A5XX_RBBM_PERFCTR_CP_1_HI 0x3A3 +#define A5XX_RBBM_PERFCTR_CP_2_LO 0x3A4 +#define A5XX_RBBM_PERFCTR_CP_2_HI 0x3A5 +#define A5XX_RBBM_PERFCTR_CP_3_LO 0x3A6 +#define A5XX_RBBM_PERFCTR_CP_3_HI 0x3A7 +#define A5XX_RBBM_PERFCTR_CP_4_LO 0x3A8 +#define A5XX_RBBM_PERFCTR_CP_4_HI 0x3A9 +#define A5XX_RBBM_PERFCTR_CP_5_LO 0x3AA +#define A5XX_RBBM_PERFCTR_CP_5_HI 0x3AB +#define A5XX_RBBM_PERFCTR_CP_6_LO 0x3AC +#define A5XX_RBBM_PERFCTR_CP_6_HI 0x3AD +#define A5XX_RBBM_PERFCTR_CP_7_LO 0x3AE +#define A5XX_RBBM_PERFCTR_CP_7_HI 0x3AF +#define A5XX_RBBM_PERFCTR_RBBM_0_LO 0x3B0 +#define A5XX_RBBM_PERFCTR_RBBM_0_HI 0x3B1 +#define A5XX_RBBM_PERFCTR_RBBM_1_LO 0x3B2 +#define A5XX_RBBM_PERFCTR_RBBM_1_HI 0x3B3 +#define A5XX_RBBM_PERFCTR_RBBM_2_LO 0x3B4 +#define A5XX_RBBM_PERFCTR_RBBM_2_HI 0x3B5 +#define A5XX_RBBM_PERFCTR_RBBM_3_LO 0x3B6 +#define A5XX_RBBM_PERFCTR_RBBM_3_HI 0x3B7 +#define A5XX_RBBM_PERFCTR_PC_0_LO 0x3B8 +#define A5XX_RBBM_PERFCTR_PC_0_HI 0x3B9 +#define A5XX_RBBM_PERFCTR_PC_1_LO 0x3BA +#define A5XX_RBBM_PERFCTR_PC_1_HI 0x3BB +#define A5XX_RBBM_PERFCTR_PC_2_LO 0x3BC +#define A5XX_RBBM_PERFCTR_PC_2_HI 0x3BD +#define A5XX_RBBM_PERFCTR_PC_3_LO 0x3BE +#define A5XX_RBBM_PERFCTR_PC_3_HI 0x3BF +#define A5XX_RBBM_PERFCTR_PC_4_LO 0x3C0 +#define A5XX_RBBM_PERFCTR_PC_4_HI 0x3C1 +#define A5XX_RBBM_PERFCTR_PC_5_LO 0x3C2 +#define A5XX_RBBM_PERFCTR_PC_5_HI 0x3C3 +#define A5XX_RBBM_PERFCTR_PC_6_LO 0x3C4 +#define A5XX_RBBM_PERFCTR_PC_6_HI 0x3C5 +#define A5XX_RBBM_PERFCTR_PC_7_LO 0x3C6 +#define A5XX_RBBM_PERFCTR_PC_7_HI 0x3C7 +#define A5XX_RBBM_PERFCTR_VFD_0_LO 0x3C8 +#define A5XX_RBBM_PERFCTR_VFD_0_HI 0x3C9 +#define A5XX_RBBM_PERFCTR_VFD_1_LO 0x3CA +#define A5XX_RBBM_PERFCTR_VFD_1_HI 0x3CB +#define A5XX_RBBM_PERFCTR_VFD_2_LO 0x3CC +#define A5XX_RBBM_PERFCTR_VFD_2_HI 0x3CD +#define A5XX_RBBM_PERFCTR_VFD_3_LO 0x3CE +#define A5XX_RBBM_PERFCTR_VFD_3_HI 0x3CF +#define A5XX_RBBM_PERFCTR_VFD_4_LO 0x3D0 +#define A5XX_RBBM_PERFCTR_VFD_4_HI 0x3D1 +#define A5XX_RBBM_PERFCTR_VFD_5_LO 0x3D2 +#define A5XX_RBBM_PERFCTR_VFD_5_HI 0x3D3 +#define A5XX_RBBM_PERFCTR_VFD_6_LO 0x3D4 +#define A5XX_RBBM_PERFCTR_VFD_6_HI 0x3D5 +#define A5XX_RBBM_PERFCTR_VFD_7_LO 0x3D6 +#define A5XX_RBBM_PERFCTR_VFD_7_HI 0x3D7 +#define A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x3D8 +#define A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x3D9 +#define A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x3DA +#define A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x3DB +#define A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x3DC +#define A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x3DD +#define A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x3DE +#define A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x3DF +#define A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x3E0 +#define A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x3E1 +#define A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x3E2 +#define A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x3E3 +#define A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x3E4 +#define A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x3E5 +#define A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x3E6 +#define A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x3E7 +#define A5XX_RBBM_PERFCTR_VPC_0_LO 0x3E8 +#define A5XX_RBBM_PERFCTR_VPC_0_HI 0x3E9 +#define A5XX_RBBM_PERFCTR_VPC_1_LO 0x3EA +#define A5XX_RBBM_PERFCTR_VPC_1_HI 0x3EB +#define A5XX_RBBM_PERFCTR_VPC_2_LO 0x3EC +#define A5XX_RBBM_PERFCTR_VPC_2_HI 0x3ED +#define A5XX_RBBM_PERFCTR_VPC_3_LO 0x3EE +#define A5XX_RBBM_PERFCTR_VPC_3_HI 0x3EF +#define A5XX_RBBM_PERFCTR_CCU_0_LO 0x3F0 +#define A5XX_RBBM_PERFCTR_CCU_0_HI 0x3F1 
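The A5XX_INT_* values near the top of this file are bit positions within A5XX_RBBM_INT_0_MASK and A5XX_RBBM_INT_0_STATUS, not masks. A small illustrative sketch of how an interrupt mask might be assembled from them; the EXAMPLE_* macro and the chosen bits are hypothetical, and the mask actually used by the driver is defined elsewhere in the a5xx code:

/* Illustrative only: build a mask by shifting the A5XX_INT_* bit numbers */
#define EXAMPLE_A5XX_INT_MASK \
	((1 << A5XX_INT_RBBM_AHB_ERROR) | \
	 (1 << A5XX_INT_CP_HW_ERROR) | \
	 (1 << A5XX_INT_CP_CACHE_FLUSH_TS) | \
	 (1 << A5XX_INT_MISC_HANG_DETECT))

static void example_enable_a5xx_irqs(struct kgsl_device *device)
{
	kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, EXAMPLE_A5XX_INT_MASK);
}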
+#define A5XX_RBBM_PERFCTR_CCU_1_LO 0x3F2 +#define A5XX_RBBM_PERFCTR_CCU_1_HI 0x3F3 +#define A5XX_RBBM_PERFCTR_CCU_2_LO 0x3F4 +#define A5XX_RBBM_PERFCTR_CCU_2_HI 0x3F5 +#define A5XX_RBBM_PERFCTR_CCU_3_LO 0x3F6 +#define A5XX_RBBM_PERFCTR_CCU_3_HI 0x3F7 +#define A5XX_RBBM_PERFCTR_TSE_0_LO 0x3F8 +#define A5XX_RBBM_PERFCTR_TSE_0_HI 0x3F9 +#define A5XX_RBBM_PERFCTR_TSE_1_LO 0x3FA +#define A5XX_RBBM_PERFCTR_TSE_1_HI 0x3FB +#define A5XX_RBBM_PERFCTR_TSE_2_LO 0x3FC +#define A5XX_RBBM_PERFCTR_TSE_2_HI 0x3FD +#define A5XX_RBBM_PERFCTR_TSE_3_LO 0x3FE +#define A5XX_RBBM_PERFCTR_TSE_3_HI 0x3FF +#define A5XX_RBBM_PERFCTR_RAS_0_LO 0x400 +#define A5XX_RBBM_PERFCTR_RAS_0_HI 0x401 +#define A5XX_RBBM_PERFCTR_RAS_1_LO 0x402 +#define A5XX_RBBM_PERFCTR_RAS_1_HI 0x403 +#define A5XX_RBBM_PERFCTR_RAS_2_LO 0x404 +#define A5XX_RBBM_PERFCTR_RAS_2_HI 0x405 +#define A5XX_RBBM_PERFCTR_RAS_3_LO 0x406 +#define A5XX_RBBM_PERFCTR_RAS_3_HI 0x407 +#define A5XX_RBBM_PERFCTR_UCHE_0_LO 0x408 +#define A5XX_RBBM_PERFCTR_UCHE_0_HI 0x409 +#define A5XX_RBBM_PERFCTR_UCHE_1_LO 0x40A +#define A5XX_RBBM_PERFCTR_UCHE_1_HI 0x40B +#define A5XX_RBBM_PERFCTR_UCHE_2_LO 0x40C +#define A5XX_RBBM_PERFCTR_UCHE_2_HI 0x40D +#define A5XX_RBBM_PERFCTR_UCHE_3_LO 0x40E +#define A5XX_RBBM_PERFCTR_UCHE_3_HI 0x40F +#define A5XX_RBBM_PERFCTR_UCHE_4_LO 0x410 +#define A5XX_RBBM_PERFCTR_UCHE_4_HI 0x411 +#define A5XX_RBBM_PERFCTR_UCHE_5_LO 0x412 +#define A5XX_RBBM_PERFCTR_UCHE_5_HI 0x413 +#define A5XX_RBBM_PERFCTR_UCHE_6_LO 0x414 +#define A5XX_RBBM_PERFCTR_UCHE_6_HI 0x415 +#define A5XX_RBBM_PERFCTR_UCHE_7_LO 0x416 +#define A5XX_RBBM_PERFCTR_UCHE_7_HI 0x417 +#define A5XX_RBBM_PERFCTR_TP_0_LO 0x418 +#define A5XX_RBBM_PERFCTR_TP_0_HI 0x419 +#define A5XX_RBBM_PERFCTR_TP_1_LO 0x41A +#define A5XX_RBBM_PERFCTR_TP_1_HI 0x41B +#define A5XX_RBBM_PERFCTR_TP_2_LO 0x41C +#define A5XX_RBBM_PERFCTR_TP_2_HI 0x41D +#define A5XX_RBBM_PERFCTR_TP_3_LO 0x41E +#define A5XX_RBBM_PERFCTR_TP_3_HI 0x41F +#define A5XX_RBBM_PERFCTR_TP_4_LO 0x420 +#define A5XX_RBBM_PERFCTR_TP_4_HI 0x421 +#define A5XX_RBBM_PERFCTR_TP_5_LO 0x422 +#define A5XX_RBBM_PERFCTR_TP_5_HI 0x423 +#define A5XX_RBBM_PERFCTR_TP_6_LO 0x424 +#define A5XX_RBBM_PERFCTR_TP_6_HI 0x425 +#define A5XX_RBBM_PERFCTR_TP_7_LO 0x426 +#define A5XX_RBBM_PERFCTR_TP_7_HI 0x427 +#define A5XX_RBBM_PERFCTR_SP_0_LO 0x428 +#define A5XX_RBBM_PERFCTR_SP_0_HI 0x429 +#define A5XX_RBBM_PERFCTR_SP_1_LO 0x42A +#define A5XX_RBBM_PERFCTR_SP_1_HI 0x42B +#define A5XX_RBBM_PERFCTR_SP_2_LO 0x42C +#define A5XX_RBBM_PERFCTR_SP_2_HI 0x42D +#define A5XX_RBBM_PERFCTR_SP_3_LO 0x42E +#define A5XX_RBBM_PERFCTR_SP_3_HI 0x42F +#define A5XX_RBBM_PERFCTR_SP_4_LO 0x430 +#define A5XX_RBBM_PERFCTR_SP_4_HI 0x431 +#define A5XX_RBBM_PERFCTR_SP_5_LO 0x432 +#define A5XX_RBBM_PERFCTR_SP_5_HI 0x433 +#define A5XX_RBBM_PERFCTR_SP_6_LO 0x434 +#define A5XX_RBBM_PERFCTR_SP_6_HI 0x435 +#define A5XX_RBBM_PERFCTR_SP_7_LO 0x436 +#define A5XX_RBBM_PERFCTR_SP_7_HI 0x437 +#define A5XX_RBBM_PERFCTR_SP_8_LO 0x438 +#define A5XX_RBBM_PERFCTR_SP_8_HI 0x439 +#define A5XX_RBBM_PERFCTR_SP_9_LO 0x43A +#define A5XX_RBBM_PERFCTR_SP_9_HI 0x43B +#define A5XX_RBBM_PERFCTR_SP_10_LO 0x43C +#define A5XX_RBBM_PERFCTR_SP_10_HI 0x43D +#define A5XX_RBBM_PERFCTR_SP_11_LO 0x43E +#define A5XX_RBBM_PERFCTR_SP_11_HI 0x43F +#define A5XX_RBBM_PERFCTR_RB_0_LO 0x440 +#define A5XX_RBBM_PERFCTR_RB_0_HI 0x441 +#define A5XX_RBBM_PERFCTR_RB_1_LO 0x442 +#define A5XX_RBBM_PERFCTR_RB_1_HI 0x443 +#define A5XX_RBBM_PERFCTR_RB_2_LO 0x444 +#define A5XX_RBBM_PERFCTR_RB_2_HI 0x445 +#define A5XX_RBBM_PERFCTR_RB_3_LO 0x446 +#define 
A5XX_RBBM_PERFCTR_RB_3_HI 0x447 +#define A5XX_RBBM_PERFCTR_RB_4_LO 0x448 +#define A5XX_RBBM_PERFCTR_RB_4_HI 0x449 +#define A5XX_RBBM_PERFCTR_RB_5_LO 0x44A +#define A5XX_RBBM_PERFCTR_RB_5_HI 0x44B +#define A5XX_RBBM_PERFCTR_RB_6_LO 0x44C +#define A5XX_RBBM_PERFCTR_RB_6_HI 0x44D +#define A5XX_RBBM_PERFCTR_RB_7_LO 0x44E +#define A5XX_RBBM_PERFCTR_RB_7_HI 0x44F +#define A5XX_RBBM_PERFCTR_VSC_0_LO 0x450 +#define A5XX_RBBM_PERFCTR_VSC_0_HI 0x451 +#define A5XX_RBBM_PERFCTR_VSC_1_LO 0x452 +#define A5XX_RBBM_PERFCTR_VSC_1_HI 0x453 +#define A5XX_RBBM_PERFCTR_LRZ_0_LO 0x454 +#define A5XX_RBBM_PERFCTR_LRZ_0_HI 0x455 +#define A5XX_RBBM_PERFCTR_LRZ_1_LO 0x456 +#define A5XX_RBBM_PERFCTR_LRZ_1_HI 0x457 +#define A5XX_RBBM_PERFCTR_LRZ_2_LO 0x458 +#define A5XX_RBBM_PERFCTR_LRZ_2_HI 0x459 +#define A5XX_RBBM_PERFCTR_LRZ_3_LO 0x45A +#define A5XX_RBBM_PERFCTR_LRZ_3_HI 0x45B +#define A5XX_RBBM_PERFCTR_CMP_0_LO 0x45C +#define A5XX_RBBM_PERFCTR_CMP_0_HI 0x45D +#define A5XX_RBBM_PERFCTR_CMP_1_LO 0x45E +#define A5XX_RBBM_PERFCTR_CMP_1_HI 0x45F +#define A5XX_RBBM_PERFCTR_CMP_2_LO 0x460 +#define A5XX_RBBM_PERFCTR_CMP_2_HI 0x461 +#define A5XX_RBBM_PERFCTR_CMP_3_LO 0x462 +#define A5XX_RBBM_PERFCTR_CMP_3_HI 0x463 +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_ALWAYSON_COUNTER_LO 0x4D2 +#define A5XX_RBBM_ALWAYSON_COUNTER_HI 0x4D3 +#define A5XX_RBBM_STATUS 0x4F5 +#define A5XX_RBBM_STATUS3 0x530 +#define A5XX_RBBM_INT_0_STATUS 0x4E1 +#define A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x4F0 +#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x4F1 +#define A5XX_RBBM_AHB_ERROR_STATUS 0x4F4 +#define A5XX_RBBM_PERFCTR_CNTL 0x464 +#define A5XX_RBBM_PERFCTR_LOAD_CMD0 0x465 +#define A5XX_RBBM_PERFCTR_LOAD_CMD1 0x466 +#define A5XX_RBBM_PERFCTR_LOAD_CMD2 0x467 +#define A5XX_RBBM_PERFCTR_LOAD_CMD3 0x468 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x469 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x46A +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x46F +#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x504 +#define A5XX_RBBM_CFG_DBGBUS_OVER 0x505 +#define A5XX_RBBM_CFG_DBGBUS_COUNT0 0x506 +#define A5XX_RBBM_CFG_DBGBUS_COUNT1 0x507 +#define A5XX_RBBM_CFG_DBGBUS_COUNT2 0x508 +#define A5XX_RBBM_CFG_DBGBUS_COUNT3 0x509 +#define A5XX_RBBM_CFG_DBGBUS_COUNT4 0x50A +#define A5XX_RBBM_CFG_DBGBUS_COUNT5 0x50B +#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x50C +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x50D +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x50E +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x50F +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x510 +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x511 +#define A5XX_RBBM_CFG_DBGBUS_MISR0 0x512 +#define A5XX_RBBM_CFG_DBGBUS_MISR1 0x513 +#define A5XX_RBBM_ISDB_CNT 0x533 +#define A5XX_RBBM_SECVID_TRUST_CONFIG 0xF000 +#define A5XX_RBBM_SECVID_TRUST_CNTL 0xF400 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802 +#define A5XX_RBBM_SECVID_TSB_CNTL 0xF803 +#define A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0xF804 +#define A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0xF805 +#define A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 0xF806 +#define A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0xF807 +#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810 + +/* VSC registers */ 
+#define A5XX_VSC_PERFCTR_VSC_SEL_0 0xC60 +#define A5XX_VSC_PERFCTR_VSC_SEL_1 0xC61 + +#define A5XX_GRAS_ADDR_MODE_CNTL 0xC81 + +/* TSE registers */ +#define A5XX_GRAS_PERFCTR_TSE_SEL_0 0xC90 +#define A5XX_GRAS_PERFCTR_TSE_SEL_1 0xC91 +#define A5XX_GRAS_PERFCTR_TSE_SEL_2 0xC92 +#define A5XX_GRAS_PERFCTR_TSE_SEL_3 0xC93 + +/* RAS registers */ +#define A5XX_GRAS_PERFCTR_RAS_SEL_0 0xC94 +#define A5XX_GRAS_PERFCTR_RAS_SEL_1 0xC95 +#define A5XX_GRAS_PERFCTR_RAS_SEL_2 0xC96 +#define A5XX_GRAS_PERFCTR_RAS_SEL_3 0xC97 + +/* LRZ registers */ +#define A5XX_GRAS_PERFCTR_LRZ_SEL_0 0xC98 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_1 0xC99 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_2 0xC9A +#define A5XX_GRAS_PERFCTR_LRZ_SEL_3 0xC9B + + +/* RB registers */ +#define A5XX_RB_ADDR_MODE_CNTL 0xCC5 +#define A5XX_RB_PERFCTR_RB_SEL_0 0xCD0 +#define A5XX_RB_PERFCTR_RB_SEL_1 0xCD1 +#define A5XX_RB_PERFCTR_RB_SEL_2 0xCD2 +#define A5XX_RB_PERFCTR_RB_SEL_3 0xCD3 +#define A5XX_RB_PERFCTR_RB_SEL_4 0xCD4 +#define A5XX_RB_PERFCTR_RB_SEL_5 0xCD5 +#define A5XX_RB_PERFCTR_RB_SEL_6 0xCD6 +#define A5XX_RB_PERFCTR_RB_SEL_7 0xCD7 + +/* CCU registers */ +#define A5XX_RB_PERFCTR_CCU_SEL_0 0xCD8 +#define A5XX_RB_PERFCTR_CCU_SEL_1 0xCD9 +#define A5XX_RB_PERFCTR_CCU_SEL_2 0xCDA +#define A5XX_RB_PERFCTR_CCU_SEL_3 0xCDB + +/* RB Power Counter RB Registers Select */ +#define A5XX_RB_POWERCTR_RB_SEL_0 0xCE0 +#define A5XX_RB_POWERCTR_RB_SEL_1 0xCE1 +#define A5XX_RB_POWERCTR_RB_SEL_2 0xCE2 +#define A5XX_RB_POWERCTR_RB_SEL_3 0xCE3 + +/* RB Power Counter CCU Registers Select */ +#define A5XX_RB_POWERCTR_CCU_SEL_0 0xCE4 +#define A5XX_RB_POWERCTR_CCU_SEL_1 0xCE5 + +/* CMP registers */ +#define A5XX_RB_PERFCTR_CMP_SEL_0 0xCEC +#define A5XX_RB_PERFCTR_CMP_SEL_1 0xCED +#define A5XX_RB_PERFCTR_CMP_SEL_2 0xCEE +#define A5XX_RB_PERFCTR_CMP_SEL_3 0xCEF + +/* PC registers */ +#define A5XX_PC_DBG_ECO_CNTL 0xD00 +#define A5XX_PC_ADDR_MODE_CNTL 0xD01 +#define A5XX_PC_PERFCTR_PC_SEL_0 0xD10 +#define A5XX_PC_PERFCTR_PC_SEL_1 0xD11 +#define A5XX_PC_PERFCTR_PC_SEL_2 0xD12 +#define A5XX_PC_PERFCTR_PC_SEL_3 0xD13 +#define A5XX_PC_PERFCTR_PC_SEL_4 0xD14 +#define A5XX_PC_PERFCTR_PC_SEL_5 0xD15 +#define A5XX_PC_PERFCTR_PC_SEL_6 0xD16 +#define A5XX_PC_PERFCTR_PC_SEL_7 0xD17 + +/* HLSQ registers */ +#define A5XX_HLSQ_TIMEOUT_THRESHOLD 0xE00 +#define A5XX_HLSQ_ADDR_MODE_CNTL 0xE05 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xE10 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xE11 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xE12 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xE13 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xE14 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xE15 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xE16 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xE17 +#define A5XX_HLSQ_SPTP_RDSEL 0xF08 +#define A5XX_HLSQ_DBG_READ_SEL 0xBC00 +#define A5XX_HLSQ_DBG_AHB_READ_APERTURE 0xA000 + +/* VFD registers */ +#define A5XX_VFD_ADDR_MODE_CNTL 0xE41 +#define A5XX_VFD_PERFCTR_VFD_SEL_0 0xE50 +#define A5XX_VFD_PERFCTR_VFD_SEL_1 0xE51 +#define A5XX_VFD_PERFCTR_VFD_SEL_2 0xE52 +#define A5XX_VFD_PERFCTR_VFD_SEL_3 0xE53 +#define A5XX_VFD_PERFCTR_VFD_SEL_4 0xE54 +#define A5XX_VFD_PERFCTR_VFD_SEL_5 0xE55 +#define A5XX_VFD_PERFCTR_VFD_SEL_6 0xE56 +#define A5XX_VFD_PERFCTR_VFD_SEL_7 0xE57 + +/* VPC registers */ +#define A5XX_VPC_ADDR_MODE_CNTL 0xE61 +#define A5XX_VPC_PERFCTR_VPC_SEL_0 0xE64 +#define A5XX_VPC_PERFCTR_VPC_SEL_1 0xE65 +#define A5XX_VPC_PERFCTR_VPC_SEL_2 0xE66 +#define A5XX_VPC_PERFCTR_VPC_SEL_3 0xE67 + +/* UCHE registers */ +#define A5XX_UCHE_ADDR_MODE_CNTL 0xE80 +#define A5XX_UCHE_SVM_CNTL 0xE82 +#define 
A5XX_UCHE_WRITE_THRU_BASE_LO 0xE87 +#define A5XX_UCHE_WRITE_THRU_BASE_HI 0xE88 +#define A5XX_UCHE_TRAP_BASE_LO 0xE89 +#define A5XX_UCHE_TRAP_BASE_HI 0xE8A +#define A5XX_UCHE_GMEM_RANGE_MIN_LO 0xE8B +#define A5XX_UCHE_GMEM_RANGE_MIN_HI 0xE8C +#define A5XX_UCHE_GMEM_RANGE_MAX_LO 0xE8D +#define A5XX_UCHE_GMEM_RANGE_MAX_HI 0xE8E +#define A5XX_UCHE_INVALIDATE0 0xE95 +#define A5XX_UCHE_CACHE_WAYS 0xE96 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_0 0xEA0 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_1 0xEA1 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_2 0xEA2 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_3 0xEA3 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_4 0xEA4 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_5 0xEA5 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_6 0xEA6 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_7 0xEA7 + +/* UCHE Power Counter UCHE Registers Select */ +#define A5XX_UCHE_POWERCTR_UCHE_SEL_0 0xEA8 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_1 0xEA9 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_2 0xEAA +#define A5XX_UCHE_POWERCTR_UCHE_SEL_3 0xEAB + +/* SP registers */ +#define A5XX_SP_DBG_ECO_CNTL 0xEC0 +#define A5XX_SP_ADDR_MODE_CNTL 0xEC1 +#define A5XX_SP_PERFCTR_SP_SEL_0 0xED0 +#define A5XX_SP_PERFCTR_SP_SEL_1 0xED1 +#define A5XX_SP_PERFCTR_SP_SEL_2 0xED2 +#define A5XX_SP_PERFCTR_SP_SEL_3 0xED3 +#define A5XX_SP_PERFCTR_SP_SEL_4 0xED4 +#define A5XX_SP_PERFCTR_SP_SEL_5 0xED5 +#define A5XX_SP_PERFCTR_SP_SEL_6 0xED6 +#define A5XX_SP_PERFCTR_SP_SEL_7 0xED7 +#define A5XX_SP_PERFCTR_SP_SEL_8 0xED8 +#define A5XX_SP_PERFCTR_SP_SEL_9 0xED9 +#define A5XX_SP_PERFCTR_SP_SEL_10 0xEDA +#define A5XX_SP_PERFCTR_SP_SEL_11 0xEDB + +/* SP Power Counter SP Registers Select */ +#define A5XX_SP_POWERCTR_SP_SEL_0 0xEDC +#define A5XX_SP_POWERCTR_SP_SEL_1 0xEDD +#define A5XX_SP_POWERCTR_SP_SEL_2 0xEDE +#define A5XX_SP_POWERCTR_SP_SEL_3 0xEDF + +/* TP registers */ +#define A5XX_TPL1_ADDR_MODE_CNTL 0xF01 +#define A5XX_TPL1_PERFCTR_TP_SEL_0 0xF10 +#define A5XX_TPL1_PERFCTR_TP_SEL_1 0xF11 +#define A5XX_TPL1_PERFCTR_TP_SEL_2 0xF12 +#define A5XX_TPL1_PERFCTR_TP_SEL_3 0xF13 +#define A5XX_TPL1_PERFCTR_TP_SEL_4 0xF14 +#define A5XX_TPL1_PERFCTR_TP_SEL_5 0xF15 +#define A5XX_TPL1_PERFCTR_TP_SEL_6 0xF16 +#define A5XX_TPL1_PERFCTR_TP_SEL_7 0xF17 + +/* TP Power Counter TP Registers Select */ +#define A5XX_TPL1_POWERCTR_TP_SEL_0 0xF18 +#define A5XX_TPL1_POWERCTR_TP_SEL_1 0xF19 +#define A5XX_TPL1_POWERCTR_TP_SEL_2 0xF1A +#define A5XX_TPL1_POWERCTR_TP_SEL_3 0xF1B + +/* VBIF registers */ +#define A5XX_VBIF_VERSION 0x3000 +#define A5XX_VBIF_CLKON 0x3001 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 + +#define A5XX_VBIF_ABIT_SORT 0x3028 +#define A5XX_VBIF_ABIT_SORT_CONF 0x3029 +#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A5XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A5XX_VBIF_IN_RD_LIM_CONF0 0x302C +#define A5XX_VBIF_IN_RD_LIM_CONF1 0x302D + +#define A5XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A5XX_VBIF_XIN_HALT_CTRL0_MASK 0xF +#define A510_VBIF_XIN_HALT_CTRL0_MASK 0x7 +#define A5XX_VBIF_XIN_HALT_CTRL1 0x3081 + +#define A5XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A5XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A5XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0xF +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 + +#define 
A5XX_VBIF_TEST_BUS_OUT 0x308c + +#define A5XX_VBIF_PERF_CNT_SEL0 0x30D0 +#define A5XX_VBIF_PERF_CNT_SEL1 0x30D1 +#define A5XX_VBIF_PERF_CNT_SEL2 0x30D2 +#define A5XX_VBIF_PERF_CNT_SEL3 0x30D3 +#define A5XX_VBIF_PERF_CNT_LOW0 0x30D8 +#define A5XX_VBIF_PERF_CNT_LOW1 0x30D9 +#define A5XX_VBIF_PERF_CNT_LOW2 0x30DA +#define A5XX_VBIF_PERF_CNT_LOW3 0x30DB +#define A5XX_VBIF_PERF_CNT_HIGH0 0x30E0 +#define A5XX_VBIF_PERF_CNT_HIGH1 0x30E1 +#define A5XX_VBIF_PERF_CNT_HIGH2 0x30E2 +#define A5XX_VBIF_PERF_CNT_HIGH3 0x30E3 + +#define A5XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A5XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A5XX_VBIF_PERF_PWR_CNT_EN2 0x3102 + +#define A5XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A5XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A5XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 + +#define A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x311A + +/* GPMU registers */ +#define A5XX_GPMU_INST_RAM_BASE 0x8800 +#define A5XX_GPMU_DATA_RAM_BASE 0x9800 +#define A5XX_GPMU_SP_POWER_CNTL 0xA881 +#define A5XX_GPMU_RBCCU_CLOCK_CNTL 0xA886 +#define A5XX_GPMU_RBCCU_POWER_CNTL 0xA887 +#define A5XX_GPMU_SP_PWR_CLK_STATUS 0xA88B +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0xA88D +#define A5XX_GPMU_PWR_COL_STAGGER_DELAY 0xA891 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893 +#define A5XX_GPMU_PWR_COL_BINNING_CTRL 0xA894 +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_WFI_CONFIG 0xA8C1 +#define A5XX_GPMU_RBBM_INTR_INFO 0xA8D6 +#define A5XX_GPMU_CM3_SYSRESET 0xA8D8 +#define A5XX_GPMU_GENERAL_0 0xA8E0 +#define A5XX_GPMU_GENERAL_1 0xA8E1 + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define A5XX_SP_ALU_ACTIVE_CYCLES 0x1 +#define A5XX_SP0_ICL1_MISSES 0x35 +#define A5XX_SP_FS_CFLOW_INSTRUCTIONS 0x27 + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define A5XX_TSE_INPUT_PRIM_NUM 0x6 + +/* GPMU POWER COUNTERS */ +#define A5XX_SP_POWER_COUNTER_0_LO 0xA840 +#define A5XX_SP_POWER_COUNTER_0_HI 0xA841 +#define A5XX_SP_POWER_COUNTER_1_LO 0xA842 +#define A5XX_SP_POWER_COUNTER_1_HI 0xA843 +#define A5XX_SP_POWER_COUNTER_2_LO 0xA844 +#define A5XX_SP_POWER_COUNTER_2_HI 0xA845 +#define A5XX_SP_POWER_COUNTER_3_LO 0xA846 +#define A5XX_SP_POWER_COUNTER_3_HI 0xA847 + +#define A5XX_TP_POWER_COUNTER_0_LO 0xA848 +#define A5XX_TP_POWER_COUNTER_0_HI 0xA849 +#define A5XX_TP_POWER_COUNTER_1_LO 0xA84A +#define A5XX_TP_POWER_COUNTER_1_HI 0xA84B +#define A5XX_TP_POWER_COUNTER_2_LO 0xA84C +#define A5XX_TP_POWER_COUNTER_2_HI 0xA84D +#define A5XX_TP_POWER_COUNTER_3_LO 0xA84E +#define A5XX_TP_POWER_COUNTER_3_HI 0xA84F + +#define A5XX_RB_POWER_COUNTER_0_LO 0xA850 +#define A5XX_RB_POWER_COUNTER_0_HI 0xA851 +#define A5XX_RB_POWER_COUNTER_1_LO 0xA852 +#define A5XX_RB_POWER_COUNTER_1_HI 0xA853 +#define A5XX_RB_POWER_COUNTER_2_LO 0xA854 +#define A5XX_RB_POWER_COUNTER_2_HI 0xA855 +#define A5XX_RB_POWER_COUNTER_3_LO 0xA856 +#define A5XX_RB_POWER_COUNTER_3_HI 0xA857 + +#define A5XX_CCU_POWER_COUNTER_0_LO 0xA858 +#define A5XX_CCU_POWER_COUNTER_0_HI 0xA859 +#define A5XX_CCU_POWER_COUNTER_1_LO 0xA85A +#define A5XX_CCU_POWER_COUNTER_1_HI 0xA85B + +#define A5XX_UCHE_POWER_COUNTER_0_LO 0xA85C +#define A5XX_UCHE_POWER_COUNTER_0_HI 0xA85D +#define A5XX_UCHE_POWER_COUNTER_1_LO 0xA85E +#define A5XX_UCHE_POWER_COUNTER_1_HI 0xA85F +#define A5XX_UCHE_POWER_COUNTER_2_LO 0xA860 +#define A5XX_UCHE_POWER_COUNTER_2_HI 0xA861 +#define A5XX_UCHE_POWER_COUNTER_3_LO 0xA862 +#define A5XX_UCHE_POWER_COUNTER_3_HI 0xA863 + +#define A5XX_CP_POWER_COUNTER_0_LO 0xA864 
+#define A5XX_CP_POWER_COUNTER_0_HI 0xA865 +#define A5XX_CP_POWER_COUNTER_1_LO 0xA866 +#define A5XX_CP_POWER_COUNTER_1_HI 0xA867 +#define A5XX_CP_POWER_COUNTER_2_LO 0xA868 +#define A5XX_CP_POWER_COUNTER_2_HI 0xA869 +#define A5XX_CP_POWER_COUNTER_3_LO 0xA86A +#define A5XX_CP_POWER_COUNTER_3_HI 0xA86B + +#define A5XX_GPMU_POWER_COUNTER_0_LO 0xA86C +#define A5XX_GPMU_POWER_COUNTER_0_HI 0xA86D +#define A5XX_GPMU_POWER_COUNTER_1_LO 0xA86E +#define A5XX_GPMU_POWER_COUNTER_1_HI 0xA86F +#define A5XX_GPMU_POWER_COUNTER_2_LO 0xA870 +#define A5XX_GPMU_POWER_COUNTER_2_HI 0xA871 +#define A5XX_GPMU_POWER_COUNTER_3_LO 0xA872 +#define A5XX_GPMU_POWER_COUNTER_3_HI 0xA873 +#define A5XX_GPMU_POWER_COUNTER_4_LO 0xA874 +#define A5XX_GPMU_POWER_COUNTER_4_HI 0xA875 +#define A5XX_GPMU_POWER_COUNTER_5_LO 0xA876 +#define A5XX_GPMU_POWER_COUNTER_5_HI 0xA877 + +#define A5XX_GPMU_POWER_COUNTER_ENABLE 0xA878 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0xA879 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0xA87A +#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0xA87B +#define A5XX_GPMU_POWER_COUNTER_SELECT_0 0xA87C +#define A5XX_GPMU_POWER_COUNTER_SELECT_1 0xA87D + +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0xA8A8 + +#define A5XX_GPMU_TEMP_SENSOR_ID 0xAC00 +#define A5XX_GPMU_TEMP_SENSOR_CONFIG 0xAC01 +#define A5XX_GPMU_TEMP_VAL 0xAC02 +#define A5XX_GPMU_DELTA_TEMP_THRESHOLD 0xAC03 +#define A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0xAC05 +#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0xAC06 + +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0xAC40 +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0xAC41 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0xAC42 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0xAC43 +#define A5XX_GPMU_BASE_LEAKAGE 0xAC46 + +#define A5XX_GPMU_GPMU_VOLTAGE 0xAC60 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0xAC61 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0xAC62 +#define A5XX_GPMU_GPMU_PWR_THRESHOLD 0xAC80 + +#define A5XX_GDPM_CONFIG1 0xB80C +#define A5XX_GDPM_CONFIG2 0xB80D +#define A5XX_GDPM_INT_EN 0xB80F +#define A5XX_GDPM_INT_MASK 0xB811 +#define A5XX_GPMU_BEC_ENABLE 0xB9A0 + +#endif /* _A5XX_REG_H */ + diff --git a/drivers/gpu/msm/adreno-gpulist.h b/drivers/gpu/msm/adreno-gpulist.h new file mode 100644 index 000000000000..6f333624a28d --- /dev/null +++ b/drivers/gpu/msm/adreno-gpulist.h @@ -0,0 +1,240 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#define ANY_ID (~0) + +static const struct adreno_gpu_core adreno_gpulist[] = { + { + .gpurev = ADRENO_REV_A306, + .core = 3, + .major = 0, + .minor = 6, + .patchid = 0x00, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .gpudev = &adreno_a3xx_gpudev, + .gmem_size = SZ_128K, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A306A, + .core = 3, + .major = 0, + .minor = 6, + .patchid = 0x20, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .gpudev = &adreno_a3xx_gpudev, + .gmem_size = SZ_128K, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A304, + .core = 3, + .major = 0, + .minor = 4, + .patchid = 0x00, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .gpudev = &adreno_a3xx_gpudev, + .gmem_size = (SZ_64K + SZ_32K), + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A405, + .core = 4, + .major = 0, + .minor = 5, + .patchid = ANY_ID, + .features = 0, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = SZ_256K, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A420, + .core = 4, + .major = 2, + .minor = 0, + .patchid = ANY_ID, + .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | + ADRENO_USE_BOOTSTRAP, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = (SZ_1M + SZ_512K), + .pm4_jt_idx = 0x901, + .pm4_jt_addr = 0x300, + .pfp_jt_idx = 0x401, + .pfp_jt_addr = 0x400, + .pm4_bstrp_size = 0x06, + .pfp_bstrp_size = 0x28, + .pfp_bstrp_ver = 0x4ff083, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A430, + .core = 4, + .major = 3, + .minor = 0, + .patchid = ANY_ID, + .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | + ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC | ADRENO_PPD | + ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = (SZ_1M + SZ_512K), + .pm4_jt_idx = 0x901, + .pm4_jt_addr = 0x300, + .pfp_jt_idx = 0x401, + .pfp_jt_addr = 0x400, + .pm4_bstrp_size = 0x06, + .pfp_bstrp_size = 0x28, + .pfp_bstrp_ver = 0x4ff083, + .shader_offset = 0x20000, + .shader_size = 0x10000, + .num_protected_regs = 0x18, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A418, + .core = 4, + .major = 1, + .minor = 8, + .patchid = ANY_ID, + .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | + ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = (SZ_512K), + .pm4_jt_idx = 0x901, + .pm4_jt_addr = 0x300, + .pfp_jt_idx = 0x401, + .pfp_jt_addr = 0x400, + .pm4_bstrp_size = 0x06, + .pfp_bstrp_size = 0x28, + .pfp_bstrp_ver = 0x4ff083, + .shader_offset = 0x20000, /* SP and TP addresses */ + .shader_size = 0x10000, + .num_protected_regs = 0x18, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A530, + .core = 5, + .major = 3, + .minor = 0, + .patchid = 0, + .pm4fw_name = "a530v1_pm4.fw", + .pfpfw_name = "a530v1_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_1M, + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, + { + .gpurev = ADRENO_REV_A530, + .core = 5, + .major = 3, + .minor = 0, + .patchid = 1, + .features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM | + ADRENO_PREEMPTION | ADRENO_64BIT | + ADRENO_CONTENT_PROTECTION, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a530_zap", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_1M, + 
.num_protected_regs = 0x20, + .gpmufw_name = "a530_gpmu.fw2", + .gpmu_major = 1, + .gpmu_minor = 0, + .busy_mask = 0xFFFFFFFE, + .lm_major = 3, + .lm_minor = 0, + .gpmu_tsens = 0x00060007, + .max_power = 5448, + .regfw_name = "a530v2_seq.fw2", + }, + { + .gpurev = ADRENO_REV_A530, + .core = 5, + .major = 3, + .minor = 0, + .patchid = ANY_ID, + .features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM | + ADRENO_PREEMPTION | ADRENO_64BIT | + ADRENO_CONTENT_PROTECTION, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a530_zap", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_1M, + .num_protected_regs = 0x20, + .gpmufw_name = "a530v3_gpmu.fw2", + .gpmu_major = 1, + .gpmu_minor = 0, + .busy_mask = 0xFFFFFFFE, + .lm_major = 1, + .lm_minor = 0, + .gpmu_tsens = 0x00060007, + .max_power = 5448, + .regfw_name = "a530v3_seq.fw2", + }, + { + .gpurev = ADRENO_REV_A505, + .core = 5, + .major = 0, + .minor = 5, + .patchid = ANY_ID, + .features = ADRENO_PREEMPTION | ADRENO_64BIT, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = (SZ_128K + SZ_8K), + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, + { + .gpurev = ADRENO_REV_A506, + .core = 5, + .major = 0, + .minor = 6, + .patchid = ANY_ID, + .features = ADRENO_PREEMPTION | ADRENO_64BIT, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = (SZ_128K + SZ_8K), + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, + { + .gpurev = ADRENO_REV_A510, + .core = 5, + .major = 1, + .minor = 0, + .patchid = ANY_ID, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_256K, + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, +}; diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c new file mode 100644 index 000000000000..5bac1f2d79e0 --- /dev/null +++ b/drivers/gpu/msm/adreno.c @@ -0,0 +1,2884 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/delay.h> +#include <linux/of_coresight.h> +#include <linux/input.h> +#include <soc/qcom/scm.h> + +#include <linux/msm-bus-board.h> +#include <linux/msm-bus.h> + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_cffdump.h" +#include "kgsl_sharedmem.h" +#include "kgsl_iommu.h" +#include "kgsl_trace.h" + +#include "adreno.h" +#include "adreno_compat.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" + +#include "a3xx_reg.h" +#include "adreno_snapshot.h" + +/* Include the master list of GPU cores that are supported */ +#include "adreno-gpulist.h" + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "adreno." 
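The adreno-gpulist.h table included above is what adreno_identify_gpu() (defined later in this file) matches against the chip ID read from the qcom,chipid device-tree property: _get_gpu_core() walks the array, requires an exact match on the core field, treats ANY_ID entries for major, minor and patchid as wildcards, and returns the first entry that matches. The standalone sketch below mirrors that lookup; the byte-per-field chip-ID packing and the sample table entries are illustrative assumptions, not values lifted from the driver headers.

/* Standalone sketch (not driver code): chip-ID decode and gpulist matching. */
#include <stdio.h>

#define ANY_ID (~0u)

struct gpu_entry {
	unsigned int core, major, minor, patchid;
	const char *name;
};

/* Hypothetical subset of the table; first match wins, so exact patch IDs
 * must appear before their ANY_ID catch-all. */
static const struct gpu_entry gpulist[] = {
	{ 5, 3, 0, 0,      "A530v1" },
	{ 5, 3, 0, 1,      "A530v2" },
	{ 5, 3, 0, ANY_ID, "A530 (other patch levels)" },
	{ 5, 1, 0, ANY_ID, "A510" },
};

static int rev_match(unsigned int id, unsigned int entry)
{
	return entry == ANY_ID || entry == id;
}

static const struct gpu_entry *get_gpu_core(unsigned int chipid)
{
	/* Assumed packing: core, major, minor, patch in successive bytes. */
	unsigned int core = (chipid >> 24) & 0xff;
	unsigned int major = (chipid >> 16) & 0xff;
	unsigned int minor = (chipid >> 8) & 0xff;
	unsigned int patchid = chipid & 0xff;
	unsigned int i;

	for (i = 0; i < sizeof(gpulist) / sizeof(gpulist[0]); i++) {
		if (core == gpulist[i].core &&
		    rev_match(major, gpulist[i].major) &&
		    rev_match(minor, gpulist[i].minor) &&
		    rev_match(patchid, gpulist[i].patchid))
			return &gpulist[i];
	}

	return NULL;
}

int main(void)
{
	/* 0x05030002: core 5, major 3, minor 0, patch 2 -> falls through the
	 * exact-patch A530 entries and matches the ANY_ID catch-all. */
	const struct gpu_entry *e = get_gpu_core(0x05030002);

	printf("%s\n", e ? e->name : "no match");
	return 0;
}

In the driver itself a failed lookup is fatal: adreno_identify_gpu() reports the unknown chip ID through KGSL_DRV_FATAL().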
+ +static bool nopreempt; +module_param(nopreempt, bool, 0444); +MODULE_PARM_DESC(nopreempt, "Disable GPU preemption"); + +#define DRIVER_VERSION_MAJOR 3 +#define DRIVER_VERSION_MINOR 1 + +/* Number of times to try hard reset */ +#define NUM_TIMES_RESET_RETRY 5 + +#define KGSL_LOG_LEVEL_DEFAULT 3 + +static void adreno_input_work(struct work_struct *work); + +static struct devfreq_msm_adreno_tz_data adreno_tz_data = { + .bus = { + .max = 350, + }, + .device_id = KGSL_DEVICE_3D0, +}; + +static const struct kgsl_functable adreno_functable; + +static struct kgsl_iommu device_3d0_iommu; + +static struct adreno_device device_3d0 = { + .dev = { + KGSL_DEVICE_COMMON_INIT(device_3d0.dev), + .pwrscale = KGSL_PWRSCALE_INIT(&adreno_tz_data), + .name = DEVICE_3D0_NAME, + .id = KGSL_DEVICE_3D0, + .pwrctrl = { + .irq_name = "kgsl_3d0_irq", + }, + .iomemname = "kgsl_3d0_reg_memory", + .shadermemname = "kgsl_3d0_shader_memory", + .ftbl = &adreno_functable, + .cmd_log = KGSL_LOG_LEVEL_DEFAULT, + .ctxt_log = KGSL_LOG_LEVEL_DEFAULT, + .drv_log = KGSL_LOG_LEVEL_DEFAULT, + .mem_log = KGSL_LOG_LEVEL_DEFAULT, + .pwr_log = KGSL_LOG_LEVEL_DEFAULT, + }, + .gmem_size = SZ_256K, + .pfp_fw = NULL, + .pm4_fw = NULL, + .ft_policy = KGSL_FT_DEFAULT_POLICY, + .ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY, + .fast_hang_detect = 1, + .long_ib_detect = 1, + .input_work = __WORK_INITIALIZER(device_3d0.input_work, + adreno_input_work), + .pwrctrl_flag = BIT(ADRENO_SPTP_PC_CTRL) | BIT(ADRENO_PPD_CTRL) | + BIT(ADRENO_LM_CTRL), + .profile.enabled = false, +}; + +/* Ptr to array for the current set of fault detect registers */ +unsigned int *adreno_ft_regs; +/* Total number of fault detect registers */ +unsigned int adreno_ft_regs_num; +/* Ptr to array for the current fault detect registers values */ +unsigned int *adreno_ft_regs_val; +/* Array of default fault detect registers */ +static unsigned int adreno_ft_regs_default[] = { + ADRENO_REG_RBBM_STATUS, + ADRENO_REG_CP_RB_RPTR, + ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BUFSZ, + ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BUFSZ +}; + +/* Nice level for the higher priority GPU start thread */ +int adreno_wake_nice = -7; + +/* Number of milliseconds to stay active active after a wake on touch */ +unsigned int adreno_wake_timeout = 100; + +/** + * adreno_readreg64() - Read a 64bit register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @lo: lower 32bit register enum that is to be read + * @hi: higher 32bit register enum that is to be read + * @val: 64 bit Register value read is placed here + */ +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int val_lo = 0, val_hi = 0; + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_checkreg_off(adreno_dev, lo)) + kgsl_regread(device, gpudev->reg_offsets->offsets[lo], &val_lo); + if (adreno_checkreg_off(adreno_dev, hi)) + kgsl_regread(device, gpudev->reg_offsets->offsets[hi], &val_hi); + + *val = (val_lo | ((uint64_t)val_hi << 32)); +} + +/** + * adreno_writereg64() - Write a 64bit register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @lo: lower 32bit register enum that is to be written + * @hi: higher 32bit register enum that is to be written + * @val: 64 bit value to write + */ +void adreno_writereg64(struct adreno_device 
*adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t val) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_checkreg_off(adreno_dev, lo)) + kgsl_regwrite(device, gpudev->reg_offsets->offsets[lo], + lower_32_bits(val)); + if (adreno_checkreg_off(adreno_dev, hi)) + kgsl_regwrite(device, gpudev->reg_offsets->offsets[hi], + upper_32_bits(val)); +} + +/** + * adreno_of_read_property() - Adreno read property + * @node: Device node + * + * Read a u32 property. + */ +static inline int adreno_of_read_property(struct device_node *node, + const char *prop, unsigned int *ptr) +{ + int ret = of_property_read_u32(node, prop, ptr); + if (ret) + KGSL_CORE_ERR("Unable to read '%s'\n", prop); + return ret; +} + +static void __iomem *efuse_base; +static size_t efuse_len; + +int adreno_efuse_map(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct resource *res; + + if (efuse_base != NULL) + return 0; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "qfprom_memory"); + + if (res == NULL) + return -ENODEV; + + efuse_base = ioremap(res->start, resource_size(res)); + if (efuse_base == NULL) + return -ENODEV; + + efuse_len = resource_size(res); + return 0; +} + +void adreno_efuse_unmap(struct adreno_device *adreno_dev) +{ + if (efuse_base != NULL) { + iounmap(efuse_base); + efuse_base = NULL; + efuse_len = 0; + } +} + +int adreno_efuse_read_u32(struct adreno_device *adreno_dev, unsigned int offset, + unsigned int *val) +{ + if (efuse_base == NULL) + return -ENODEV; + + if (offset >= efuse_len) + return -ERANGE; + + if (val != NULL) { + *val = readl_relaxed(efuse_base + offset); + /* Make sure memory is updated before returning */ + rmb(); + } + + return 0; +} + +/* + * adreno_iommu_cb_probe() - Adreno iommu context bank probe + * + * Iommu context bank probe function. + */ +static int adreno_iommu_cb_probe(struct platform_device *pdev) +{ + struct kgsl_iommu_context *ctx = NULL; + struct device_node *node = pdev->dev.of_node; + struct kgsl_iommu *iommu = &device_3d0_iommu; + int ret = 0; + + /* Map context names from dt to id's */ + if (!strcmp("gfx3d_user", node->name)) { + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + ctx->id = KGSL_IOMMU_CONTEXT_USER; + ctx->cb_num = -1; + } else if (!strcmp("gfx3d_secure", node->name)) { + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + ctx->id = KGSL_IOMMU_CONTEXT_SECURE; + ctx->cb_num = -1; + device_3d0.dev.mmu.secured = true; + } else { + KGSL_CORE_ERR("dt: Unknown context label %s\n", node->name); + return -EINVAL; + } + + if (ctx->name != NULL) { + KGSL_CORE_ERR("dt: %s appears multiple times\n", node->name); + return -EINVAL; + } + ctx->name = node->name; + + /* this property won't be found for all context banks */ + if (of_property_read_u32(node, "qcom,gpu-offset", + &ctx->gpu_offset)) + ctx->gpu_offset = UINT_MAX; + + ctx->kgsldev = &device_3d0.dev; + + /* arm-smmu driver we'll have the right device pointer here. */ + if (of_find_property(node, "iommus", NULL)) { + ctx->dev = &pdev->dev; + } else { + /* + * old iommu driver requires that we query the context bank + * device rather than getting it from dt. + */ + ctx->dev = kgsl_mmu_get_ctx(ctx->name); + if (IS_ERR_OR_NULL(ctx->dev)) { + ret = (ctx->dev == NULL) ? 
-ENODEV : PTR_ERR(ctx->dev); + KGSL_CORE_ERR("ctx %s: kgsl_mmu_get_ctx err: %d\n", + ctx->name, ret); + return ret; + } + } + + kgsl_mmu_set_mmutype(KGSL_MMU_TYPE_IOMMU); + + return ret; +} + +static struct of_device_id iommu_match_table[] = { + { .compatible = "qcom,kgsl-smmu-v1", }, + { .compatible = "qcom,kgsl-smmu-v2", }, + { .compatible = "qcom,smmu-kgsl-cb", }, + {} +}; + +/** + * adreno_iommu_pdev_probe() - Adreno iommu context bank probe + * @pdev: Platform device + * + * Iommu probe function. + */ +static int adreno_iommu_pdev_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const char *cname; + struct property *prop; + u32 reg_val[2]; + int i = 0; + struct kgsl_iommu *iommu = &device_3d0_iommu; + + if (of_device_is_compatible(dev->of_node, "qcom,smmu-kgsl-cb")) + return adreno_iommu_cb_probe(pdev); + else if (of_device_is_compatible(dev->of_node, "qcom,kgsl-smmu-v1")) + iommu->version = 1; + else + iommu->version = 2; + + if (of_property_read_u32_array(pdev->dev.of_node, "reg", reg_val, 2)) { + KGSL_CORE_ERR("dt: Unable to read KGSL IOMMU register range\n"); + return -EINVAL; + } + iommu->regstart = reg_val[0]; + iommu->regsize = reg_val[1]; + + /* Protecting the SMMU registers is mandatory */ + if (of_property_read_u32_array(pdev->dev.of_node, "qcom,protect", + reg_val, 2)) { + KGSL_CORE_ERR("dt: no iommu protection range specified\n"); + return -EINVAL; + } + iommu->protect.base = reg_val[0] / sizeof(u32); + iommu->protect.range = ilog2(reg_val[1] / sizeof(u32)); + + of_property_for_each_string(dev->of_node, "clock-names", prop, cname) { + struct clk *c = devm_clk_get(dev, cname); + if (IS_ERR(c)) { + KGSL_CORE_ERR("dt: Couldn't get clock: %s\n", cname); + return -ENODEV; + } + if (i >= KGSL_IOMMU_MAX_CLKS) { + KGSL_CORE_ERR("dt: too many clocks defined.\n"); + return -EINVAL; + } + + iommu->clks[i] = c; + ++i; + } + + if (of_property_read_bool(pdev->dev.of_node, "qcom,retention")) + device_3d0.dev.mmu.features |= KGSL_MMU_RETENTION; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,global_pt")) + device_3d0.dev.mmu.features |= KGSL_MMU_GLOBAL_PAGETABLE; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,hyp_secure_alloc")) + device_3d0.dev.mmu.features |= KGSL_MMU_HYP_SECURE_ALLOC; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,force-32bit")) + device_3d0.dev.mmu.features |= KGSL_MMU_FORCE_32BIT; + + if (of_property_read_u32(pdev->dev.of_node, "qcom,micro-mmu-control", + &iommu->micro_mmu_ctrl)) + iommu->micro_mmu_ctrl = UINT_MAX; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,coherent-htw")) + device_3d0.dev.mmu.features |= KGSL_MMU_COHERENT_HTW; + + if (of_property_read_u32(pdev->dev.of_node, "qcom,secure_align_mask", + &device_3d0.dev.mmu.secure_align_mask)) + device_3d0.dev.mmu.secure_align_mask = 0xfff; + + return of_platform_populate(pdev->dev.of_node, iommu_match_table, + NULL, &pdev->dev); +} + +static struct platform_driver kgsl_iommu_platform_driver = { + .probe = adreno_iommu_pdev_probe, + .driver = { + .owner = THIS_MODULE, + .name = "kgsl-iommu", + .of_match_table = iommu_match_table, + } +}; + +static int __init kgsl_iommu_pdev_init(void) +{ + return platform_driver_register(&kgsl_iommu_platform_driver); +} + +static void __exit kgsl_iommu_pdev_exit(void) +{ + platform_driver_unregister(&kgsl_iommu_platform_driver); +} + +module_init(kgsl_iommu_pdev_init); +module_exit(kgsl_iommu_pdev_exit); + +static int _get_counter(struct adreno_device *adreno_dev, + int group, int countable, unsigned int *lo, + unsigned int 
*hi) +{ + int ret = 0; + + if (*lo == 0) { + + ret = adreno_perfcounter_get(adreno_dev, group, countable, + lo, hi, PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + struct kgsl_device *device = &adreno_dev->dev; + + KGSL_DRV_ERR(device, + "Unable to allocate fault detect performance counter %d/%d\n", + group, countable); + KGSL_DRV_ERR(device, + "GPU fault detect will be less reliable\n"); + } + } + + return ret; +} + +static inline void _put_counter(struct adreno_device *adreno_dev, + int group, int countable, unsigned int *lo, + unsigned int *hi) +{ + if (*lo != 0) + adreno_perfcounter_put(adreno_dev, group, countable, + PERFCOUNTER_FLAG_KERNEL); + + *lo = 0; + *hi = 0; +} + +/** + * adreno_fault_detect_start() - Allocate performance counters + * used for fast fault detection + * @adreno_dev: Pointer to an adreno_device structure + * + * Allocate the series of performance counters that should be periodically + * checked to verify that the GPU is still moving + */ +void adreno_fault_detect_start(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int i, j = ARRAY_SIZE(adreno_ft_regs_default); + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (adreno_dev->fast_hang_detect == 1) + return; + + for (i = 0; i < gpudev->ft_perf_counters_count; i++) { + _get_counter(adreno_dev, gpudev->ft_perf_counters[i].counter, + gpudev->ft_perf_counters[i].countable, + &adreno_ft_regs[j + (i * 2)], + &adreno_ft_regs[j + ((i * 2) + 1)]); + } + + adreno_dev->fast_hang_detect = 1; +} + +/** + * adreno_fault_detect_stop() - Release performance counters + * used for fast fault detection + * @adreno_dev: Pointer to an adreno_device structure + * + * Release the counters allocated in adreno_fault_detect_start + */ +void adreno_fault_detect_stop(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int i, j = ARRAY_SIZE(adreno_ft_regs_default); + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (!adreno_dev->fast_hang_detect) + return; + + for (i = 0; i < gpudev->ft_perf_counters_count; i++) { + _put_counter(adreno_dev, gpudev->ft_perf_counters[i].counter, + gpudev->ft_perf_counters[i].countable, + &adreno_ft_regs[j + (i * 2)], + &adreno_ft_regs[j + ((i * 2) + 1)]); + + } + + adreno_dev->fast_hang_detect = 0; +} + +/* + * A workqueue callback responsible for actually turning on the GPU after a + * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any + * active_count protection to avoid the need to maintain state. Either + * somebody will start using the GPU or the idle timer will fire and put the + * GPU back into slumber. + */ +static void adreno_input_work(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, input_work); + struct kgsl_device *device = &adreno_dev->dev; + + mutex_lock(&device->mutex); + + device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; + + /* + * Don't schedule adreno_start in a high priority workqueue, we are + * already in a workqueue which should be sufficient + */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. 
The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, + jiffies + msecs_to_jiffies(adreno_wake_timeout)); + mutex_unlock(&device->mutex); +} + +/* + * Process input events and schedule work if needed. At this point we are only + * interested in groking EV_ABS touchscreen events + */ +static void adreno_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct kgsl_device *device = handle->handler->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Only consider EV_ABS (touch) events */ + if (type != EV_ABS) + return; + + /* + * Don't do anything if anything hasn't been rendered since we've been + * here before + */ + + if (device->flags & KGSL_FLAG_WAKE_ON_TOUCH) + return; + + /* + * If the device is in nap, kick the idle timer to make sure that we + * don't go into slumber before the first render. If the device is + * already in slumber schedule the wake. + */ + + if (device->state == KGSL_STATE_NAP) { + /* + * Set the wake on touch bit to keep from coming back here and + * keeping the device in nap without rendering + */ + + device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; + + mod_timer(&device->idle_timer, + jiffies + device->pwrctrl.interval_timeout); + } else if (device->state == KGSL_STATE_SLUMBER) { + schedule_work(&adreno_dev->input_work); + } +} + +#ifdef CONFIG_INPUT +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int ret; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (handle == NULL) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = handler->name; + + ret = input_register_handle(handle); + if (ret) { + kfree(handle); + return ret; + } + + ret = input_open_device(handle); + if (ret) { + input_unregister_handle(handle); + kfree(handle); + } + + return ret; +} + +static void adreno_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} +#else +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + return 0; +} +static void adreno_input_disconnect(struct input_handle *handle) {} +#endif + +/* + * We are only interested in EV_ABS events so only register handlers for those + * input devices that have EV_ABS events + */ +static const struct input_device_id adreno_input_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_ABS) }, + /* assumption: MT_.._X & MT_.._Y are in the same long */ + .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = + BIT_MASK(ABS_MT_POSITION_X) | + BIT_MASK(ABS_MT_POSITION_Y) }, + }, + { }, +}; + +static struct input_handler adreno_input_handler = { + .event = adreno_input_event, + .connect = adreno_input_connect, + .disconnect = adreno_input_disconnect, + .name = "kgsl", + .id_table = adreno_input_ids, +}; + +static int adreno_soft_reset(struct kgsl_device *device); + +/* + * _soft_reset() - Soft reset GPU + * @adreno_dev: Pointer to adreno device + * + * Soft reset the GPU by doing a AHB write of value 1 to RBBM_SW_RESET + * register. This is used when we want to reset the GPU without + * turning off GFX power rail. The reset when asserted resets + * all the HW logic, restores GPU registers to default state and + * flushes out pending VBIF transactions. 
+ */ +static void _soft_reset(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int reg; + + /* + * On a530 v1 RBBM cannot be reset in soft reset. + * Reset all blocks except RBBM for a530v1. + */ + if (adreno_is_a530v1(adreno_dev)) { + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD, + 0xFFDFFC0); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2, + 0x1FFFFFFF); + } else { + + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 1); + /* + * Do a dummy read to get a brief read cycle delay for the + * reset to take effect + */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, &reg); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 0); + } + + /* The SP/TP regulator gets turned off after a soft reset */ + + if (gpudev->regulator_enable) + gpudev->regulator_enable(adreno_dev); +} + + +void adreno_irqctrl(struct adreno_device *adreno_dev, int state) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int mask = state ? gpudev->irq->mask : 0; + + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, mask); +} + + /* + * adreno_hang_int_callback() - Isr for fatal interrupts that hang GPU + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + + KGSL_DRV_CRIT(device, "MISC: GPU hang detected\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_set_gpu_fault(ADRENO_DEVICE(device), ADRENO_HARD_FAULT); + adreno_dispatcher_schedule(device); +} + + /* + * adreno_cp_callback() - CP interrupt handler + * @adreno_dev: Adreno device pointer + * @irq: irq number + * + * Handle the cp interrupt generated by GPU. 
+ */ +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_schedule_work(&device->event_work); + adreno_dispatcher_schedule(device); +} + +static irqreturn_t adreno_irq_handler(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_irq *irq_params = gpudev->irq; + irqreturn_t ret = IRQ_NONE; + unsigned int status = 0, tmp; + int i; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status); + + /* Loop through all set interrupts and call respective handlers */ + for (tmp = status; tmp != 0;) { + i = fls(tmp) - 1; + + if (irq_params->funcs[i].func != NULL) { + irq_params->funcs[i].func(adreno_dev, i); + ret = IRQ_HANDLED; + } else + KGSL_DRV_CRIT(device, + "Unhandled interrupt bit %x\n", i); + + tmp &= ~BIT(i); + } + + gpudev->irq_trace(adreno_dev, status); + + if (status) + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_CLEAR_CMD, + status); + return ret; + +} + +static inline bool _rev_match(unsigned int id, unsigned int entry) +{ + return (entry == ANY_ID || entry == id); +} + +static inline const struct adreno_gpu_core *_get_gpu_core(unsigned int chipid) +{ + unsigned int core = ADRENO_CHIPID_CORE(chipid); + unsigned int major = ADRENO_CHIPID_MAJOR(chipid); + unsigned int minor = ADRENO_CHIPID_MINOR(chipid); + unsigned int patchid = ADRENO_CHIPID_PATCH(chipid); + int i; + + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (core == adreno_gpulist[i].core && + _rev_match(major, adreno_gpulist[i].major) && + _rev_match(minor, adreno_gpulist[i].minor) && + _rev_match(patchid, adreno_gpulist[i].patchid)) + return &adreno_gpulist[i]; + } + + return NULL; +} + +static void +adreno_identify_gpu(struct adreno_device *adreno_dev) +{ + const struct adreno_reg_offsets *reg_offsets; + struct adreno_gpudev *gpudev; + int i; + + if (kgsl_property_read_u32(&adreno_dev->dev, "qcom,chipid", + &adreno_dev->chipid)) + KGSL_DRV_FATAL(&adreno_dev->dev, + "No GPU chip ID was specified\n"); + + adreno_dev->gpucore = _get_gpu_core(adreno_dev->chipid); + + if (adreno_dev->gpucore == NULL) + KGSL_DRV_FATAL(&adreno_dev->dev, "Unknown GPU chip ID %8.8X\n", + adreno_dev->chipid); + + /* + * The gmem size might be dynamic when ocmem is involved so copy it out + * of the gpu device + */ + + adreno_dev->gmem_size = adreno_dev->gpucore->gmem_size; + + /* + * Initialize uninitialzed gpu registers, only needs to be done once + * Make all offsets that are not initialized to ADRENO_REG_UNUSED + */ + + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + reg_offsets = gpudev->reg_offsets; + + for (i = 0; i < ADRENO_REG_REGISTER_MAX; i++) { + if (reg_offsets->offset_0 != i && !reg_offsets->offsets[i]) + reg_offsets->offsets[i] = ADRENO_REG_UNUSED; + } + + /* Do target specific identification */ + if (gpudev->platform_setup != NULL) + gpudev->platform_setup(adreno_dev); +} + +static const struct platform_device_id adreno_id_table[] = { + { DEVICE_3D0_NAME, (unsigned long) &device_3d0, }, + {}, +}; + +MODULE_DEVICE_TABLE(platform, adreno_id_table); + +static const struct of_device_id adreno_match_table[] = { + { .compatible = "qcom,kgsl-3d0", .data = &device_3d0 }, + {} +}; + +static int adreno_of_parse_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *node) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *child; + + pwr->num_pwrlevels = 0; + + 
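	/*
	 * Each qcom,gpu-pwrlevels child describes one performance level, along
	 * the lines of (node name and values here are hypothetical):
	 *
	 *	qcom,gpu-pwrlevel@0 {
	 *		reg = <0>;
	 *		qcom,gpu-freq = <600000000>;
	 *		qcom,bus-freq = <12>;
	 *	};
	 *
	 * "reg" selects the slot (out-of-range indices are reported and
	 * skipped), num_pwrlevels is grown to cover the highest index seen,
	 * qcom,gpu-freq and qcom,bus-freq are mandatory, and qcom,bus-min /
	 * qcom,bus-max fall back to qcom,bus-freq when absent.
	 */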
for_each_child_of_node(node, child) { + unsigned int index; + struct kgsl_pwrlevel *level; + + if (adreno_of_read_property(child, "reg", &index)) + return -EINVAL; + + if (index >= KGSL_MAX_PWRLEVELS) { + KGSL_CORE_ERR("Pwrlevel index %d is out of range\n", + index); + continue; + } + + if (index >= pwr->num_pwrlevels) + pwr->num_pwrlevels = index + 1; + + level = &pwr->pwrlevels[index]; + + if (adreno_of_read_property(child, "qcom,gpu-freq", + &level->gpu_freq)) + return -EINVAL; + + if (adreno_of_read_property(child, "qcom,bus-freq", + &level->bus_freq)) + return -EINVAL; + + if (of_property_read_u32(child, "qcom,bus-min", + &level->bus_min)) + level->bus_min = level->bus_freq; + + if (of_property_read_u32(child, "qcom,bus-max", + &level->bus_max)) + level->bus_max = level->bus_freq; + } + + return 0; +} + +static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct device_node *node; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevels"); + + if (node == NULL) { + KGSL_CORE_ERR("Unable to find 'qcom,gpu-pwrlevels'\n"); + return -EINVAL; + } + + return adreno_of_parse_pwrlevels(adreno_dev, node); +} + +static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct device_node *node, *child; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); + if (node == NULL) + return adreno_of_get_legacy_pwrlevels(adreno_dev, parent); + + for_each_child_of_node(node, child) { + unsigned int bin; + + if (of_property_read_u32(child, "qcom,speed-bin", &bin)) + continue; + + if (bin == adreno_dev->speed_bin) + return adreno_of_parse_pwrlevels(adreno_dev, child); + } + + return -ENODEV; +} + +static inline struct adreno_device *adreno_get_dev(struct platform_device *pdev) +{ + const struct of_device_id *of_id = + of_match_device(adreno_match_table, &pdev->dev); + + return of_id ? 
(struct adreno_device *) of_id->data : NULL; +} + +static struct { + unsigned int quirk; + const char *prop; +} adreno_quirks[] = { + { ADRENO_QUIRK_TWO_PASS_USE_WFI, "qcom,gpu-quirk-two-pass-use-wfi" }, + { ADRENO_QUIRK_IOMMU_SYNC, "qcom,gpu-quirk-iommu-sync" }, +}; + +static int adreno_of_get_power(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *node = pdev->dev.of_node; + int i, init_level; + + if (of_property_read_string(node, "label", &pdev->name)) { + KGSL_CORE_ERR("Unable to read 'label'\n"); + return -EINVAL; + } + + if (adreno_of_read_property(node, "qcom,id", &pdev->id)) + return -EINVAL; + + /* Set up quirks and other boolean options */ + for (i = 0; i < ARRAY_SIZE(adreno_quirks); i++) { + if (of_property_read_bool(node, adreno_quirks[i].prop)) + adreno_dev->quirks |= adreno_quirks[i].quirk; + } + + if (adreno_of_get_pwrlevels(adreno_dev, node)) + return -EINVAL; + + if (of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level)) + init_level = 1; + + if (init_level < 0 || init_level > pwr->num_pwrlevels) + init_level = 1; + + pwr->active_pwrlevel = init_level; + pwr->default_pwrlevel = init_level; + + /* get pm-qos-active-latency, set it to default if not found */ + if (of_property_read_u32(node, "qcom,pm-qos-active-latency", + &device->pwrctrl.pm_qos_active_latency)) + device->pwrctrl.pm_qos_active_latency = 501; + + /* get pm-qos-wakeup-latency, set it to default if not found */ + if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency", + &device->pwrctrl.pm_qos_wakeup_latency)) + device->pwrctrl.pm_qos_wakeup_latency = 101; + + if (of_property_read_u32(node, "qcom,idle-timeout", + (unsigned int *) &device->pwrctrl.interval_timeout)) + device->pwrctrl.interval_timeout = HZ/12; + + device->pwrctrl.strtstp_sleepwake = + of_property_read_bool(node, "qcom,strtstp-sleepwake"); + + device->pwrctrl.bus_control = of_property_read_bool(node, + "qcom,bus-control"); + + return 0; +} + +#ifdef CONFIG_MSM_OCMEM +static int +adreno_ocmem_malloc(struct adreno_device *adreno_dev) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_USES_OCMEM)) + return 0; + + if (adreno_dev->ocmem_hdl == NULL) { + adreno_dev->ocmem_hdl = + ocmem_allocate(OCMEM_GRAPHICS, adreno_dev->gmem_size); + if (IS_ERR_OR_NULL(adreno_dev->ocmem_hdl)) { + adreno_dev->ocmem_hdl = NULL; + return -ENOMEM; + } + + adreno_dev->gmem_size = adreno_dev->ocmem_hdl->len; + adreno_dev->gmem_base = adreno_dev->ocmem_hdl->addr; + } + + return 0; +} + +static void +adreno_ocmem_free(struct adreno_device *adreno_dev) +{ + if (adreno_dev->ocmem_hdl != NULL) { + ocmem_free(OCMEM_GRAPHICS, adreno_dev->ocmem_hdl); + adreno_dev->ocmem_hdl = NULL; + } +} +#else +static int +adreno_ocmem_malloc(struct adreno_device *adreno_dev) +{ + return 0; +} + +static void +adreno_ocmem_free(struct adreno_device *adreno_dev) +{ +} +#endif + +static int adreno_probe(struct platform_device *pdev) +{ + struct kgsl_device *device; + struct adreno_device *adreno_dev; + int status; + + /* Defer adreno probe if IOMMU is not already probed */ + if (device_3d0_iommu.regstart == 0) + return -EPROBE_DEFER; + + adreno_dev = adreno_get_dev(pdev); + + if (adreno_dev == NULL) { + pr_err("adreno: qcom,kgsl-3d0 does not exist in the device tree"); + return -ENODEV; + } + + device = &adreno_dev->dev; + device->pdev = pdev; + device->mmu.priv = &device_3d0_iommu; + + /* Get the chip ID from the DT and set up target specific parameters 
*/ + adreno_identify_gpu(adreno_dev); + + status = adreno_of_get_power(adreno_dev, pdev); + if (status) { + device->pdev = NULL; + return status; + } + + /* + * The SMMU APIs use unsigned long for virtual addresses which means + * that we cannot use 64 bit virtual addresses on a 32 bit kernel even + * though the hardware and the rest of the KGSL driver supports it. + */ + if ((BITS_PER_LONG == 64) && ADRENO_FEATURE(adreno_dev, ADRENO_64BIT)) + device->mmu.features |= KGSL_MMU_64BIT; + + status = kgsl_device_platform_probe(device); + if (status) { + device->pdev = NULL; + return status; + } + + /* + * qcom,iommu-secure-id is used to identify MMUs that can handle secure + * content but that is only part of the story - the GPU also has to be + * able to handle secure content. Unfortunately in a classic catch-22 + * we cannot identify the GPU until after the DT is parsed. tl;dr - + * check the GPU capabilities here and modify mmu->secured accordingly + */ + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + device->mmu.secured = false; + + status = adreno_ringbuffer_init(adreno_dev, nopreempt); + if (status) + goto out; + + status = adreno_dispatcher_init(adreno_dev); + if (status) + goto out; + + adreno_debugfs_init(adreno_dev); + adreno_profile_init(adreno_dev); + + adreno_sysfs_init(device); + + kgsl_pwrscale_init(&pdev->dev, CONFIG_MSM_ADRENO_DEFAULT_GOVERNOR); + + adreno_input_handler.private = device; + +#ifdef CONFIG_INPUT + /* + * It isn't fatal if we cannot register the input handler. Sad, + * perhaps, but not fatal + */ + if (input_register_handler(&adreno_input_handler)) + KGSL_DRV_ERR(device, "Unable to register the input handler\n"); +#endif +out: + if (status) { + adreno_ringbuffer_close(adreno_dev); + kgsl_device_platform_remove(device); + device->pdev = NULL; + } + + return status; +} + +static void _adreno_free_memories(struct adreno_device *adreno_dev) +{ + if (test_bit(ADRENO_DEVICE_CMDBATCH_PROFILE, &adreno_dev->priv)) + kgsl_free_global(&adreno_dev->cmdbatch_profile_buffer); + + /* Free local copies of firmware and other command streams */ + kfree(adreno_dev->pfp_fw); + adreno_dev->pfp_fw = NULL; + + kfree(adreno_dev->pm4_fw); + adreno_dev->pm4_fw = NULL; + + kfree(adreno_dev->gpmu_cmds); + adreno_dev->gpmu_cmds = NULL; + + kgsl_free_global(&adreno_dev->pm4); + kgsl_free_global(&adreno_dev->pfp); +} + +static int adreno_remove(struct platform_device *pdev) +{ + struct adreno_device *adreno_dev = adreno_get_dev(pdev); + struct kgsl_device *device; + + if (adreno_dev == NULL) + return 0; + + device = &adreno_dev->dev; + + /* The memory is fading */ + _adreno_free_memories(adreno_dev); + +#ifdef CONFIG_INPUT + input_unregister_handler(&adreno_input_handler); +#endif + adreno_sysfs_close(device); + + adreno_coresight_remove(adreno_dev); + adreno_profile_close(adreno_dev); + + kgsl_pwrscale_close(device); + + adreno_dispatcher_close(adreno_dev); + adreno_ringbuffer_close(adreno_dev); + + adreno_fault_detect_stop(adreno_dev); + + kfree(adreno_ft_regs); + adreno_ft_regs = NULL; + + kfree(adreno_ft_regs_val); + adreno_ft_regs_val = NULL; + + if (efuse_base != NULL) + iounmap(efuse_base); + + adreno_perfcounter_close(adreno_dev); + kgsl_device_platform_remove(device); + + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) { + kgsl_free_global(&adreno_dev->pwron_fixup); + clear_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + } + clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); + + return 0; +} + +static void adreno_fault_detect_init(struct 
adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int i, val = adreno_dev->fast_hang_detect; + + /* Disable the fast hang detect bit until we know its a go */ + adreno_dev->fast_hang_detect = 0; + + adreno_ft_regs_num = (ARRAY_SIZE(adreno_ft_regs_default) + + gpudev->ft_perf_counters_count*2); + + adreno_ft_regs = kzalloc(adreno_ft_regs_num * sizeof(unsigned int), + GFP_KERNEL); + adreno_ft_regs_val = kzalloc(adreno_ft_regs_num * sizeof(unsigned int), + GFP_KERNEL); + + if (adreno_ft_regs == NULL || adreno_ft_regs_val == NULL) { + kfree(adreno_ft_regs); + kfree(adreno_ft_regs_val); + + adreno_ft_regs = NULL; + adreno_ft_regs_val = NULL; + + return; + } + + for (i = 0; i < ARRAY_SIZE(adreno_ft_regs_default); i++) + adreno_ft_regs[i] = adreno_getreg(adreno_dev, + adreno_ft_regs_default[i]); + + set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv); + + if (val) + adreno_fault_detect_start(adreno_dev); +} + +static int adreno_init(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + /* + * initialization only needs to be done once initially until + * device is shutdown + */ + if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv)) + return 0; + + /* + * Either the microcode read failed because the usermodehelper isn't + * available or the microcode was corrupted. Fail the init and force + * the user to try the open() again + */ + + ret = gpudev->microcode_read(adreno_dev); + if (ret) + return ret; + + /* Put the GPU in a responsive state */ + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (ret) + return ret; + + ret = adreno_iommu_init(adreno_dev); + if (ret) + return ret; + + /* Initialize coresight for the target */ + adreno_coresight_init(adreno_dev); + + adreno_perfcounter_init(adreno_dev); + adreno_fault_detect_init(adreno_dev); + + /* Power down the device */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + + /* + * Enable the power on shader corruption fix + * This is only applicable for 28nm targets + */ + if (adreno_is_a3xx(adreno_dev)) + adreno_a3xx_pwron_fixup_init(adreno_dev); + else if ((adreno_is_a405(adreno_dev)) || (adreno_is_a420(adreno_dev))) + adreno_a4xx_pwron_fixup_init(adreno_dev); + + if (gpudev->init != NULL) + gpudev->init(adreno_dev); + + set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); + + /* Use shader offset and length defined in gpudev */ + if (adreno_dev->gpucore->shader_offset && + adreno_dev->gpucore->shader_size) { + + if (device->shader_mem_phys || device->shader_mem_virt) + KGSL_DRV_ERR(device, + "Shader memory already specified in device tree\n"); + else { + device->shader_mem_phys = device->reg_phys + + adreno_dev->gpucore->shader_offset; + device->shader_mem_virt = device->reg_virt + + adreno_dev->gpucore->shader_offset; + device->shader_mem_len = + adreno_dev->gpucore->shader_size; + } + } + + /* Adjust snapshot section sizes according to core */ + if ((adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))) { + gpudev->snapshot_data->sect_sizes->cp_pfp = + A320_SNAPSHOT_CP_STATE_SECTION_SIZE; + gpudev->snapshot_data->sect_sizes->roq = + A320_SNAPSHOT_ROQ_SECTION_SIZE; + gpudev->snapshot_data->sect_sizes->cp_merciu = + A320_SNAPSHOT_CP_MERCIU_SECTION_SIZE; + } + + /* + * Allocate a small chunk of memory for precise cmdbatch profiling for + * those targets that have the always on timer + */ + + if 
(!adreno_is_a3xx(adreno_dev)) { + int r = kgsl_allocate_global(&adreno_dev->dev, + &adreno_dev->cmdbatch_profile_buffer, PAGE_SIZE, 0, 0); + + adreno_dev->cmdbatch_profile_index = 0; + + if (r == 0) { + set_bit(ADRENO_DEVICE_CMDBATCH_PROFILE, + &adreno_dev->priv); + kgsl_sharedmem_set(&adreno_dev->dev, + &adreno_dev->cmdbatch_profile_buffer, 0, 0, + PAGE_SIZE); + } + + } + + if (nopreempt == false && + ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) { + int r = 0; + + if (gpudev->preemption_init) + r = gpudev->preemption_init(adreno_dev); + + if (r == 0) + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + else + WARN(1, "adreno: GPU preemption is disabled\n"); + } + + return 0; +} + +static bool regulators_left_on(struct kgsl_device *device) +{ + int i; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) { + struct kgsl_regulator *regulator = + &device->pwrctrl.regulators[i]; + + if (IS_ERR_OR_NULL(regulator->reg)) + break; + + if (regulator_is_enabled(regulator->reg)) + return true; + } + + return false; +} + +/** + * _adreno_start - Power up the GPU and prepare to accept commands + * @adreno_dev: Pointer to an adreno_device structure + * + * The core function that powers up and initalizes the GPU. This function is + * called at init and after coming out of SLUMBER + */ +static int _adreno_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int status = -EINVAL; + unsigned int state = device->state; + bool regulator_left_on; + unsigned int pmqos_wakeup_vote = device->pwrctrl.pm_qos_wakeup_latency; + unsigned int pmqos_active_vote = device->pwrctrl.pm_qos_active_latency; + + /* make sure ADRENO_DEVICE_STARTED is not set here */ + BUG_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)); + + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + pmqos_wakeup_vote); + + kgsl_cffdump_open(device); + + regulator_left_on = regulators_left_on(device); + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + /* Put the GPU in a responsive state */ + status = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (status) + goto error_pwr_off; + + /* Set the bit to indicate that we've just powered on */ + set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv); + + /* Soft reset the GPU if a regulator is stuck on*/ + if (regulator_left_on) + _soft_reset(adreno_dev); + + status = kgsl_mmu_start(device); + if (status) + goto error_pwr_off; + + /* Program GPU contect protection init values */ + if (device->mmu.secured) { + if (adreno_is_a4xx(adreno_dev)) + adreno_writereg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, 0x2); + adreno_writereg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TSB_CONTROL, 0x0); + + adreno_writereg64(adreno_dev, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + KGSL_IOMMU_SECURE_BASE); + adreno_writereg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + KGSL_IOMMU_SECURE_SIZE); + } + + status = adreno_ocmem_malloc(adreno_dev); + if (status) { + KGSL_DRV_ERR(device, "OCMEM malloc failed\n"); + goto error_mmu_off; + } + + /* Enable 64 bit gpu addr if feature is set */ + if (gpudev->enable_64bit && + ADRENO_FEATURE(adreno_dev, ADRENO_64BIT)) + gpudev->enable_64bit(adreno_dev); + + if (adreno_dev->perfctr_pwr_lo == 0) { + int ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_PWR, 1, + &adreno_dev->perfctr_pwr_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + 
KGSL_DRV_ERR(device, + "Unable to get the perf counters for DCVS\n"); + adreno_dev->perfctr_pwr_lo = 0; + } + } + + if (device->pwrctrl.bus_control) { + int ret; + + /* VBIF waiting for RAM */ + if (adreno_dev->starved_ram_lo == 0) { + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0, + &adreno_dev->starved_ram_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + KGSL_DRV_ERR(device, + "Unable to get perf counters for bus DCVS\n"); + adreno_dev->starved_ram_lo = 0; + } + } + + /* VBIF DDR cycles */ + if (adreno_dev->ram_cycles_lo == 0) { + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + VBIF_AXI_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + KGSL_DRV_ERR(device, + "Unable to get perf counters for bus DCVS\n"); + adreno_dev->ram_cycles_lo = 0; + } + } + } + + /* Clear the busy_data stats - we're starting over from scratch */ + adreno_dev->busy_data.gpu_busy = 0; + adreno_dev->busy_data.vbif_ram_cycles = 0; + adreno_dev->busy_data.vbif_starved_ram = 0; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LM) + && adreno_dev->lm_threshold_count == 0) { + int ret; + + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 27, + &adreno_dev->lm_threshold_count, NULL, + PERFCOUNTER_FLAG_KERNEL); + /* Ignore noncritical ret - used for debugfs */ + if (ret) + adreno_dev->lm_threshold_count = 0; + } + + /* Restore performance counter registers with saved values */ + adreno_perfcounter_restore(adreno_dev); + + /* Start the GPU */ + gpudev->start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_irqctrl(adreno_dev, 1); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + status = adreno_ringbuffer_start(adreno_dev, ADRENO_START_COLD); + if (status) + goto error_mmu_off; + + if (gpudev->hw_init) { + status = gpudev->hw_init(adreno_dev); + if (status) + goto error_mmu_off; + } + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + if (pmqos_active_vote != pmqos_wakeup_vote) + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + pmqos_active_vote); + + return 0; + +error_mmu_off: + kgsl_mmu_stop(&device->mmu); + +error_pwr_off: + /* set the state back to original state */ + kgsl_pwrctrl_change_state(device, state); + + if (pmqos_active_vote != pmqos_wakeup_vote) + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + pmqos_active_vote); + + return status; +} + +/** + * adreno_start() - Power up and initialize the GPU + * @device: Pointer to the KGSL device to power up + * @priority: Boolean flag to specify of the start should be scheduled in a low + * latency work queue + * + * Power up the GPU and initialize it. 
If priority is specified then elevate + * the thread priority for the duration of the start operation + */ +static int adreno_start(struct kgsl_device *device, int priority) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int nice = task_nice(current); + int ret; + + if (priority && (adreno_wake_nice < nice)) + set_user_nice(current, adreno_wake_nice); + + ret = _adreno_start(adreno_dev); + + if (priority) + set_user_nice(current, nice); + + return ret; +} + +/** + * adreno_vbif_clear_pending_transactions() - Clear transactions in VBIF pipe + * @device: Pointer to the device whose VBIF pipe is to be cleared + */ +static int adreno_vbif_clear_pending_transactions(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int mask = gpudev->vbif_xin_halt_ctrl0_mask; + unsigned int val; + unsigned long wait_for_vbif; + int ret = 0; + + adreno_writereg(adreno_dev, ADRENO_REG_VBIF_XIN_HALT_CTRL0, mask); + /* wait for the transactions to clear */ + wait_for_vbif = jiffies + msecs_to_jiffies(100); + while (1) { + adreno_readreg(adreno_dev, + ADRENO_REG_VBIF_XIN_HALT_CTRL1, &val); + if ((val & mask) == mask) + break; + if (time_after(jiffies, wait_for_vbif)) { + KGSL_DRV_ERR(device, + "Wait limit reached for VBIF XIN Halt\n"); + ret = -ETIMEDOUT; + break; + } + } + adreno_writereg(adreno_dev, ADRENO_REG_VBIF_XIN_HALT_CTRL0, 0); + return ret; +} + +static int adreno_stop(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + return 0; + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_dispatcher_stop(adreno_dev); + + adreno_ringbuffer_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + adreno_ocmem_free(adreno_dev); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + /* Save physical performance counter values before GPU power down*/ + adreno_perfcounter_save(adreno_dev); + + adreno_vbif_clear_pending_transactions(device); + + kgsl_mmu_stop(&device->mmu); + kgsl_cffdump_close(device); + + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + return 0; +} + +static inline bool adreno_try_soft_reset(struct kgsl_device *device, int fault) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* + * Do not do soft reset for a IOMMU fault (because the IOMMU hardware + * needs a reset too) or for the A304 because it can't do SMMU + * programming of any kind after a soft reset + */ + + if ((fault & ADRENO_IOMMU_PAGE_FAULT) || adreno_is_a304(adreno_dev)) + return false; + + return true; +} + +/** + * adreno_reset() - Helper function to reset the GPU + * @device: Pointer to the KGSL device structure for the GPU + * @fault: Type of fault. Needed to skip soft reset for MMU fault + * + * Try to reset the GPU to recover from a fault. First, try to do a low latency + * soft reset. If the soft reset fails for some reason, then bring out the big + * guns and toggle the footswitch. 
+ */ +int adreno_reset(struct kgsl_device *device, int fault) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = -EINVAL; + int i = 0; + + /* Try soft reset first */ + if (adreno_try_soft_reset(device, fault)) { + /* Make sure VBIF is cleared before resetting */ + ret = adreno_vbif_clear_pending_transactions(device); + + if (ret == 0) { + ret = adreno_soft_reset(device); + if (ret) + KGSL_DEV_ERR_ONCE(device, + "Device soft reset failed\n"); + } + } + if (ret) { + /* If soft reset failed/skipped, then pull the power */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + /* since device is officially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* Keep trying to start the device until it works */ + for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) { + ret = adreno_start(device, 0); + if (!ret) + break; + + msleep(20); + } + } + if (ret) + return ret; + + if (0 != i) + KGSL_DRV_WARN(device, "Device hard reset tried %d tries\n", i); + + /* + * If active_cnt is non-zero then the system was active before + * going into a reset - put it back in that state + */ + + if (atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + else + kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP); + + /* Set the page table back to the default page table */ + kgsl_mmu_set_pt(&device->mmu, device->mmu.defaultpagetable); + kgsl_sharedmem_writel(device, + &adreno_dev->ringbuffers[0].pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname), 0); + + return ret; +} + +static int adreno_getproperty(struct kgsl_device *device, + unsigned int type, + void __user *value, + size_t sizebytes) +{ + int status = -EINVAL; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + switch (type) { + case KGSL_PROP_DEVICE_INFO: + { + struct kgsl_devinfo devinfo; + + if (sizebytes != sizeof(devinfo)) { + status = -EINVAL; + break; + } + + memset(&devinfo, 0, sizeof(devinfo)); + devinfo.device_id = device->id+1; + devinfo.chip_id = adreno_dev->chipid; + devinfo.mmu_enabled = kgsl_mmu_enabled(); + devinfo.gmem_gpubaseaddr = adreno_dev->gmem_base; + devinfo.gmem_sizebytes = adreno_dev->gmem_size; + + if (copy_to_user(value, &devinfo, sizeof(devinfo)) != + 0) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_DEVICE_SHADOW: + { + struct kgsl_shadowprop shadowprop; + + if (sizebytes != sizeof(shadowprop)) { + status = -EINVAL; + break; + } + memset(&shadowprop, 0, sizeof(shadowprop)); + if (device->memstore.hostptr) { + /*NOTE: with mmu enabled, gpuaddr doesn't mean + * anything to mmap(). 
+ */ + shadowprop.gpuaddr = + (unsigned int) device->memstore.gpuaddr; + shadowprop.size = device->memstore.size; + /* GSL needs this to be set, even if it + appears to be meaningless */ + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; + } + if (copy_to_user(value, &shadowprop, + sizeof(shadowprop))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_MMU_ENABLE: + { + int mmu_prop = kgsl_mmu_enabled(); + + if (sizebytes != sizeof(int)) { + status = -EINVAL; + break; + } + if (copy_to_user(value, &mmu_prop, sizeof(mmu_prop))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_INTERRUPT_WAITS: + { + int int_waits = 1; + if (sizebytes != sizeof(int)) { + status = -EINVAL; + break; + } + if (copy_to_user(value, &int_waits, sizeof(int))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_UCHE_GMEM_VADDR: + { + uint64_t gmem_vaddr = 0; + if (adreno_is_a5xx(adreno_dev)) + gmem_vaddr = ADRENO_UCHE_GMEM_BASE; + if (sizebytes != sizeof(uint64_t)) { + status = -EINVAL; + break; + } + if (copy_to_user(value, &gmem_vaddr, + sizeof(uint64_t))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_SP_GENERIC_MEM: + { + struct kgsl_sp_generic_mem sp_mem; + if (sizebytes != sizeof(sp_mem)) { + status = -EINVAL; + break; + } + memset(&sp_mem, 0, sizeof(sp_mem)); + + sp_mem.local = adreno_dev->sp_local_gpuaddr; + sp_mem.pvt = adreno_dev->sp_pvt_gpuaddr; + + if (copy_to_user(value, &sp_mem, sizeof(sp_mem))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_UCODE_VERSION: + { + struct kgsl_ucode_version ucode; + + if (sizebytes != sizeof(ucode)) { + status = -EINVAL; + break; + } + memset(&ucode, 0, sizeof(ucode)); + + ucode.pfp = adreno_dev->pfp_fw_version; + ucode.pm4 = adreno_dev->pm4_fw_version; + + if (copy_to_user(value, &ucode, sizeof(ucode))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_GPMU_VERSION: + { + struct kgsl_gpmu_version gpmu; + + if (adreno_dev->gpucore == NULL) { + status = -EINVAL; + break; + } + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) { + status = -EOPNOTSUPP; + break; + } + + if (sizebytes != sizeof(gpmu)) { + status = -EINVAL; + break; + } + memset(&gpmu, 0, sizeof(gpmu)); + + gpmu.major = adreno_dev->gpucore->gpmu_major; + gpmu.minor = adreno_dev->gpucore->gpmu_minor; + gpmu.features = adreno_dev->gpucore->gpmu_features; + + if (copy_to_user(value, &gpmu, sizeof(gpmu))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + default: + status = -EINVAL; + } + + return status; +} + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint) +{ + int status = 0; + + switch (constraint->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + struct kgsl_device_constraint_pwrlevel pwr; + + if (constraint->size != sizeof(pwr)) { + status = -EINVAL; + break; + } + + if (copy_from_user(&pwr, + (void __user *)constraint->data, + sizeof(pwr))) { + status = -EFAULT; + break; + } + if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) { + status = -EINVAL; + break; + } + + context->pwr_constraint.type = + KGSL_CONSTRAINT_PWRLEVEL; + context->pwr_constraint.sub_type = pwr.level; + trace_kgsl_user_pwrlevel_constraint(device, + context->id, + context->pwr_constraint.type, + context->pwr_constraint.sub_type); + } + break; + case KGSL_CONSTRAINT_NONE: + if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL) + 
trace_kgsl_user_pwrlevel_constraint(device, + context->id, + KGSL_CONSTRAINT_NONE, + context->pwr_constraint.sub_type); + context->pwr_constraint.type = KGSL_CONSTRAINT_NONE; + break; + + default: + status = -EINVAL; + break; + } + + /* If a new constraint has been set for a context, cancel the old one */ + if ((status == 0) && + (context->id == device->pwrctrl.constraint.owner_id)) { + trace_kgsl_constraint(device, device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + return status; +} + +static int adreno_setproperty(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes) +{ + int status = -EINVAL; + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + switch (type) { + case KGSL_PROP_PWRCTRL: { + unsigned int enable; + + if (sizebytes != sizeof(enable)) + break; + + if (copy_from_user(&enable, value, sizeof(enable))) { + status = -EFAULT; + break; + } + + mutex_lock(&device->mutex); + + if (enable) { + device->pwrctrl.ctrl_flags = 0; + + if (!kgsl_active_count_get(&adreno_dev->dev)) { + adreno_fault_detect_start(adreno_dev); + kgsl_active_count_put(&adreno_dev->dev); + } + + kgsl_pwrscale_enable(device); + } else { + kgsl_pwrctrl_change_state(device, + KGSL_STATE_ACTIVE); + device->pwrctrl.ctrl_flags = KGSL_PWR_ON; + adreno_fault_detect_stop(adreno_dev); + kgsl_pwrscale_disable(device); + } + + mutex_unlock(&device->mutex); + status = 0; + } + break; + case KGSL_PROP_PWR_CONSTRAINT: { + struct kgsl_device_constraint constraint; + struct kgsl_context *context; + + if (sizebytes != sizeof(constraint)) + break; + + if (copy_from_user(&constraint, value, + sizeof(constraint))) { + status = -EFAULT; + break; + } + + context = kgsl_context_get_owner(dev_priv, + constraint.context_id); + + if (context == NULL) + break; + + status = adreno_set_constraint(device, context, + &constraint); + + kgsl_context_put(context); + } + break; + default: + break; + } + + return status; +} + +/* + * adreno_irq_pending() - Checks if interrupt is generated by h/w + * @adreno_dev: Pointer to device whose interrupts are checked + * + * Returns true if interrupts are pending from device else 0. + */ +inline unsigned int adreno_irq_pending(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int status; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status); + + return (status & gpudev->irq->mask) ? 
1 : 0; +} + + +/** + * adreno_hw_isidle() - Check if the GPU core is idle + * @adreno_dev: Pointer to the Adreno device structure for the GPU + * + * Return true if the RBBM status register for the GPU type indicates that the + * hardware is idle + */ +bool adreno_hw_isidle(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *gpucore = adreno_dev->gpucore; + unsigned int reg_rbbm_status; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, + ®_rbbm_status); + + if (reg_rbbm_status & gpucore->busy_mask) + return false; + + /* Don't consider ourselves idle if there is an IRQ pending */ + if (adreno_irq_pending(adreno_dev)) + return false; + + return true; +} + +/** + * adreno_soft_reset() - Do a soft reset of the GPU hardware + * @device: KGSL device to soft reset + * + * "soft reset" the GPU hardware - this is a fast path GPU reset + * The GPU hardware is reset but we never pull power so we can skip + * a lot of the standard adreno_stop/adreno_start sequence + */ +static int adreno_soft_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + adreno_set_active_ctxs_null(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + adreno_clear_gpu_fault(adreno_dev); + /* since device is oficially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* save physical performance counter values before GPU soft reset */ + adreno_perfcounter_save(adreno_dev); + + kgsl_cffdump_close(device); + /* Reset the GPU */ + _soft_reset(adreno_dev); + + /* start of new CFF after reset */ + kgsl_cffdump_open(device); + + /* Enable 64 bit gpu addr if feature is set */ + if (gpudev->enable_64bit && + ADRENO_FEATURE(adreno_dev, ADRENO_64BIT)) + gpudev->enable_64bit(adreno_dev); + + /* Restore physical performance counter values after soft reset */ + adreno_perfcounter_restore(adreno_dev); + + /* Reinitialize the GPU */ + gpudev->start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + /* Enable IRQ */ + adreno_irqctrl(adreno_dev, 1); + + /* stop all ringbuffers to cancel RB events */ + adreno_ringbuffer_stop(adreno_dev); + /* + * If we have offsets for the jump tables we can try to do a warm start, + * otherwise do a full ringbuffer restart + */ + + if (ADRENO_FEATURE(adreno_dev, ADRENO_WARM_START)) + ret = adreno_ringbuffer_start(adreno_dev, ADRENO_START_WARM); + else + ret = adreno_ringbuffer_start(adreno_dev, ADRENO_START_COLD); + if (ret) + goto done; + + if (gpudev->hw_init) + ret = gpudev->hw_init(adreno_dev); + if (ret) + goto done; + + device->reset_counter++; + /* device is back online */ + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + +done: + return ret; +} + +/* + * adreno_isidle() - return true if the GPU hardware is idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Return true if the GPU hardware is idle and there are no commands pending in + * the ringbuffer + */ +bool adreno_isidle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb; + int i; + + if (!kgsl_state_is_awake(device)) + return true; + + adreno_get_rptr(ADRENO_CURRENT_RINGBUFFER(adreno_dev)); + + /* + * wptr is updated when we add commands to ringbuffer, add a barrier + * to make sure updated wptr is compared to rptr + */ + smp_mb(); + + /* + * ringbuffer is 
truly idle when all ringbuffers read and write + * pointers are equal + */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->rptr != rb->wptr) + break; + } + + if (i == adreno_dev->num_ringbuffers) + return adreno_hw_isidle(adreno_dev); + + return false; +} + +/** + * adreno_spin_idle() - Spin wait for the GPU to idle + * @device: Pointer to the KGSL device + * @timeout: milliseconds to wait before returning error + * + * Spin the CPU waiting for the RBBM status to return idle + */ +int adreno_spin_idle(struct kgsl_device *device, unsigned int timeout) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned long wait = jiffies + msecs_to_jiffies(timeout); + + kgsl_cffdump_regpoll(device, + adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2, + 0x00000000, 0x80000000); + + while (time_before(jiffies, wait)) { + /* + * If we fault, stop waiting and return an error. The dispatcher + * will clean up the fault from the work queue, but we need to + * make sure we don't block it by waiting for an idle that + * will never come. + */ + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EDEADLK; + + if (adreno_isidle(device)) + return 0; + } + + return -ETIMEDOUT; +} + +/** + * adreno_idle() - wait for the GPU hardware to go idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet. + */ + +int adreno_idle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret; + + /* + * Make sure the device mutex is held so the dispatcher can't send any + * more commands to the hardware + */ + + BUG_ON(!mutex_is_locked(&device->mutex)); + + /* Check if we are already idle before idling dispatcher */ + if (adreno_isidle(device)) + return 0; + /* + * Wait for dispatcher to finish completing commands + * already submitted + */ + ret = adreno_dispatcher_idle(adreno_dev); + if (ret) + return ret; + + return adreno_spin_idle(device, ADRENO_IDLE_TIMEOUT); +} + +/** + * adreno_drain() - Drain the dispatch queue + * @device: Pointer to the KGSL device structure for the GPU + * + * Drain the dispatcher of existing command batches. This halts + * additional commands from being issued until the gate is completed. + */ +static int adreno_drain(struct kgsl_device *device) +{ + reinit_completion(&device->cmdbatch_gate); + + return 0; +} + +/* Caller must hold the device mutex. 
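+ * adreno_suspend_context() processes any pending profiling results,
+ * idles the GPU, switches the MMU back to the default pagetable and
+ * sets the ringbuffers back to the NULL context so the device can be
+ * safely suspended.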
*/ +static int adreno_suspend_context(struct kgsl_device *device) +{ + int status = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* process any profiling results that are available */ + adreno_profile_process_results(adreno_dev); + + status = adreno_idle(device); + if (status) + return status; + /* set the device to default pagetable */ + kgsl_mmu_set_pt(&device->mmu, device->mmu.defaultpagetable); + kgsl_sharedmem_writel(device, + &adreno_dev->ringbuffers[0].pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname), 0); + /* set ringbuffers to NULL ctxt */ + adreno_set_active_ctxs_null(adreno_dev); + + return status; +} + +/** + * adreno_read - General read function to read adreno device memory + * @device - Pointer to the GPU device struct (for adreno device) + * @base - Base address (kernel virtual) where the device memory is mapped + * @offsetwords - Offset in words from the base address, of the memory that + * is to be read + * @value - Value read from the device memory + * @mem_len - Length of the device memory mapped to the kernel + */ +static void adreno_read(struct kgsl_device *device, void __iomem *base, + unsigned int offsetwords, unsigned int *value, + unsigned int mem_len) +{ + + unsigned int __iomem *reg; + BUG_ON(offsetwords*sizeof(uint32_t) >= mem_len); + reg = (unsigned int __iomem *)(base + (offsetwords << 2)); + + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + + /*ensure this read finishes before the next one. + * i.e. act like normal readl() */ + *value = __raw_readl(reg); + rmb(); +} + +/** + * adreno_regread - Used to read adreno device registers + * @offsetwords - Word (4 Bytes) offset to the register to be read + * @value - Value read from device register + */ +static void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value) +{ + adreno_read(device, device->reg_virt, offsetwords, value, + device->reg_len); +} + +/** + * adreno_shadermem_regread - Used to read GPU (adreno) shader memory + * @device - GPU device whose shader memory is to be read + * @offsetwords - Offset in words, of the shader memory address to be read + * @value - Pointer to where the read shader mem value is to be stored + */ +void adreno_shadermem_regread(struct kgsl_device *device, + unsigned int offsetwords, unsigned int *value) +{ + adreno_read(device, device->shader_mem_virt, offsetwords, value, + device->shader_mem_len); +} + +static void adreno_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + unsigned int __iomem *reg; + + BUG_ON(offsetwords*sizeof(uint32_t) >= device->reg_len); + + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + + trace_kgsl_regwrite(device, offsetwords, value); + + kgsl_cffdump_regwrite(device, offsetwords << 2, value); + reg = (unsigned int __iomem *)(device->reg_virt + (offsetwords << 2)); + + /*ensure previous writes post before this one, + * i.e. act like normal writel() */ + wmb(); + __raw_writel(value, reg); +} + +/** + * adreno_waittimestamp - sleep while waiting for the specified timestamp + * @device - pointer to a KGSL device structure + * @context - pointer to the active kgsl context + * @timestamp - GPU timestamp to wait for + * @msecs - amount of time to wait (in milliseconds) + * + * Wait up to 'msecs' milliseconds for the specified timestamp to expire. 
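+ *
+ * Returns 0 on success, -ENOTTY if no context was specified, -ENOENT if
+ * the context has been detached, -EDEADLK if the context was invalidated
+ * and -EPROTO if the device faulted since the last check.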
+ */ +static int adreno_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int msecs) +{ + int ret; + + if (context == NULL) { + /* If they are doing then complain once */ + dev_WARN_ONCE(device->dev, 1, + "IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n"); + return -ENOTTY; + } + + /* Return -ENOENT if the context has been detached */ + if (kgsl_context_detached(context)) + return -ENOENT; + + ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context, + timestamp, msecs); + + /* If the context got invalidated then return a specific error */ + if (kgsl_context_invalid(context)) + ret = -EDEADLK; + + /* + * Return -EPROTO if the device has faulted since the last time we + * checked. Userspace uses this as a marker for performing post + * fault activities + */ + + if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv)) + ret = -EPROTO; + + return ret; +} + +/** + * __adreno_readtimestamp() - Reads the timestamp from memstore memory + * @device: Pointer to device whose memstore is read + * @index: Index into the memstore memory + * @type: Type of timestamp to read + * @timestamp: The out parameter where the timestamp is read + */ +static int __adreno_readtimestamp(struct kgsl_device *device, int index, + int type, unsigned int *timestamp) +{ + int status = 0; + + switch (type) { + case KGSL_TIMESTAMP_CONSUMED: + kgsl_sharedmem_readl(&device->memstore, timestamp, + KGSL_MEMSTORE_OFFSET(index, soptimestamp)); + break; + case KGSL_TIMESTAMP_RETIRED: + kgsl_sharedmem_readl(&device->memstore, timestamp, + KGSL_MEMSTORE_OFFSET(index, eoptimestamp)); + break; + default: + status = -EINVAL; + *timestamp = 0; + break; + } + return status; +} + +/** + * adreno_rb_readtimestamp(): Return the value of given type of timestamp + * for a RB + * @device: GPU device whose timestamp values are being queried + * @priv: The object being queried for a timestamp (expected to be a rb pointer) + * @type: The type of timestamp (one of 3) to be read + * @timestamp: Pointer to where the read timestamp is to be written to + * + * CONSUMED and RETIRED type timestamps are sorted by id and are constantly + * updated by the GPU through shared memstore memory. QUEUED type timestamps + * are read directly from context struct. + + * The function returns 0 on success and timestamp value at the *timestamp + * address and returns -EINVAL on any read error/invalid type and timestamp = 0. + */ +int adreno_rb_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + int status = 0; + struct adreno_ringbuffer *rb = priv; + + /* + * If user passed in a NULL pointer for timestamp, return without + * doing anything. + */ + if (!timestamp) + return status; + + if (KGSL_TIMESTAMP_QUEUED == type) + *timestamp = rb->timestamp; + else + status = __adreno_readtimestamp(device, + rb->id + KGSL_MEMSTORE_MAX, + type, timestamp); + + return status; +} + +/** + * adreno_readtimestamp(): Return the value of given type of timestamp + * @device: GPU device whose timestamp values are being queried + * @priv: The object being queried for a timestamp (expected to be a context) + * @type: The type of timestamp (one of 3) to be read + * @timestamp: Pointer to where the read timestamp is to be written to + * + * CONSUMED and RETIRED type timestamps are sorted by id and are constantly + * updated by the GPU through shared memstore memory. QUEUED type timestamps + * are read directly from context struct. 
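+ * Context timestamps live in the memstore slot indexed by the context id;
+ * ringbuffer timestamps (see adreno_rb_readtimestamp()) use the slot at
+ * rb->id + KGSL_MEMSTORE_MAX.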
+ + * The function returns 0 on success and timestamp value at the *timestamp + * address and returns -EINVAL on any read error/invalid type and timestamp = 0. + */ +static int adreno_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + int status = 0; + struct kgsl_context *context = priv; + unsigned int id = KGSL_CONTEXT_ID(context); + + BUG_ON(NULL == context || id >= KGSL_MEMSTORE_MAX); + /* + * If user passed in a NULL pointer for timestamp, return without + * doing anything. + */ + if (!timestamp) + return status; + + if (KGSL_TIMESTAMP_QUEUED == type) + *timestamp = adreno_context_timestamp(context); + else + status = __adreno_readtimestamp(device, + context->id, type, timestamp); + + return status; +} + +static inline s64 adreno_ticks_to_us(u32 ticks, u32 freq) +{ + freq /= 1000000; + return ticks / freq; +} + +static unsigned int counter_delta(struct adreno_device *adreno_dev, + unsigned int reg, unsigned int *counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int val; + unsigned int ret = 0; + + /* Read the value */ + kgsl_regread(device, reg, &val); + + /* Return 0 for the first read */ + if (*counter != 0) { + if (val < *counter) + ret = (0xFFFFFFFF - *counter) + val; + else + ret = val - *counter; + } + + *counter = val; + return ret; +} + +/** + * adreno_power_stats() - Reads the counters needed for freq decisions + * @device: Pointer to device whose counters are read + * @stats: Pointer to stats set that needs updating + * Power: The caller is expected to be in a clock enabled state as this + * function does reg reads + */ +static void adreno_power_stats(struct kgsl_device *device, + struct kgsl_power_stats *stats) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + + memset(stats, 0, sizeof(*stats)); + + /* Get the busy cycles counted since the counter was last reset */ + if (adreno_dev->perfctr_pwr_lo != 0) { + uint64_t gpu_busy; + + gpu_busy = counter_delta(adreno_dev, adreno_dev->perfctr_pwr_lo, + &busy->gpu_busy); + + stats->busy_time = adreno_ticks_to_us(gpu_busy, + kgsl_pwrctrl_active_freq(pwr)); + } + + if (device->pwrctrl.bus_control) { + uint64_t ram_cycles = 0, starved_ram = 0; + + if (adreno_dev->ram_cycles_lo != 0) + ram_cycles = counter_delta(adreno_dev, + adreno_dev->ram_cycles_lo, + &busy->vbif_ram_cycles); + + if (adreno_dev->starved_ram_lo != 0) + starved_ram = counter_delta(adreno_dev, + adreno_dev->starved_ram_lo, + &busy->vbif_starved_ram); + + stats->ram_time = ram_cycles; + stats->ram_wait = starved_ram; + } + if (adreno_dev->lm_threshold_count) + kgsl_regread(&adreno_dev->dev, adreno_dev->lm_threshold_count, + &adreno_dev->lm_threshold_cross); +} + +static unsigned int adreno_gpuid(struct kgsl_device *device, + unsigned int *chipid) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Some applications need to know the chip ID too, so pass + * that as a parameter */ + + if (chipid != NULL) + *chipid = adreno_dev->chipid; + + /* Standard KGSL gpuid format: + * top word is 0x0002 for 2D or 0x0003 for 3D + * Bottom word is core specific identifer + */ + + return (0x0003 << 16) | ADRENO_GPUREV(adreno_dev); +} + +static int adreno_regulator_enable(struct kgsl_device *device) +{ + int ret = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + if 
(gpudev->regulator_enable && + !test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) { + ret = gpudev->regulator_enable(adreno_dev); + if (!ret) + set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv); + } + return ret; +} + +static bool adreno_is_hw_collapsible(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + /* + * Skip power collapse for A304, if power ctrl flag is set to + * non zero. As A304 soft_reset will not work, power collapse + * needs to disable to avoid soft_reset. + */ + if (adreno_is_a304(adreno_dev) && + device->pwrctrl.ctrl_flags) + return false; + + return adreno_isidle(device) && (gpudev->is_sptp_idle ? + gpudev->is_sptp_idle(adreno_dev) : true); +} + +static void adreno_regulator_disable(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + if (gpudev->regulator_disable && + test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) { + gpudev->regulator_disable(adreno_dev); + clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv); + } +} + +static void adreno_pwrlevel_change_settings(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->pwrlevel_change_settings) + gpudev->pwrlevel_change_settings(adreno_dev, prelevel, + postlevel, post); +} + +static void adreno_iommu_sync(struct kgsl_device *device, bool sync) +{ + struct scm_desc desc = {0}; + int ret; + + if (sync == true) { + mutex_lock(&kgsl_mmu_sync); + desc.args[0] = true; + desc.arginfo = SCM_ARGS(1); + ret = scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_PWR, 0x8), &desc); + if (ret) + KGSL_DRV_ERR(device, + "MMU sync with Hypervisor off %x\n", ret); + } else { + desc.args[0] = false; + desc.arginfo = SCM_ARGS(1); + scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_PWR, 0x8), &desc); + mutex_unlock(&kgsl_mmu_sync); + } +} + +static void _regulator_disable(struct kgsl_regulator *regulator, bool poll) +{ + unsigned long wait_time = jiffies + msecs_to_jiffies(200); + + if (IS_ERR_OR_NULL(regulator->reg)) + return; + + regulator_disable(regulator->reg); + + if (poll == false) + return; + + while (!time_after(jiffies, wait_time)) { + if (!regulator_is_enabled(regulator->reg)) + return; + cpu_relax(); + } + + KGSL_CORE_ERR("regulator '%s' still on after 200ms\n", regulator->name); +} + +static void adreno_regulator_disable_poll(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + /* Fast path - hopefully we don't need this quirk */ + if (!ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_IOMMU_SYNC)) { + for (i = KGSL_MAX_REGULATORS - 1; i >= 0; i--) + _regulator_disable(&pwr->regulators[i], false); + return; + } + + adreno_iommu_sync(device, true); + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) + _regulator_disable(&pwr->regulators[i], true); + + adreno_iommu_sync(device, false); +} + +static const struct kgsl_functable adreno_functable = { + /* Mandatory functions */ + .regread = adreno_regread, + .regwrite = adreno_regwrite, + .idle = adreno_idle, + .isidle = adreno_isidle, + .suspend_context = adreno_suspend_context, + .init = adreno_init, + .start = adreno_start, + .stop = adreno_stop, + .getproperty = 
adreno_getproperty, + .getproperty_compat = adreno_getproperty_compat, + .waittimestamp = adreno_waittimestamp, + .readtimestamp = adreno_readtimestamp, + .issueibcmds = adreno_ringbuffer_issueibcmds, + .ioctl = adreno_ioctl, + .compat_ioctl = adreno_compat_ioctl, + .power_stats = adreno_power_stats, + .gpuid = adreno_gpuid, + .snapshot = adreno_snapshot, + .irq_handler = adreno_irq_handler, + .drain = adreno_drain, + /* Optional functions */ + .drawctxt_create = adreno_drawctxt_create, + .drawctxt_detach = adreno_drawctxt_detach, + .drawctxt_destroy = adreno_drawctxt_destroy, + .drawctxt_dump = adreno_drawctxt_dump, + .setproperty = adreno_setproperty, + .setproperty_compat = adreno_setproperty_compat, + .drawctxt_sched = adreno_drawctxt_sched, + .resume = adreno_dispatcher_start, + .regulator_enable = adreno_regulator_enable, + .is_hw_collapsible = adreno_is_hw_collapsible, + .regulator_disable = adreno_regulator_disable, + .pwrlevel_change_settings = adreno_pwrlevel_change_settings, + .regulator_disable_poll = adreno_regulator_disable_poll, +}; + +static struct platform_driver adreno_platform_driver = { + .probe = adreno_probe, + .remove = adreno_remove, + .suspend = kgsl_suspend_driver, + .resume = kgsl_resume_driver, + .id_table = adreno_id_table, + .driver = { + .owner = THIS_MODULE, + .name = DEVICE_3D_NAME, + .pm = &kgsl_pm_ops, + .of_match_table = adreno_match_table, + } +}; + +static int __init kgsl_3d_init(void) +{ + return platform_driver_register(&adreno_platform_driver); +} + +static void __exit kgsl_3d_exit(void) +{ + platform_driver_unregister(&adreno_platform_driver); +} + +module_init(kgsl_3d_init); +module_exit(kgsl_3d_exit); + + +static struct of_device_id busmon_match_table[] = { + { .compatible = "qcom,kgsl-busmon", .data = &device_3d0 }, + {} +}; + +static int adreno_busmon_probe(struct platform_device *pdev) +{ + struct kgsl_device *device; + const struct of_device_id *pdid = + of_match_device(busmon_match_table, &pdev->dev); + + if (pdid == NULL) + return -ENXIO; + + device = (struct kgsl_device *)pdid->data; + device->busmondev = &pdev->dev; + dev_set_drvdata(device->busmondev, device); + + return 0; +} + +static struct platform_driver kgsl_bus_platform_driver = { + .probe = adreno_busmon_probe, + .driver = { + .owner = THIS_MODULE, + .name = "kgsl-busmon", + .of_match_table = busmon_match_table, + } +}; + +static int __init kgsl_busmon_init(void) +{ + return platform_driver_register(&kgsl_bus_platform_driver); +} + +static void __exit kgsl_busmon_exit(void) +{ + platform_driver_unregister(&kgsl_bus_platform_driver); +} + +module_init(kgsl_busmon_init); +module_exit(kgsl_busmon_exit); + +MODULE_DESCRIPTION("3D Graphics driver"); +MODULE_VERSION("1.2"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:kgsl_3d"); diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h new file mode 100644 index 000000000000..82399db488ae --- /dev/null +++ b/drivers/gpu/msm/adreno.h @@ -0,0 +1,1399 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __ADRENO_H +#define __ADRENO_H + +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "adreno_drawctxt.h" +#include "adreno_ringbuffer.h" +#include "adreno_profile.h" +#include "adreno_dispatch.h" +#include "kgsl_iommu.h" +#include "adreno_perfcounter.h" +#include <linux/stat.h> +#include <linux/delay.h> + +#include "a4xx_reg.h" + +#ifdef CONFIG_MSM_OCMEM +#include <soc/qcom/ocmem.h> +#endif + +#define DEVICE_3D_NAME "kgsl-3d" +#define DEVICE_3D0_NAME "kgsl-3d0" + +#define ADRENO_PRIORITY_MAX_RB_LEVELS 4 + +/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ +#define ADRENO_DEVICE(device) \ + container_of(device, struct adreno_device, dev) + +/* ADRENO_CONTEXT - Given a context return the adreno context struct */ +#define ADRENO_CONTEXT(context) \ + container_of(context, struct adreno_context, base) + +/* ADRENO_GPU_DEVICE - Given an adreno device return the GPU specific struct */ +#define ADRENO_GPU_DEVICE(_a) ((_a)->gpucore->gpudev) + +#define ADRENO_CHIPID_CORE(_id) (((_id) >> 24) & 0xFF) +#define ADRENO_CHIPID_MAJOR(_id) (((_id) >> 16) & 0xFF) +#define ADRENO_CHIPID_MINOR(_id) (((_id) >> 8) & 0xFF) +#define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF) + +/* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */ +#define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev) + +/* ADRENO_GPUREV - Return the GPU patchid for the given adreno_device */ +#define ADRENO_PATCHID(_a) ((_a)->gpucore->patchid) + +/* + * ADRENO_FEATURE - return true if the specified feature is supported by the GPU + * core + */ +#define ADRENO_FEATURE(_dev, _bit) \ + ((_dev)->gpucore->features & (_bit)) + +/** + * ADRENO_QUIRK - return true if the specified quirk is required by the GPU + */ +#define ADRENO_QUIRK(_dev, _bit) \ + ((_dev)->quirks & (_bit)) + +/* + * ADRENO_PREEMPT_STYLE - return preemption style + */ +#define ADRENO_PREEMPT_STYLE(flags) \ + ((flags & KGSL_CONTEXT_PREEMPT_STYLE_MASK) >> \ + KGSL_CONTEXT_PREEMPT_STYLE_SHIFT) + +/* + * return the dispatcher cmdqueue in which the given cmdbatch should + * be submitted + */ +#define ADRENO_CMDBATCH_DISPATCH_CMDQUEUE(c) \ + (&((ADRENO_CONTEXT(c->context))->rb->dispatch_q)) + +#define ADRENO_CMDBATCH_RB(c) \ + ((ADRENO_CONTEXT(c->context))->rb) + +/* Adreno core features */ +/* The core uses OCMEM for GMEM/binning memory */ +#define ADRENO_USES_OCMEM BIT(0) +/* The core supports an accelerated warm start */ +#define ADRENO_WARM_START BIT(1) +/* The core supports the microcode bootstrap functionality */ +#define ADRENO_USE_BOOTSTRAP BIT(2) +/* The core supports SP/TP hw controlled power collapse */ +#define ADRENO_SPTP_PC BIT(3) +/* The core supports Peak Power Detection(PPD)*/ +#define ADRENO_PPD BIT(4) +/* The GPU supports content protection */ +#define ADRENO_CONTENT_PROTECTION BIT(5) +/* The GPU supports preemption */ +#define ADRENO_PREEMPTION BIT(6) +/* The core uses GPMU for power and limit management */ +#define ADRENO_GPMU BIT(7) +/* The GPMU supports Limits Management */ +#define ADRENO_LM BIT(8) +/* The core uses 64 bit GPU addresses */ +#define ADRENO_64BIT BIT(9) + +/* + * Adreno GPU quirks - control bits for various workarounds + */ + +/* Set TWOPASSUSEWFI in PC_DBG_ECO_CNTL (5XX) */ +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +/* Lock/unlock mutex to sync with the IOMMU */ +#define ADRENO_QUIRK_IOMMU_SYNC BIT(1) + +/* Flags to control command packet settings */ +#define KGSL_CMD_FLAGS_NONE 0 +#define KGSL_CMD_FLAGS_PMODE BIT(0) +#define KGSL_CMD_FLAGS_INTERNAL_ISSUE BIT(1) +#define 
KGSL_CMD_FLAGS_WFI BIT(2) +#define KGSL_CMD_FLAGS_PROFILE BIT(3) +#define KGSL_CMD_FLAGS_PWRON_FIXUP BIT(4) +#define KGSL_CMD_FLAGS_MEMLIST BIT(5) + +/* Command identifiers */ +#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF +#define KGSL_CMD_IDENTIFIER 0x2EEDFACE +#define KGSL_CMD_INTERNAL_IDENTIFIER 0x2EEDD00D +#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE +#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD +#define KGSL_END_OF_FRAME_IDENTIFIER 0x2E0F2E0F +#define KGSL_NOP_IB_IDENTIFIER 0x20F20F20 +#define KGSL_START_OF_PROFILE_IDENTIFIER 0x2DEFADE1 +#define KGSL_END_OF_PROFILE_IDENTIFIER 0x2DEFADE2 +#define KGSL_PWRON_FIXUP_IDENTIFIER 0x2AFAFAFA + +#define ADRENO_ISTORE_START 0x5000 /* Istore offset */ + +#define ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW 50 + +/* One cannot wait forever for the core to idle, so set an upper limit to the + * amount of time to wait for the core to go idle + */ + +#define ADRENO_IDLE_TIMEOUT (20 * 1000) + +#define ADRENO_UCHE_GMEM_BASE 0x100000 + +enum adreno_gpurev { + ADRENO_REV_UNKNOWN = 0, + ADRENO_REV_A304 = 304, + ADRENO_REV_A305 = 305, + ADRENO_REV_A305C = 306, + ADRENO_REV_A306 = 307, + ADRENO_REV_A306A = 308, + ADRENO_REV_A310 = 310, + ADRENO_REV_A320 = 320, + ADRENO_REV_A330 = 330, + ADRENO_REV_A305B = 335, + ADRENO_REV_A405 = 405, + ADRENO_REV_A418 = 418, + ADRENO_REV_A420 = 420, + ADRENO_REV_A430 = 430, + ADRENO_REV_A505 = 505, + ADRENO_REV_A506 = 506, + ADRENO_REV_A510 = 510, + ADRENO_REV_A530 = 530, +}; + +#define ADRENO_START_WARM 0 +#define ADRENO_START_COLD 1 + +#define ADRENO_SOFT_FAULT BIT(0) +#define ADRENO_HARD_FAULT BIT(1) +#define ADRENO_TIMEOUT_FAULT BIT(2) +#define ADRENO_IOMMU_PAGE_FAULT BIT(3) +#define ADRENO_PREEMPT_FAULT BIT(4) + +#define ADRENO_SPTP_PC_CTRL 0 +#define ADRENO_PPD_CTRL 1 +#define ADRENO_LM_CTRL 2 + +struct adreno_gpudev; + +struct adreno_busy_data { + unsigned int gpu_busy; + unsigned int vbif_ram_cycles; + unsigned int vbif_starved_ram; +}; + +/** + * struct adreno_gpu_core - A specific GPU core definition + * @gpurev: Unique GPU revision identifier + * @core: Match for the core version of the GPU + * @major: Match for the major version of the GPU + * @minor: Match for the minor version of the GPU + * @patchid: Match for the patch revision of the GPU + * @features: Common adreno features supported by this core + * @pm4fw_name: Filename for th PM4 firmware + * @pfpfw_name: Filename for the PFP firmware + * @zap_name: Filename for the Zap Shader ucode + * @gpudev: Pointer to the GPU family specific functions for this core + * @gmem_size: Amount of binning memory (GMEM/OCMEM) to reserve for the core + * @pm4_jt_idx: Index of the jump table in the PM4 microcode + * @pm4_jt_addr: Address offset to load the jump table for the PM4 microcode + * @pfp_jt_idx: Index of the jump table in the PFP microcode + * @pfp_jt_addr: Address offset to load the jump table for the PFP microcode + * @pm4_bstrp_size: Size of the bootstrap loader for PM4 microcode + * @pfp_bstrp_size: Size of the bootstrap loader for PFP microcde + * @pfp_bstrp_ver: Version of the PFP microcode that supports bootstraping + * @shader_offset: Offset of shader from gpu reg base + * @shader_size: Shader size + * @num_protected_regs: number of protected registers + * @gpmufw_name: Filename for the GPMU firmware + * @gpmu_major: Match for the GPMU & firmware, major revision + * @gpmu_minor: Match for the GPMU & firmware, minor revision + * @gpmu_features: Supported features for any given GPMU version + * @busy_mask: mask to check if GPU is busy in RBBM_STATUS + * 
@lm_major: Limits Management register sequence, major revision + * @lm_minor: LM register sequence, minor revision + * @regfw_name: Filename for the register sequence firmware + * @gpmu_tsens: ID for the temporature sensor used by the GPMU + * @max_power: Max possible power draw of a core, units elephant tail hairs + */ +struct adreno_gpu_core { + enum adreno_gpurev gpurev; + unsigned int core, major, minor, patchid; + unsigned long features; + const char *pm4fw_name; + const char *pfpfw_name; + const char *zap_name; + struct adreno_gpudev *gpudev; + size_t gmem_size; + unsigned int pm4_jt_idx; + unsigned int pm4_jt_addr; + unsigned int pfp_jt_idx; + unsigned int pfp_jt_addr; + unsigned int pm4_bstrp_size; + unsigned int pfp_bstrp_size; + unsigned int pfp_bstrp_ver; + unsigned long shader_offset; + unsigned int shader_size; + unsigned int num_protected_regs; + const char *gpmufw_name; + unsigned int gpmu_major; + unsigned int gpmu_minor; + unsigned int gpmu_features; + unsigned int busy_mask; + unsigned int lm_major, lm_minor; + const char *regfw_name; + unsigned int gpmu_tsens; + unsigned int max_power; +}; + +/** + * struct adreno_device - The mothership structure for all adreno related info + * @dev: Reference to struct kgsl_device + * @priv: Holds the private flags specific to the adreno_device + * @chipid: Chip ID specific to the GPU + * @gmem_base: Base physical address of GMEM + * @gmem_size: GMEM size + * @gpucore: Pointer to the adreno_gpu_core structure + * @pfp_fw: Buffer which holds the pfp ucode + * @pfp_fw_size: Size of pfp ucode buffer + * @pfp_fw_version: Version of pfp ucode + * @pfp: Memory descriptor which holds pfp ucode buffer info + * @pm4_fw: Buffer which holds the pm4 ucode + * @pm4_fw_size: Size of pm4 ucode buffer + * @pm4_fw_version: Version of pm4 ucode + * @pm4: Memory descriptor which holds pm4 ucode buffer info + * @gpmu_cmds_size: Length of gpmu cmd stream + * @gpmu_cmds: gpmu cmd stream + * @ringbuffers: Array of pointers to adreno_ringbuffers + * @num_ringbuffers: Number of ringbuffers for the GPU + * @cur_rb: Pointer to the current ringbuffer + * @next_rb: Ringbuffer we are switching to during preemption + * @prev_rb: Ringbuffer we are switching from during preemption + * @fast_hang_detect: Software fault detection availability + * @ft_policy: Defines the fault tolerance policy + * @long_ib_detect: Long IB detection availability + * @ft_pf_policy: Defines the fault policy for page faults + * @ocmem_hdl: Handle to the ocmem allocated buffer + * @profile: Container for adreno profiler information + * @dispatcher: Container for adreno GPU dispatcher + * @pwron_fixup: Command buffer to run a post-power collapse shader workaround + * @pwron_fixup_dwords: Number of dwords in the command buffer + * @input_work: Work struct for turning on the GPU after a touch event + * @busy_data: Struct holding GPU VBIF busy stats + * @ram_cycles_lo: Number of DDR clock cycles for the monitor session + * @perfctr_pwr_lo: Number of cycles VBIF is stalled by DDR + * @halt: Atomic variable to check whether the GPU is currently halted + * @ctx_d_debugfs: Context debugfs node + * @pwrctrl_flag: Flag to hold adreno specific power attributes + * @cmdbatch_profile_buffer: Memdesc holding the cmdbatch profiling buffer + * @cmdbatch_profile_index: Index to store the start/stop ticks in the profiling + * buffer + * @sp_local_gpuaddr: Base GPU virtual address for SP local memory + * @sp_pvt_gpuaddr: Base GPU virtual address for SP private memory + * @lm_fw: The LM firmware handle + * 
@lm_sequence: Pointer to the start of the register write sequence for LM + * @lm_size: The dword size of the LM sequence + * @lm_limit: limiting value for LM + * @lm_threshold_count: register value for counter for lm threshold breakin + * @lm_threshold_cross: number of current peaks exceeding threshold + * @speed_bin: Indicate which power level set to use + * @csdev: Pointer to a coresight device (if applicable) + */ +struct adreno_device { + struct kgsl_device dev; /* Must be first field in this struct */ + unsigned long priv; + unsigned int chipid; + unsigned long gmem_base; + unsigned long gmem_size; + const struct adreno_gpu_core *gpucore; + unsigned int *pfp_fw; + size_t pfp_fw_size; + unsigned int pfp_fw_version; + struct kgsl_memdesc pfp; + unsigned int *pm4_fw; + size_t pm4_fw_size; + unsigned int pm4_fw_version; + struct kgsl_memdesc pm4; + size_t gpmu_cmds_size; + unsigned int *gpmu_cmds; + struct adreno_ringbuffer ringbuffers[ADRENO_PRIORITY_MAX_RB_LEVELS]; + int num_ringbuffers; + struct adreno_ringbuffer *cur_rb; + struct adreno_ringbuffer *next_rb; + struct adreno_ringbuffer *prev_rb; + unsigned int fast_hang_detect; + unsigned long ft_policy; + unsigned int long_ib_detect; + unsigned long ft_pf_policy; + struct ocmem_buf *ocmem_hdl; + struct adreno_profile profile; + struct adreno_dispatcher dispatcher; + struct kgsl_memdesc pwron_fixup; + unsigned int pwron_fixup_dwords; + struct work_struct input_work; + struct adreno_busy_data busy_data; + unsigned int ram_cycles_lo; + unsigned int starved_ram_lo; + unsigned int perfctr_pwr_lo; + atomic_t halt; + struct dentry *ctx_d_debugfs; + unsigned long pwrctrl_flag; + + struct kgsl_memdesc cmdbatch_profile_buffer; + unsigned int cmdbatch_profile_index; + uint64_t sp_local_gpuaddr; + uint64_t sp_pvt_gpuaddr; + const struct firmware *lm_fw; + uint32_t *lm_sequence; + uint32_t lm_size; + struct kgsl_memdesc preemption_counters; + struct work_struct gpmu_work; + uint32_t lm_leakage; + uint32_t lm_limit; + uint32_t lm_threshold_count; + uint32_t lm_threshold_cross; + + unsigned int speed_bin; + unsigned int quirks; + + struct coresight_device *csdev; +}; + +/** + * enum adreno_device_flags - Private flags for the adreno_device + * @ADRENO_DEVICE_PWRON - Set during init after a power collapse + * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup + * after power collapse + * @ADRENO_DEVICE_CORESIGHT - Set if the coresight (trace bus) registers should + * be restored after power collapse + * @ADRENO_DEVICE_HANG_INTR - Set if the hang interrupt should be enabled for + * this target + * @ADRENO_DEVICE_STARTED - Set if the device start sequence is in progress + * @ADRENO_DEVICE_FAULT - Set if the device is currently in fault (and shouldn't + * send any more commands to the ringbuffer) + * @ADRENO_DEVICE_CMDBATCH_PROFILE - Set if the device supports command batch + * profiling via the ALWAYSON counter + * @ADRENO_DEVICE_PREEMPTION - Turn on/off preemption + * @ADRENO_DEVICE_SOFT_FAULT_DETECT - Set if soft fault detect is enabled + * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization succeed + * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is + * attached and enabled + */ +enum adreno_device_flags { + ADRENO_DEVICE_PWRON = 0, + ADRENO_DEVICE_PWRON_FIXUP = 1, + ADRENO_DEVICE_INITIALIZED = 2, + ADRENO_DEVICE_CORESIGHT = 3, + ADRENO_DEVICE_HANG_INTR = 4, + ADRENO_DEVICE_STARTED = 5, + ADRENO_DEVICE_FAULT = 6, + ADRENO_DEVICE_CMDBATCH_PROFILE = 7, + ADRENO_DEVICE_GPU_REGULATOR_ENABLED 
= 8, + ADRENO_DEVICE_PREEMPTION = 9, + ADRENO_DEVICE_SOFT_FAULT_DETECT = 10, + ADRENO_DEVICE_GPMU_INITIALIZED = 11, + ADRENO_DEVICE_ISDB_ENABLED = 12, +}; + +/** + * struct adreno_cmdbatch_profile_entry - a single command batch entry in the + * kernel profiling buffer + * @started: Number of GPU ticks at start of the command batch + * @retired: Number of GPU ticks at the end of the command batch + */ +struct adreno_cmdbatch_profile_entry { + uint64_t started; + uint64_t retired; +}; + +#define ADRENO_CMDBATCH_PROFILE_COUNT \ + (PAGE_SIZE / sizeof(struct adreno_cmdbatch_profile_entry)) + +#define ADRENO_CMDBATCH_PROFILE_OFFSET(_index, _member) \ + ((_index) * sizeof(struct adreno_cmdbatch_profile_entry) \ + + offsetof(struct adreno_cmdbatch_profile_entry, _member)) + + +/** + * adreno_regs: List of registers that are used in kgsl driver for all + * 3D devices. Each device type has different offset value for the same + * register, so an array of register offsets are declared for every device + * and are indexed by the enumeration values defined in this enum + */ +enum adreno_regs { + ADRENO_REG_CP_ME_RAM_WADDR, + ADRENO_REG_CP_ME_RAM_DATA, + ADRENO_REG_CP_PFP_UCODE_DATA, + ADRENO_REG_CP_PFP_UCODE_ADDR, + ADRENO_REG_CP_WFI_PEND_CTR, + ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, + ADRENO_REG_CP_RB_RPTR, + ADRENO_REG_CP_RB_WPTR, + ADRENO_REG_CP_CNTL, + ADRENO_REG_CP_ME_CNTL, + ADRENO_REG_CP_RB_CNTL, + ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, + ADRENO_REG_CP_IB1_BUFSZ, + ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, + ADRENO_REG_CP_IB2_BUFSZ, + ADRENO_REG_CP_TIMESTAMP, + ADRENO_REG_CP_SCRATCH_REG6, + ADRENO_REG_CP_SCRATCH_REG7, + ADRENO_REG_CP_ME_RAM_RADDR, + ADRENO_REG_CP_ROQ_ADDR, + ADRENO_REG_CP_ROQ_DATA, + ADRENO_REG_CP_MERCIU_ADDR, + ADRENO_REG_CP_MERCIU_DATA, + ADRENO_REG_CP_MERCIU_DATA2, + ADRENO_REG_CP_MEQ_ADDR, + ADRENO_REG_CP_MEQ_DATA, + ADRENO_REG_CP_HW_FAULT, + ADRENO_REG_CP_PROTECT_STATUS, + ADRENO_REG_CP_PREEMPT, + ADRENO_REG_CP_PREEMPT_DEBUG, + ADRENO_REG_CP_PREEMPT_DISABLE, + ADRENO_REG_CP_PROTECT_REG_0, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + ADRENO_REG_RBBM_STATUS, + ADRENO_REG_RBBM_STATUS3, + ADRENO_REG_RBBM_PERFCTR_CTL, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + ADRENO_REG_RBBM_INT_0_MASK, + ADRENO_REG_RBBM_INT_0_STATUS, + ADRENO_REG_RBBM_PM_OVERRIDE2, + ADRENO_REG_RBBM_INT_CLEAR_CMD, + ADRENO_REG_RBBM_SW_RESET_CMD, + ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD, + ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2, + ADRENO_REG_RBBM_CLOCK_CTL, + ADRENO_REG_VPC_DEBUG_RAM_SEL, + ADRENO_REG_VPC_DEBUG_RAM_READ, + ADRENO_REG_PA_SC_AA_CONFIG, + ADRENO_REG_SQ_GPR_MANAGEMENT, + ADRENO_REG_SQ_INST_STORE_MANAGMENT, + ADRENO_REG_TP0_CHICKEN, + ADRENO_REG_RBBM_RBBM_CTL, + ADRENO_REG_UCHE_INVALIDATE0, + ADRENO_REG_UCHE_INVALIDATE1, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + ADRENO_REG_RBBM_SECVID_TRUST_CONTROL, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, + ADRENO_REG_RBBM_SECVID_TSB_CONTROL, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + ADRENO_REG_VBIF_XIN_HALT_CTRL0, + ADRENO_REG_VBIF_XIN_HALT_CTRL1, + ADRENO_REG_VBIF_VERSION, + ADRENO_REG_REGISTER_MAX, +}; + +/** + * adreno_reg_offsets: Holds 
array of register offsets + * @offsets: Offset array of size defined by enum adreno_regs + * @offset_0: This is the index of the register in offset array whose value + * is 0. 0 is a valid register offset and during initialization of the + * offset array we need to know if an offset value is correctly defined to 0 + */ +struct adreno_reg_offsets { + unsigned int *const offsets; + enum adreno_regs offset_0; +}; + +#define ADRENO_REG_UNUSED 0xFFFFFFFF +#define ADRENO_REG_SKIP 0xFFFFFFFE +#define ADRENO_REG_DEFINE(_offset, _reg) [_offset] = _reg + +/* + * struct adreno_vbif_data - Describes vbif register value pair + * @reg: Offset to vbif register + * @val: The value that should be programmed in the register at reg + */ +struct adreno_vbif_data { + unsigned int reg; + unsigned int val; +}; + +/* + * struct adreno_vbif_platform - Holds an array of vbif reg value pairs + * for a particular core + * @devfunc: Pointer to platform/core identification function + * @vbif: Array of reg value pairs for vbif registers + */ +struct adreno_vbif_platform { + int(*devfunc)(struct adreno_device *); + const struct adreno_vbif_data *vbif; +}; + +/* + * struct adreno_vbif_snapshot_registers - Holds an array of vbif registers + * listed for snapshot dump for a particular core + * @version: vbif version + * @registers: vbif registers listed for snapshot dump + * @count: count of vbif registers listed for snapshot + */ +struct adreno_vbif_snapshot_registers { + const unsigned int version; + const unsigned int *registers; + const int count; +}; + +/** + * struct adreno_coresight_register - Definition for a coresight (tracebus) + * debug register + * @offset: Offset of the debug register in the KGSL mmio region + * @initial: Default value to write when coresight is enabled + * @value: Current shadow value of the register (to be reprogrammed after power + * collapse) + */ +struct adreno_coresight_register { + unsigned int offset; + unsigned int initial; + unsigned int value; +}; + +struct adreno_coresight_attr { + struct device_attribute attr; + struct adreno_coresight_register *reg; +}; + +ssize_t adreno_coresight_show_register(struct device *device, + struct device_attribute *attr, char *buf); + +ssize_t adreno_coresight_store_register(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size); + +#define ADRENO_CORESIGHT_ATTR(_attrname, _reg) \ + struct adreno_coresight_attr coresight_attr_##_attrname = { \ + __ATTR(_attrname, S_IRUGO | S_IWUSR, \ + adreno_coresight_show_register, \ + adreno_coresight_store_register), \ + (_reg), } + +/** + * struct adreno_coresight - GPU specific coresight definition + * @registers - Array of GPU specific registers to configure trace bus output + * @count - Number of registers in the array + * @groups - Pointer to an attribute list of control files + */ +struct adreno_coresight { + struct adreno_coresight_register *registers; + unsigned int count; + const struct attribute_group **groups; +}; + + +struct adreno_irq_funcs { + void (*func)(struct adreno_device *, int); +}; +#define ADRENO_IRQ_CALLBACK(_c) { .func = _c } + +struct adreno_irq { + unsigned int mask; + struct adreno_irq_funcs *funcs; +}; + +/* + * struct adreno_debugbus_block - Holds info about debug buses of a chip + * @block_id: Bus identifier + * @dwords: Number of dwords of data that this block holds + */ +struct adreno_debugbus_block { + unsigned int block_id; + unsigned int dwords; +}; + +/* + * struct adreno_snapshot_section_sizes - Structure holding the size of + * different sections 
dumped during device snapshot + * @cp_pfp: CP PFP data section size + * @cp_me: CP ME data section size + * @vpc_mem: VPC memory section size + * @cp_meq: CP MEQ size + * @shader_mem: Size of shader memory of 1 shader section + * @cp_merciu: CP MERCIU size + * @roq: ROQ size + */ +struct adreno_snapshot_sizes { + int cp_pfp; + int cp_me; + int vpc_mem; + int cp_meq; + int shader_mem; + int cp_merciu; + int roq; +}; + +/* + * struct adreno_snapshot_data - Holds data used in snapshot + * @sect_sizes: Has sections sizes + */ +struct adreno_snapshot_data { + struct adreno_snapshot_sizes *sect_sizes; +}; + +struct adreno_gpudev { + /* + * These registers are in a different location on different devices, + * so define them in the structure and use them as variables. + */ + const struct adreno_reg_offsets *reg_offsets; + const struct adreno_ft_perf_counters *ft_perf_counters; + unsigned int ft_perf_counters_count; + + struct adreno_perfcounters *perfcounters; + const struct adreno_invalid_countables + *invalid_countables; + struct adreno_snapshot_data *snapshot_data; + + struct adreno_coresight *coresight; + + struct adreno_irq *irq; + int num_prio_levels; + unsigned int vbif_xin_halt_ctrl0_mask; + /* GPU specific function hooks */ + void (*irq_trace)(struct adreno_device *, unsigned int status); + void (*snapshot)(struct adreno_device *, struct kgsl_snapshot *); + void (*platform_setup)(struct adreno_device *); + void (*init)(struct adreno_device *); + int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + int (*hw_init)(struct adreno_device *); + int (*microcode_read)(struct adreno_device *); + int (*microcode_load)(struct adreno_device *, unsigned int start_type); + void (*perfcounter_init)(struct adreno_device *); + void (*perfcounter_close)(struct adreno_device *); + void (*start)(struct adreno_device *); + bool (*is_sptp_idle)(struct adreno_device *); + int (*regulator_enable)(struct adreno_device *); + void (*regulator_disable)(struct adreno_device *); + void (*pwrlevel_change_settings)(struct adreno_device *, + unsigned int prelevel, unsigned int postlevel, + bool post); + int (*preemption_pre_ibsubmit)(struct adreno_device *, + struct adreno_ringbuffer *, unsigned int *, + struct kgsl_context *, uint64_t cond_addr, + struct kgsl_memobj_node *); + int (*preemption_post_ibsubmit)(struct adreno_device *, + struct adreno_ringbuffer *, unsigned int *, + struct kgsl_context *); + int (*preemption_token)(struct adreno_device *, + struct adreno_ringbuffer *, unsigned int *, + uint64_t gpuaddr); + int (*preemption_init)(struct adreno_device *); + void (*preemption_schedule)(struct adreno_device *); + void (*enable_64bit)(struct adreno_device *); +}; + +struct log_field { + bool show; + const char *display; +}; + +/** + * enum kgsl_ft_policy_bits - KGSL fault tolerance policy bits + * @KGSL_FT_OFF: Disable fault detection (not used) + * @KGSL_FT_REPLAY: Replay the faulting command + * @KGSL_FT_SKIPIB: Skip the faulting indirect buffer + * @KGSL_FT_SKIPFRAME: Skip the frame containing the faulting IB + * @KGSL_FT_DISABLE: Tells the dispatcher to disable FT for the command batch + * @KGSL_FT_TEMP_DISABLE: Disables FT for all commands + * @KGSL_FT_THROTTLE: Disable the context if it faults too often + * @KGSL_FT_SKIPCMD: Skip the command containing the faulting IB + */ +enum kgsl_ft_policy_bits { + KGSL_FT_OFF = 0, + KGSL_FT_REPLAY = 1, + KGSL_FT_SKIPIB = 2, + KGSL_FT_SKIPFRAME = 3, + KGSL_FT_DISABLE = 4, + KGSL_FT_TEMP_DISABLE = 5, + KGSL_FT_THROTTLE = 6, + KGSL_FT_SKIPCMD = 7, + /* 
KGSL_FT_MAX_BITS is used to calculate the mask */ + KGSL_FT_MAX_BITS, + /* Internal bits - set during GFT */ + /* Skip the PM dump on replayed command batches */ + KGSL_FT_SKIP_PMDUMP = 31, +}; + +#define KGSL_FT_POLICY_MASK GENMASK(KGSL_FT_MAX_BITS - 1, 0) + +#define KGSL_FT_DEFAULT_POLICY \ + (BIT(KGSL_FT_REPLAY) | \ + BIT(KGSL_FT_SKIPCMD) | \ + BIT(KGSL_FT_THROTTLE)) + +#define ADRENO_FT_TYPES \ + { BIT(KGSL_FT_OFF), "off" }, \ + { BIT(KGSL_FT_REPLAY), "replay" }, \ + { BIT(KGSL_FT_SKIPIB), "skipib" }, \ + { BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \ + { BIT(KGSL_FT_DISABLE), "disable" }, \ + { BIT(KGSL_FT_TEMP_DISABLE), "temp" }, \ + { BIT(KGSL_FT_THROTTLE), "throttle"}, \ + { BIT(KGSL_FT_SKIPCMD), "skipcmd" } + +/** + * enum kgsl_ft_pagefault_policy_bits - KGSL pagefault policy bits + * @KGSL_FT_PAGEFAULT_INT_ENABLE: No longer used, but retained for compatibility + * @KGSL_FT_PAGEFAULT_GPUHALT_ENABLE: enable GPU halt on pagefaults + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE: log one pagefault per page + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT: log one pagefault per interrupt + */ +enum { + KGSL_FT_PAGEFAULT_INT_ENABLE = 0, + KGSL_FT_PAGEFAULT_GPUHALT_ENABLE = 1, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE = 2, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT = 3, + /* KGSL_FT_PAGEFAULT_MAX_BITS is used to calculate the mask */ + KGSL_FT_PAGEFAULT_MAX_BITS, +}; + +#define KGSL_FT_PAGEFAULT_MASK GENMASK(KGSL_FT_PAGEFAULT_MAX_BITS - 1, 0) + +#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY 0 + +#define FOR_EACH_RINGBUFFER(_dev, _rb, _i) \ + for ((_i) = 0, (_rb) = &((_dev)->ringbuffers[0]); \ + (_i) < (_dev)->num_ringbuffers; \ + (_i)++, (_rb)++) + +struct adreno_ft_perf_counters { + unsigned int counter; + unsigned int countable; +}; + +extern unsigned int *adreno_ft_regs; +extern unsigned int adreno_ft_regs_num; +extern unsigned int *adreno_ft_regs_val; + +extern struct adreno_gpudev adreno_a3xx_gpudev; +extern struct adreno_gpudev adreno_a4xx_gpudev; +extern struct adreno_gpudev adreno_a5xx_gpudev; + +extern int adreno_wake_nice; +extern unsigned int adreno_wake_timeout; + +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +int adreno_spin_idle(struct kgsl_device *device, unsigned int timeout); +int adreno_idle(struct kgsl_device *device); +bool adreno_isidle(struct kgsl_device *device); + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint); + +void adreno_shadermem_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value); + +void adreno_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_context *context); + +int adreno_reset(struct kgsl_device *device, int fault); + +void adreno_fault_skipcmd_detached(struct kgsl_device *device, + struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch); + +int adreno_a3xx_pwron_fixup_init(struct adreno_device *adreno_dev); +int adreno_a4xx_pwron_fixup_init(struct adreno_device *adreno_dev); + +int adreno_coresight_init(struct adreno_device *adreno_dev); + +void adreno_coresight_start(struct adreno_device *adreno_dev); +void adreno_coresight_stop(struct adreno_device *adreno_dev); + +void adreno_coresight_remove(struct adreno_device *adreno_dev); + +bool adreno_hw_isidle(struct adreno_device *adreno_dev); + +int adreno_iommu_set_pt_ctx(struct 
adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt, + struct adreno_context *drawctxt); + +int adreno_iommu_init(struct adreno_device *adreno_dev); + +void adreno_iommu_set_pt_generate_rb_cmds(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pt); + +void adreno_fault_detect_start(struct adreno_device *adreno_dev); +void adreno_fault_detect_stop(struct adreno_device *adreno_dev); + +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit); +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit); + +unsigned int adreno_iommu_set_pt_ib(struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt); + +unsigned int adreno_iommu_set_pt_generate_cmds( + struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt); + +int adreno_sysfs_init(struct kgsl_device *device); +void adreno_sysfs_close(struct kgsl_device *device); + +void adreno_irqctrl(struct adreno_device *adreno_dev, int state); + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +int adreno_efuse_map(struct adreno_device *adreno_dev); +int adreno_efuse_read_u32(struct adreno_device *adreno_dev, unsigned int offset, + unsigned int *val); +void adreno_efuse_unmap(struct adreno_device *adreno_dev); + +#define ADRENO_TARGET(_name, _id) \ +static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \ +{ \ + return (ADRENO_GPUREV(adreno_dev) == (_id)); \ +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) >= 300) && + (ADRENO_GPUREV(adreno_dev) < 400)); +} + +ADRENO_TARGET(a304, ADRENO_REV_A304) +ADRENO_TARGET(a305, ADRENO_REV_A305) +ADRENO_TARGET(a305b, ADRENO_REV_A305B) +ADRENO_TARGET(a305c, ADRENO_REV_A305C) +ADRENO_TARGET(a306, ADRENO_REV_A306) +ADRENO_TARGET(a306a, ADRENO_REV_A306A) +ADRENO_TARGET(a310, ADRENO_REV_A310) +ADRENO_TARGET(a320, ADRENO_REV_A320) +ADRENO_TARGET(a330, ADRENO_REV_A330) + +static inline int adreno_is_a330v2(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0)); +} + +static inline int adreno_is_a330v21(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0xF)); +} + +static inline int adreno_is_a4xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 400 && + ADRENO_GPUREV(adreno_dev) < 500; +} + +ADRENO_TARGET(a405, ADRENO_REV_A405); + +static inline int adreno_is_a405v2(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A405) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0x10); +} + +ADRENO_TARGET(a418, ADRENO_REV_A418) +ADRENO_TARGET(a420, ADRENO_REV_A420) +ADRENO_TARGET(a430, ADRENO_REV_A430) + +static inline int adreno_is_a430v2(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A430) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1)); +} + +static inline int adreno_is_a5xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 500 && + ADRENO_GPUREV(adreno_dev) < 600; +} + +ADRENO_TARGET(a505, ADRENO_REV_A505) +ADRENO_TARGET(a506, ADRENO_REV_A506) +ADRENO_TARGET(a510, ADRENO_REV_A510) +ADRENO_TARGET(a530, ADRENO_REV_A530) + +static inline int adreno_is_a530v1(struct adreno_device *adreno_dev) +{ + 
return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0);
+}
+
+static inline int adreno_is_a530v2(struct adreno_device *adreno_dev)
+{
+ return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1);
+}
+
+static inline int adreno_is_a530v3(struct adreno_device *adreno_dev)
+{
+ return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2);
+}
+
+static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev)
+{
+ return ADRENO_GPUREV(adreno_dev) >= 505 &&
+ ADRENO_GPUREV(adreno_dev) <= 506;
+}
+/**
+ * adreno_context_timestamp() - Return the last queued timestamp for the context
+ * @k_ctxt: Pointer to the KGSL context to query
+ *
+ * Return the last queued timestamp for the given context. This is used to
+ * verify that incoming requests are not using an invalid (unsubmitted)
+ * timestamp.
+ */
+static inline int adreno_context_timestamp(struct kgsl_context *k_ctxt)
+{
+ struct adreno_context *drawctxt = ADRENO_CONTEXT(k_ctxt);
+ return drawctxt->timestamp;
+}
+
+/*
+ * adreno_checkreg_off() - Checks the validity of a register enum
+ * @adreno_dev: Pointer to adreno device
+ * @offset_name: The register enum that is checked
+ */
+static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+ if (offset_name >= ADRENO_REG_REGISTER_MAX ||
+ ADRENO_REG_UNUSED == gpudev->reg_offsets->offsets[offset_name])
+ BUG();
+
+ /*
+ * GPU register programming is kept as common as possible
+ * across the cores. Use ADRENO_REG_SKIP when certain register
+ * programming needs to be skipped for certain GPU cores.
+ * Example: Certain registers on a5xx like IB1_BASE are 64 bit.
+ * The common code programs the 64-bit register, but the upper
+ * 32 bits are skipped on a4xx and a3xx using ADRENO_REG_SKIP.
+ */
+ if (ADRENO_REG_SKIP == gpudev->reg_offsets->offsets[offset_name])
+ return false;
+
+ return true;
+}
+
+/*
+ * adreno_readreg() - Read a register by getting its offset from the
+ * offset array defined in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be read
+ * @val: Register value read is placed here
+ */
+static inline void adreno_readreg(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name, unsigned int *val)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+ if (adreno_checkreg_off(adreno_dev, offset_name))
+ kgsl_regread(&adreno_dev->dev,
+ gpudev->reg_offsets->offsets[offset_name], val);
+ else
+ *val = 0;
+}
+
+/*
+ * adreno_writereg() - Write a register by getting its offset from the
+ * offset array defined in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be written
+ * @val: Value to write
+ */
+static inline void adreno_writereg(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name, unsigned int val)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+ if (adreno_checkreg_off(adreno_dev, offset_name))
+ kgsl_regwrite(&adreno_dev->dev,
+ gpudev->reg_offsets->offsets[offset_name], val);
+}
+
+/*
+ * adreno_getreg() - Returns the offset value of a register from the
+ * register offset array in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum whose offset is returned
+ */
+static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+ if (!adreno_checkreg_off(adreno_dev, offset_name))
+ return ADRENO_REG_REGISTER_MAX;
+ return gpudev->reg_offsets->offsets[offset_name];
+}
+
+/**
+ * adreno_gpu_fault() - Return the current state of the GPU
+ * @adreno_dev: A pointer to the adreno_device to query
+ *
+ * Return 0 if there is no fault, or a positive value indicating the last type
+ * of fault that occurred
+ */
+static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev)
+{
+ smp_rmb();
+ return atomic_read(&adreno_dev->dispatcher.fault);
+}
+
+/**
+ * adreno_set_gpu_fault() - Set the current fault status of the GPU
+ * @adreno_dev: A pointer to the adreno_device to set
+ * @state: fault state to set
+ *
+ */
+static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev,
+ int state)
+{
+ /* only set the fault bit w/o overwriting other bits */
+ atomic_add(state, &adreno_dev->dispatcher.fault);
+ smp_wmb();
+}
+
+
+/**
+ * adreno_clear_gpu_fault() - Clear the GPU fault register
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the GPU fault status for the adreno device
+ */
+
+static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev)
+{
+ atomic_set(&adreno_dev->dispatcher.fault, 0);
+ smp_wmb();
+}
+
+/**
+ * adreno_gpu_halt() - Return the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline int adreno_gpu_halt(struct adreno_device *adreno_dev)
+{
+ smp_rmb();
+ return atomic_read(&adreno_dev->halt);
+}
+
+
+/**
+ * adreno_clear_gpu_halt() - Clear the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_clear_gpu_halt(struct adreno_device *adreno_dev)
+{
+ atomic_set(&adreno_dev->halt, 0);
+ smp_wmb();
+}
+
+/**
+ * adreno_get_gpu_halt() - Increment GPU halt refcount
+ * @adreno_dev: A pointer to the
adreno_device + */ +static inline void adreno_get_gpu_halt(struct adreno_device *adreno_dev) +{ + atomic_inc(&adreno_dev->halt); +} + +/** + * adreno_put_gpu_halt() - Decrement GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_put_gpu_halt(struct adreno_device *adreno_dev) +{ + if (atomic_dec_return(&adreno_dev->halt) < 0) + BUG(); +} + + +/* + * adreno_vbif_start() - Program VBIF registers, called in device start + * @adreno_dev: Pointer to device whose vbif data is to be programmed + * @vbif_platforms: list register value pair of vbif for a family + * of adreno cores + * @num_platforms: Number of platforms contained in vbif_platforms + */ +static inline void adreno_vbif_start(struct adreno_device *adreno_dev, + const struct adreno_vbif_platform *vbif_platforms, + int num_platforms) +{ + int i; + const struct adreno_vbif_data *vbif = NULL; + + for (i = 0; i < num_platforms; i++) { + if (vbif_platforms[i].devfunc(adreno_dev)) { + vbif = vbif_platforms[i].vbif; + break; + } + } + + while ((vbif != NULL) && (vbif->reg != 0)) { + kgsl_regwrite(&adreno_dev->dev, vbif->reg, vbif->val); + vbif++; + } +} + +/** + * adreno_set_protected_registers() - Protect the specified range of registers + * from being accessed by the GPU + * @adreno_dev: pointer to the Adreno device + * @index: Pointer to the index of the protect mode register to write to + * @reg: Starting dword register to write + * @mask_len: Size of the mask to protect (# of registers = 2 ** mask_len) + * + * Add the range of registers to the list of protected mode registers that will + * cause an exception if the GPU accesses them. There are 16 available + * protected mode registers. Index is used to specify which register to write + * to - the intent is to call this function multiple times with the same index + * pointer for each range and the registers will be magically programmed in + * incremental fashion + */ +static inline void adreno_set_protected_registers( + struct adreno_device *adreno_dev, unsigned int *index, + unsigned int reg, int mask_len) +{ + unsigned int val; + unsigned int base = + adreno_getreg(adreno_dev, ADRENO_REG_CP_PROTECT_REG_0); + unsigned int offset = *index; + + if (adreno_dev->gpucore->num_protected_regs) + BUG_ON(*index >= adreno_dev->gpucore->num_protected_regs); + else + BUG_ON(*index >= 16); + + /* + * On A4XX targets with more than 16 protected mode registers + * the upper registers are not contiguous with the lower 16 + * registers so we have to adjust the base and offset accordingly + */ + + if (adreno_is_a4xx(adreno_dev) && *index >= 0x10) { + base = A4XX_CP_PROTECT_REG_10; + offset = *index - 0x10; + } + + val = 0x60000000 | ((mask_len & 0x1F) << 24) | ((reg << 2) & 0xFFFFF); + + kgsl_regwrite(&adreno_dev->dev, base + offset, val); + *index = *index + 1; +} + +#ifdef CONFIG_DEBUG_FS +void adreno_debugfs_init(struct adreno_device *adreno_dev); +void adreno_context_debugfs_init(struct adreno_device *, + struct adreno_context *); +#else +static inline void adreno_debugfs_init(struct adreno_device *adreno_dev) { } +static inline void adreno_context_debugfs_init(struct adreno_device *device, + struct adreno_context *context) + { } +#endif + +/** + * adreno_compare_pm4_version() - Compare the PM4 microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare again + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. 
+ */ +static inline int adreno_compare_pm4_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->pm4_fw_version == version) + return 0; + + return (adreno_dev->pm4_fw_version > version) ? 1 : -1; +} + +/** + * adreno_compare_pfp_version() - Compare the PFP microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare against + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. + */ +static inline int adreno_compare_pfp_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->pfp_fw_version == version) + return 0; + + return (adreno_dev->pfp_fw_version > version) ? 1 : -1; +} + +/* + * adreno_bootstrap_ucode() - Checks if Ucode bootstrapping is supported + * @adreno_dev: Pointer to the the adreno device + */ +static inline int adreno_bootstrap_ucode(struct adreno_device *adreno_dev) +{ + return (ADRENO_FEATURE(adreno_dev, ADRENO_USE_BOOTSTRAP) && + adreno_compare_pfp_version(adreno_dev, + adreno_dev->gpucore->pfp_bstrp_ver) >= 0) ? 1 : 0; +} + +/** + * adreno_preempt_state() - Check if preemption state is equal to given state + * @adreno_dev: Device whose preemption state is checked + * @state: State to compare against + */ +static inline unsigned int adreno_preempt_state( + struct adreno_device *adreno_dev, + enum adreno_dispatcher_preempt_states state) +{ + return atomic_read(&adreno_dev->dispatcher.preemption_state) == + state; +} + +/** + * adreno_get_rptr() - Get the current ringbuffer read pointer + * @rb: Pointer the ringbuffer to query + * + * Get the current read pointer from the GPU register. + */ +static inline unsigned int +adreno_get_rptr(struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + if (adreno_dev->cur_rb == rb && + adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &(rb->rptr)); + + return rb->rptr; +} + +static inline bool adreno_is_preemption_enabled( + struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); +} + +/** + * adreno_ctx_get_rb() - Return the ringbuffer that a context should + * use based on priority + * @adreno_dev: The adreno device that context is using + * @drawctxt: The context pointer + */ +static inline struct adreno_ringbuffer *adreno_ctx_get_rb( + struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct kgsl_context *context; + int level; + if (!drawctxt) + return NULL; + + context = &(drawctxt->base); + + /* + * If preemption is disabled then everybody needs to go on the same + * ringbuffer + */ + + if (!adreno_is_preemption_enabled(adreno_dev)) + return &(adreno_dev->ringbuffers[0]); + + /* + * Math to convert the priority field in context structure to an RB ID. + * Divide up the context priority based on number of ringbuffer levels. 
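+ * e.g. with four ringbuffers, priorities 0-3 land on RB 0, 4-7 on RB 1,
+ * and so on; anything past the last full bucket is clamped to the
+ * lowest-priority ringbuffer below.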
+ */ + level = context->priority / adreno_dev->num_ringbuffers; + if (level < adreno_dev->num_ringbuffers) + return &(adreno_dev->ringbuffers[level]); + else + return &(adreno_dev->ringbuffers[ + adreno_dev->num_ringbuffers - 1]); +} +/* + * adreno_set_active_ctxs_null() - Put back reference to any active context + * and set the active context to NULL + * @adreno_dev: The adreno device + */ +static inline void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev) +{ + int i; + struct adreno_ringbuffer *rb; + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->drawctxt_active) + kgsl_context_put(&(rb->drawctxt_active->base)); + rb->drawctxt_active = NULL; + kgsl_sharedmem_writel(rb->device, &rb->pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + current_rb_ptname), 0); + } +} + +/* + * adreno_compare_prio_level() - Compares 2 priority levels based on enum values + * @p1: First priority level + * @p2: Second priority level + * + * Returns greater than 0 if p1 is higher priority, 0 if levels are equal else + * less than 0 + */ +static inline int adreno_compare_prio_level(int p1, int p2) +{ + return p2 - p1; +} + +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val); + +void adreno_writereg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t val); + +unsigned int adreno_iommu_set_apriv(struct adreno_device *adreno_dev, + unsigned int *cmds, int set); + +static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->fast_hang_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +#endif /*__ADRENO_H */ diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c new file mode 100644 index 000000000000..18b68ac7c633 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -0,0 +1,1847 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include <linux/firmware.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/msm_kgsl.h> + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "a3xx_reg.h" +#include "adreno_a3xx.h" +#include "adreno_a4xx.h" +#include "a4xx_reg.h" +#include "adreno_cp_parser.h" +#include "adreno_trace.h" +#include "adreno_pm4types.h" +#include "adreno_perfcounter.h" + +/* + * Define registers for a3xx that contain addresses used by the + * cp parser logic + */ +const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + A3XX_VSC_PIPE_DATA_ADDRESS_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + A3XX_VSC_PIPE_DATA_LENGTH_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + A3XX_VSC_PIPE_DATA_ADDRESS_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + A3XX_VSC_PIPE_DATA_LENGTH_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + A3XX_VSC_PIPE_DATA_ADDRESS_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + A3XX_VSC_PIPE_DATA_LENGTH_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + A3XX_VSC_PIPE_DATA_ADDRESS_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + A3XX_VSC_PIPE_DATA_LENGTH_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_ADDRESS_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_PIPE_DATA_LENGTH_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + A3XX_VFD_FETCH_INSTR_1_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + A3XX_VFD_FETCH_INSTR_1_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + A3XX_VFD_FETCH_INSTR_1_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + A3XX_VFD_FETCH_INSTR_1_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + A3XX_VFD_FETCH_INSTR_1_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + A3XX_VFD_FETCH_INSTR_1_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + A3XX_VFD_FETCH_INSTR_1_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + A3XX_VFD_FETCH_INSTR_1_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + A3XX_VFD_FETCH_INSTR_1_8), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + A3XX_VFD_FETCH_INSTR_1_9), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + A3XX_VFD_FETCH_INSTR_1_A), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + A3XX_VFD_FETCH_INSTR_1_B), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + A3XX_VFD_FETCH_INSTR_1_C), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + A3XX_VFD_FETCH_INSTR_1_D), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + A3XX_VFD_FETCH_INSTR_1_E), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + A3XX_VFD_FETCH_INSTR_1_F), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + A3XX_VSC_SIZE_ADDRESS), + 
ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + A3XX_SP_VS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + A3XX_SP_FS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + A3XX_SP_VS_OBJ_START_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + A3XX_SP_FS_OBJ_START_REG), +}; + +static unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device + *adreno_dev) +{ + if (adreno_is_a320(adreno_dev)) + return A320_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a330v2(adreno_dev)) + return A3XX_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a330(adreno_dev)) + return A330_RBBM_CLOCK_CTL_DEFAULT; + return A3XX_RBBM_CLOCK_CTL_DEFAULT; +} + +static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = { + 0x00000000, 0x302CC300, 0x00000000, 0x302CC304, + 0x00000000, 0x302CC308, 0x00000000, 0x302CC30C, + 0x00000000, 0x302CC310, 0x00000000, 0x302CC314, + 0x00000000, 0x302CC318, 0x00000000, 0x302CC31C, + 0x00000000, 0x302CC320, 0x00000000, 0x302CC324, + 0x00000000, 0x302CC328, 0x00000000, 0x302CC32C, + 0x00000000, 0x302CC330, 0x00000000, 0x302CC334, + 0x00000000, 0x302CC338, 0x00000000, 0x302CC33C, + 0x00000000, 0x00000400, 0x00020000, 0x63808003, + 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, + 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, + 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, + 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, + 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, + 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, + 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, + 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, + 0x00000003, 0x80D60003, 0x00000007, 0x80D60007, + 0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F, + 0x00000013, 0x80D60013, 0x00000017, 0x80D60017, + 0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F, + 0x00000023, 0x80D60023, 0x00000027, 0x80D60027, + 0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F, + 0x00000033, 0x80D60033, 0x00000037, 0x80D60037, + 0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F, + 0x00000000, 0x03000000, 0x00000000, 0x00000000, +}; + +/** + * adreno_a3xx_pwron_fixup_init() - Initalize a special command buffer to run a + * post-power collapse shader workaround + * @adreno_dev: Pointer to a adreno_device struct + * + * Some targets require a special workaround shader to be executed after + * power-collapse. 
Construct the IB once at init time and keep it + * handy + * + * Returns: 0 on success or negative on error + */ +int adreno_a3xx_pwron_fixup_init(struct adreno_device *adreno_dev) +{ + unsigned int *cmds; + int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions); + int ret; + + /* Return if the fixup is already in place */ + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + return 0; + + ret = kgsl_allocate_global(&adreno_dev->dev, + &adreno_dev->pwron_fixup, PAGE_SIZE, + KGSL_MEMFLAGS_GPUREADONLY, 0); + + if (ret) + return ret; + + cmds = adreno_dev->pwron_fixup.hostptr; + + *cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmds++ = 0x00000000; + *cmds++ = 0x90000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00010000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1); + *cmds++ = 0x00000040; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1); + *cmds++ = 0x80000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1); + *cmds++ = 0x0D001002; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1); + *cmds++ = 0x00401101; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1); + *cmds++ = 0x00000400; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1); + *cmds++ = 0x00000010; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1); + *cmds++ = 0x00040000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1); + *cmds++ = 0x0000000A; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 
1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1); + *cmds++ = 0x00000004; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1); + *cmds++ = 0x0DB0400A; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1); + *cmds++ = 0x00300402; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00010000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1); + *cmds++ = 0x0000000D; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1); + *cmds++ = 0x00000000; 
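+ /*
+ * Each cp_type0_packet(reg, 1) header in this sequence is followed by a
+ * single payload dword that the CP writes to that register, so the block
+ * above and below simply loads a known-good default state (HLSQ, SP,
+ * GRAS and RB) before the workaround kernel is kicked off with
+ * CP_EXEC_CL further down.
+ */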
+ *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1); + *cmds++ = 0x00008000; + *cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (0 << CP_LOADSTATE_STATESRC_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) | + (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00400000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00400220; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (13 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = 0x00000000; + + memcpy(cmds, _a3xx_pwron_fixup_fs_instructions, count << 2); + + cmds += count; + + *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000050; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Remember the number of dwords in the command buffer for when we + * program the indirect buffer call in the ringbuffer + */ + adreno_dev->pwron_fixup_dwords = + (cmds - (unsigned int *) adreno_dev->pwron_fixup.hostptr); + + /* Mark the flag in ->priv to show that we have the fix */ + set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + return 0; +} + +static void a3xx_platform_setup(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev; + const struct adreno_reg_offsets *reg_offsets; + + if (adreno_is_a306(adreno_dev) || adreno_is_a306a(adreno_dev)) { + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + reg_offsets = gpudev->reg_offsets; + reg_offsets->offsets[ADRENO_REG_VBIF_XIN_HALT_CTRL0] = + A3XX_VBIF2_XIN_HALT_CTRL0; + reg_offsets->offsets[ADRENO_REG_VBIF_XIN_HALT_CTRL1] = + A3XX_VBIF2_XIN_HALT_CTRL1; + gpudev->vbif_xin_halt_ctrl0_mask = + A3XX_VBIF2_XIN_HALT_CTRL0_MASK; + } +} + +/* + * a3xx_rb_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + 
* @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, common function shared between + * a3xx devices + */ +static int a3xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 18); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + *cmds++ = cp_type3_packet(CP_ME_INIT, 17); + + *cmds++ = 0x000003f7; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000080; + *cmds++ = 0x00000100; + *cmds++ = 0x00000180; + *cmds++ = 0x00006600; + *cmds++ = 0x00000150; + *cmds++ = 0x0000014e; + *cmds++ = 0x00000154; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* Enable protected mode registers for A3XX/A4XX */ + *cmds++ = 0x20000000; + + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "CP initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } + + return ret; +} + +/* + * a3xx_err_callback() - Call back for a3xx error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + + return; + } + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus oveflow\n"); + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer TO packet in IB interrupt\n"); + break; + case A3XX_INT_CP_OPCODE_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer opcode error interrupt\n"); + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer reserved bit error interrupt\n"); + break; + case A3XX_INT_CP_HW_FAULT: + kgsl_regread(device, A3XX_CP_HW_FAULT, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Ringbuffer HW fault | status=%x\n", reg); + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + kgsl_regread(device, A3XX_CP_PROTECT_STATUS, ®); + KGSL_DRV_CRIT(device, + "CP | Protected mode error| %s | addr=%x\n", + reg & (1 << 24) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + return; + case A3XX_INT_CP_AHB_ERROR_HALT: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer AHB error interrupt\n"); + break; + case A3XX_INT_UCHE_OOB_ACCESS: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n"); + break; + default: + KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt\n"); + } +} + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CACHE_FLUSH_TS) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static struct adreno_irq_funcs a3xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 2 - RBBM_REG_TIMEOUT */ + ADRENO_IRQ_CALLBACK(NULL), /* 3 - RBBM_ME_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(NULL), /* 4 - RBBM_PFP_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + ADRENO_IRQ_CALLBACK(NULL), /* 6 - RBBM_VFD_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + /* 10 - CP_RESERVED_BIT_ERROR */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CP_REG_PROTECT_FAULT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - CP_AHB_ERROR_FAULT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 22 - Unused */ + ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */ + /* 24 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ +}; + +static struct adreno_irq a3xx_irq = { + .funcs = a3xx_irq_funcs, + .mask = A3XX_INT_MASK, +}; + +/* VBIF registers start after 0x3000 so use 0x0 as end of list marker */ +static const struct adreno_vbif_data a304_vbif[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a305_vbif[] = { + /* Set up 16 deep read/write request queues */ + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF }, + /* Set up round robin arbitration between both AXI ports */ + { A3XX_VBIF_ARB_CTL, 0x00000030 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C }, + {0, 0}, +}; + +static 
const struct adreno_vbif_data a305b_vbif[] = { + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303 }, + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a305c_vbif[] = { + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x00101010 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x00101010 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000101 }, + { A3XX_VBIF_ARB_CTL, 0x00000010 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x00000007 }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x00070007 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a306_vbif[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, + {0, 0}, +}; + +static const struct adreno_vbif_data a306a_vbif[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, + {0, 0}, +}; + +static const struct adreno_vbif_data a310_vbif[] = { + { A3XX_VBIF_ABIT_SORT, 0x0001000F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x3 }, + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18180C0C }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x1818000C }, + {0, 0}, +}; + +static const struct adreno_vbif_data a320_vbif[] = { + /* Set up 16 deep read/write request queues */ + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF }, + /* Set up round robin arbitration between both AXI ports */ + { A3XX_VBIF_ARB_CTL, 0x00000030 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C }, + /* Enable 1K sort */ + { A3XX_VBIF_ABIT_SORT, 0x000000FF }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a330_vbif[] = { + /* Set up 16 deep read/write request queues */ + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 }, + { A3XX_VBIF_IN_RD_LIM_CONF1, 0x00001818 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00001818 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00001818 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 }, + { A3XX_VBIF_IN_WR_LIM_CONF1, 0x00001818 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F }, + /* Set up round robin arbitration between both AXI ports */ + { A3XX_VBIF_ARB_CTL, 0x00000030 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003F }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x003F003F }, + /* Enable 1K sort */ + { A3XX_VBIF_ABIT_SORT, 0x0001003F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Disable VBIF clock gating. This is to enable AXI running + * higher frequency than GPU. 
+ */ + { A3XX_VBIF_CLKON, 1 }, + {0, 0}, +}; + +/* + * Most of the VBIF registers on 8974v2 have the correct values at power on, so + * we won't modify those if we don't need to + */ +static const struct adreno_vbif_data a330v2_vbif[] = { + /* Enable 1k sort */ + { A3XX_VBIF_ABIT_SORT, 0x0001003F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + {0, 0}, +}; + +/* + * Most of the VBIF registers on a330v2.1 have the correct values at power on, + * so we won't modify those if we don't need to + */ +static const struct adreno_vbif_data a330v21_vbif[] = { + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x1 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18180c0c }, + {0, 0}, +}; + +static const struct adreno_vbif_platform a3xx_vbif_platforms[] = { + { adreno_is_a304, a304_vbif }, + { adreno_is_a305, a305_vbif }, + { adreno_is_a305c, a305c_vbif }, + { adreno_is_a306, a306_vbif }, + { adreno_is_a306a, a306a_vbif }, + { adreno_is_a310, a310_vbif }, + { adreno_is_a320, a320_vbif }, + /* A330v2.1 needs to be ahead of A330v2 so the right device matches */ + { adreno_is_a330v21, a330v21_vbif}, + /* A330v2 needs to be ahead of A330 so the right device matches */ + { adreno_is_a330v2, a330v2_vbif }, + { adreno_is_a330, a330_vbif }, + { adreno_is_a305b, a305b_vbif }, +}; + +/* + * Define the available perfcounter groups - these get used by + * adreno_perfcounter_get and adreno_perfcounter_put + */ + +static struct adreno_perfcount_register a3xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO, + A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, + A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, + A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO, + A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO, + A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO, + A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO, + A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, + A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, + A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, + A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9, + A3XX_HLSQ_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, + A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10, + A3XX_HLSQ_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, + 
A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11, + A3XX_HLSQ_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, + A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12, + A3XX_HLSQ_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, + A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13, + A3XX_HLSQ_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, + A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14, + A3XX_HLSQ_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, + A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, + A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, + A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, + A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, + A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, + A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, + A3XX_RBBM_PERFCTR_UCHE_0_HI, 21, + A3XX_UCHE_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, + A3XX_RBBM_PERFCTR_UCHE_1_HI, 22, + A3XX_UCHE_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, + A3XX_RBBM_PERFCTR_UCHE_2_HI, 23, + A3XX_UCHE_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, + A3XX_RBBM_PERFCTR_UCHE_3_HI, 24, + A3XX_UCHE_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, + A3XX_RBBM_PERFCTR_UCHE_4_HI, 25, + A3XX_UCHE_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, + A3XX_RBBM_PERFCTR_UCHE_5_HI, 26, + A3XX_UCHE_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO, + A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO, + A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO, + A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO, + A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO, + A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO, + A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO, + A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO, + A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT }, 
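+ /*
+ * As in the other groups, each entry pairs the _LO/_HI result registers
+ * with a load bit (-1 for counters that have none) and the select
+ * register used to program the countable.
+ */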
+ { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO, + A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO, + A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO, + A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO, + A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO, + A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO, + A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO, + A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO, + A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, + A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO, + A3XX_RBBM_PERFCTR_PWR_1_HI, -1, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO, + A3XX_VBIF_PERF_CNT0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO, + A3XX_VBIF_PERF_CNT1_HI, -1, 0 }, +}; +static struct adreno_perfcount_register a3xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO, + A3XX_VBIF_PERF_PWR_CNT0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO, + A3XX_VBIF_PERF_PWR_CNT1_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO, + A3XX_VBIF_PERF_PWR_CNT2_HI, -1, 0 }, +}; +static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0, + A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1, + A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2, + A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3, + A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 }, +}; +/* + * Placing EN register in select field since vbif perf counters + * dont have select register to program + */ +static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW0, + A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW1, + A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW2, + A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN2 }, +}; + +#define A3XX_PERFCOUNTER_GROUP(offset, name) \ + ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name) + +#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags) + +static struct adreno_perfcount_group a3xx_perfcounter_groups[] = { + A3XX_PERFCOUNTER_GROUP(CP, cp), + 
A3XX_PERFCOUNTER_GROUP(RBBM, rbbm), + A3XX_PERFCOUNTER_GROUP(PC, pc), + A3XX_PERFCOUNTER_GROUP(VFD, vfd), + A3XX_PERFCOUNTER_GROUP(HLSQ, hlsq), + A3XX_PERFCOUNTER_GROUP(VPC, vpc), + A3XX_PERFCOUNTER_GROUP(TSE, tse), + A3XX_PERFCOUNTER_GROUP(RAS, ras), + A3XX_PERFCOUNTER_GROUP(UCHE, uche), + A3XX_PERFCOUNTER_GROUP(TP, tp), + A3XX_PERFCOUNTER_GROUP(SP, sp), + A3XX_PERFCOUNTER_GROUP(RB, rb), + A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A3XX_PERFCOUNTER_GROUP(VBIF, vbif), + A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), +}; + +static struct adreno_perfcounters a3xx_perfcounters = { + a3xx_perfcounter_groups, + ARRAY_SIZE(a3xx_perfcounter_groups), +}; + +static struct adreno_ft_perf_counters a3xx_ft_perf_counters[] = { + {KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES}, + {KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES}, + {KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS}, + {KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM}, +}; + +static void a3xx_perfcounter_init(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + + /* SP[3] counter is broken on a330 so disable it if a330 device */ + if (adreno_is_a330(adreno_dev)) + a3xx_perfcounters_sp[3].countable = KGSL_PERFCOUNTER_BROKEN; + + if (counters && + (adreno_is_a306(adreno_dev) || adreno_is_a304(adreno_dev) || + adreno_is_a306a(adreno_dev))) { + counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs = + a3xx_perfcounters_vbif2; + counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs = + a3xx_perfcounters_vbif2_pwr; + } + + /* + * Enable the GPU busy count counter. This is a fixed counter on + * A3XX so we don't need to bother checking the return value + */ + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + NULL, NULL, PERFCOUNTER_FLAG_KERNEL); +} + +static void a3xx_perfcounter_close(struct adreno_device *adreno_dev) +{ + adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + PERFCOUNTER_FLAG_KERNEL); +} + +/** + * a3xx_protect_init() - Initializes register protection on a3xx + * @adreno_dev: Pointer to the device structure + * Performs register writes to enable protected access to sensitive + * registers + */ +static void a3xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int index = 0; + struct kgsl_protected_registers *iommu_regs; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x18, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x20, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x33, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x42, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x50, 4); + adreno_set_protected_registers(adreno_dev, &index, 0x63, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x100, 4); + + /* CP registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x1C0, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x1EC, 1); + adreno_set_protected_registers(adreno_dev, &index, 0x1F6, 1); + adreno_set_protected_registers(adreno_dev, &index, 0x1F8, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x45E, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x460, 4); + + /* RB registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0); + + /* VBIF registers */ + 
adreno_set_protected_registers(adreno_dev, &index, 0x3000, 6); + + /* SMMU registers */ + iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu); + if (iommu_regs) + adreno_set_protected_registers(adreno_dev, &index, + iommu_regs->base, iommu_regs->range); +} + +static void a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + adreno_vbif_start(adreno_dev, a3xx_vbif_platforms, + ARRAY_SIZE(a3xx_vbif_platforms)); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Tune the hystersis counters for SP and CP idle detection */ + kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10); + kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + + /* Enable the RBBM error reporting bits. This lets us get + useful information on failure */ + + kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000); + + /* Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang */ + if (adreno_is_a330v2(adreno_dev)) { + set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + gpudev->irq->mask |= (1 << A3XX_INT_MISC_HANG_DETECT); + kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 31) | 0xFFFF); + } else + kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 16) | 0xFFF); + + /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */ + kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); + + /* Enable VFD to access most of the UCHE (7 ways out of 8) */ + kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07); + + /* Enable Clock gating */ + kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, + adreno_a3xx_rbbm_clock_ctl_default(adreno_dev)); + + if (adreno_is_a330v2(adreno_dev)) + kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL, + A330v2_RBBM_GPR0_CTL_DEFAULT); + else if (adreno_is_a330(adreno_dev)) + kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL, + A330_RBBM_GPR0_CTL_DEFAULT); + else if (adreno_is_a310(adreno_dev)) + kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL, + A310_RBBM_GPR0_CTL_DEFAULT); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_USES_OCMEM)) + kgsl_regwrite(device, A3XX_RB_GMEM_BASE_ADDR, + (unsigned int)(adreno_dev->gmem_base >> 14)); + + /* Turn on protection */ + a3xx_protect_init(adreno_dev); + + /* Turn on performance counters */ + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01); + + kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT); +} + +static struct adreno_coresight_register a3xx_coresight_registers[] = { + { A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F }, + { A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff }, + { A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f }, + { A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff }, + { A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 }, + { A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 }, + { A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 }, + { A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE }, + { A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 }, +}; + +static ADRENO_CORESIGHT_ATTR(config_debug_bus, + &a3xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt, + &a3xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt, + &a3xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt, + 
&a3xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(config_trace_cmd, + &a3xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl, + &a3xx_coresight_registers[5]); + +static struct attribute *a3xx_coresight_attrs[] = { + &coresight_attr_config_debug_bus.attr.attr, + &coresight_attr_config_trace_start_cnt.attr.attr, + &coresight_attr_config_trace_stop_cnt.attr.attr, + &coresight_attr_config_trace_period_cnt.attr.attr, + &coresight_attr_config_trace_cmd.attr.attr, + &coresight_attr_config_trace_bus_ctl.attr.attr, + NULL, +}; + +static const struct attribute_group a3xx_coresight_group = { + .attrs = a3xx_coresight_attrs, +}; + +static const struct attribute_group *a3xx_coresight_groups[] = { + &a3xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a3xx_coresight = { + .registers = a3xx_coresight_registers, + .count = ARRAY_SIZE(a3xx_coresight_registers), + .groups = a3xx_coresight_groups, +}; + +/* Register offset defines for A3XX */ +static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_WADDR, A3XX_CP_ME_RAM_WADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_DATA, A3XX_CP_ME_RAM_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_DATA, A3XX_CP_PFP_UCODE_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_ADDR, A3XX_CP_PFP_UCODE_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A3XX_CP_WFI_PEND_CTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A3XX_CP_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_RADDR, A3XX_CP_ME_RAM_RADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A3XX_CP_ROQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A3XX_CP_ROQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A3XX_CP_MERCIU_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A3XX_CP_MERCIU_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, A3XX_CP_MERCIU_DATA2), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A3XX_CP_MEQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A3XX_CP_MEQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A3XX_RBBM_PERFCTR_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + A3XX_RBBM_PERFCTR_LOAD_CMD0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + A3XX_RBBM_PERFCTR_LOAD_CMD1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + A3XX_RBBM_PERFCTR_PWR_1_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK), + 
ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A3XX_RBBM_INT_0_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD, + A3XX_RBBM_INT_CLEAR_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_SEL, + A3XX_VPC_VPC_DEBUG_RAM_SEL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_READ, + A3XX_VPC_VPC_DEBUG_RAM_READ), + ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, A3XX_PA_SC_AA_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGMENT, + A3XX_SQ_INST_STORE_MANAGMENT), + ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A3XX_RBBM_RBBM_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, + A3XX_UCHE_CACHE_INVALIDATE0_REG), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE1, + A3XX_UCHE_CACHE_INVALIDATE1_REG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + A3XX_RBBM_PERFCTR_LOAD_VALUE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + A3XX_RBBM_PERFCTR_LOAD_VALUE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0, + A3XX_VBIF_XIN_HALT_CTRL0), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1, + A3XX_VBIF_XIN_HALT_CTRL1), +}; + +static const struct adreno_reg_offsets a3xx_reg_offsets = { + .offsets = a3xx_register_offsets, + .offset_0 = ADRENO_REG_REGISTER_MAX, +}; + +/* + * Defined the size of sections dumped in snapshot, these values + * may change after initialization based on the specific core + */ +static struct adreno_snapshot_sizes a3xx_snap_sizes = { + .cp_pfp = 0x14, + .vpc_mem = 512, + .cp_meq = 16, + .shader_mem = 0x4000, + .cp_merciu = 0, + .roq = 128, +}; + +static struct adreno_snapshot_data a3xx_snapshot_data = { + .sect_sizes = &a3xx_snap_sizes, +}; + +static int _load_firmware(struct kgsl_device *device, const char *fwfile, + void **buf, int *len) +{ + const struct firmware *fw = NULL; + int ret; + + ret = request_firmware(&fw, fwfile, device->dev); + + if (ret) { + KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + if (fw) + *buf = kmalloc(fw->size, GFP_KERNEL); + else + return -EINVAL; + + if (*buf) { + memcpy(*buf, fw->data, fw->size); + *len = fw->size; + } + + release_firmware(fw); + return (*buf != NULL) ? 
0 : -ENOMEM; +} + +int a3xx_microcode_read(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_dev->pm4_fw == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + adreno_dev->gpucore->pm4fw_name, &ptr, &len); + + if (ret) { + KGSL_DRV_FATAL(device, "Failed to read pm4 ucode %s\n", + adreno_dev->gpucore->pm4fw_name); + return ret; + } + + /* PM4 size is 3 dword aligned plus 1 dword of version */ + if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) { + KGSL_DRV_ERR(device, "Bad pm4 microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + adreno_dev->pm4_fw_size = len / sizeof(uint32_t); + adreno_dev->pm4_fw = ptr; + adreno_dev->pm4_fw_version = adreno_dev->pm4_fw[1]; + } + + if (adreno_dev->pfp_fw == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + adreno_dev->gpucore->pfpfw_name, &ptr, &len); + if (ret) { + KGSL_DRV_FATAL(device, "Failed to read pfp ucode %s\n", + adreno_dev->gpucore->pfpfw_name); + return ret; + } + + /* PFP size shold be dword aligned */ + if (len % sizeof(uint32_t) != 0) { + KGSL_DRV_ERR(device, "Bad PFP microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + adreno_dev->pfp_fw_size = len / sizeof(uint32_t); + adreno_dev->pfp_fw = ptr; + adreno_dev->pfp_fw_version = adreno_dev->pfp_fw[5]; + } + + return 0; +} + +/** + * adreno_ringbuffer_load_pm4_ucode() - Load pm4 ucode + * @device: Pointer to a KGSL device + * @start: Starting index in pm4 ucode to load + * @end: Ending index of pm4 ucode to load + * @addr: Address to load the pm4 ucode + * + * Load the pm4 ucode from @start at @addr. + */ +static inline int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device, + unsigned int start, unsigned int end, unsigned int addr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int i; + + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_WADDR, addr); + for (i = start; i < end; i++) + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA, + adreno_dev->pm4_fw[i]); + + return 0; +} + +/** + * adreno_ringbuffer_load_pfp_ucode() - Load pfp ucode + * @device: Pointer to a KGSL device + * @start: Starting index in pfp ucode to load + * @end: Ending index of pfp ucode to load + * @addr: Address to load the pfp ucode + * + * Load the pfp ucode from @start at @addr. 
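+ * + * Like the PM4 loader above, this is a plain AHB loader: the PFP ucode + * address register is written once and every ucode dword is then written + * to the PFP ucode data register. Always returns 0.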
+ */ +static inline int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device, + unsigned int start, unsigned int end, unsigned int addr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int i; + + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr); + for (i = start; i < end; i++) + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + adreno_dev->pfp_fw[i]); + + return 0; +} + +/** + * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode + * @rb: Pointer to adreno ringbuffer + * @load_jt: If non zero only load Jump tables + * + * Bootstrap ucode for GPU + * load_jt == 0, bootstrap full microcode + * load_jt == 1, bootstrap jump tables of microcode + * + * For example a bootstrap packet would like below + * Setup a type3 bootstrap packet + * PFP size to bootstrap + * PFP addr to write the PFP data + * PM4 size to bootstrap + * PM4 addr to write the PM4 data + * PFP dwords from microcode to bootstrap + * PM4 size dwords from microcode to bootstrap + */ +static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb, + unsigned int load_jt) +{ + unsigned int *cmds, bootstrap_size, rb_size; + int i = 0; + int ret; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr; + + /* Only bootstrap jump tables of ucode */ + if (load_jt) { + pm4_idx = adreno_dev->gpucore->pm4_jt_idx; + pm4_addr = adreno_dev->gpucore->pm4_jt_addr; + pfp_idx = adreno_dev->gpucore->pfp_jt_idx; + pfp_addr = adreno_dev->gpucore->pfp_jt_addr; + } else { + /* Bootstrap full ucode */ + pm4_idx = 1; + pm4_addr = 0; + pfp_idx = 1; + pfp_addr = 0; + } + + pm4_size = (adreno_dev->pm4_fw_size - pm4_idx); + pfp_size = (adreno_dev->pfp_fw_size - pfp_idx); + + bootstrap_size = (pm4_size + pfp_size + 5); + + /* + * Overwrite the first entry in the jump table with the special + * bootstrap opcode + */ + + if (adreno_is_a4xx(adreno_dev)) { + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, + 0x400); + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + 0x6f0009); + /* + * The support packets (the RMW and INTERRUPT) that are sent + * after the bootstrap packet should not be included in the size + * of the bootstrap packet but we do need to reserve enough + * space for those too + */ + rb_size = bootstrap_size + 6; + } else { + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, + 0x200); + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + 0x6f0005); + rb_size = bootstrap_size; + } + + /* clear ME_HALT to start micro engine */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); + + cmds = adreno_ringbuffer_allocspace(rb, rb_size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + /* Construct the packet that bootsraps the ucode */ + *cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1)); + *cmds++ = pfp_size; + *cmds++ = pfp_addr; + *cmds++ = pm4_size; + *cmds++ = pm4_addr; + + /** + * Theory of operation: + * + * In A4x, we cannot have the PFP executing instructions while its + * instruction RAM is loading. We load the PFP's instruction RAM + * using type-0 writes from the ME. + * + * To make sure the PFP is not fetching instructions at the same + * time, we put it in a one-instruction loop: + * mvc (ME), (ringbuffer) + * which executes repeatedly until all of the data has been moved + * from the ring buffer to the ME. 
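+ * + * Note the ordering of the payload built below: on A4xx the PM4 dwords + * are placed ahead of the PFP dwords and are followed by the CP_REG_RMW + * and CP_INTERRUPT support packets, while on older targets the PFP dwords + * come first and no support packets are appended.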
+ */ + if (adreno_is_a4xx(adreno_dev)) { + for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) + *cmds++ = adreno_dev->pm4_fw[i]; + for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) + *cmds++ = adreno_dev->pfp_fw[i]; + + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = 0x20000000 + A4XX_CP_RB_WPTR; + *cmds++ = 0xffffffff; + *cmds++ = 0x00000002; + *cmds++ = cp_type3_packet(CP_INTERRUPT, 1); + *cmds++ = 0; + + rb->wptr = rb->wptr - 2; + adreno_ringbuffer_submit(rb, NULL); + rb->wptr = rb->wptr + 2; + } else { + for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) + *cmds++ = adreno_dev->pfp_fw[i]; + for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) + *cmds++ = adreno_dev->pm4_fw[i]; + adreno_ringbuffer_submit(rb, NULL); + } + + /* idle device to validate bootstrap */ + ret = adreno_spin_idle(device, 2000); + + if (ret) { + KGSL_DRV_ERR(rb->device, + "microcode bootstrap failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } + + /* Clear the chicken bit for speed up on A430 and its derivatives */ + if (!adreno_is_a420(adreno_dev)) + kgsl_regwrite(device, A4XX_CP_DEBUG, + A4XX_CP_DEBUG_DEFAULT & ~(1 << 14)); + + return ret; +} + +int a3xx_microcode_load(struct adreno_device *adreno_dev, + unsigned int start_type) +{ + int status; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + struct kgsl_device *device = rb->device; + + if (start_type == ADRENO_START_COLD) { + /* If bootstrapping is supported, use it to load the ucode */ + if (adreno_bootstrap_ucode(adreno_dev)) { + + /* + * Load the first pm4_bstrp_size + pfp_bstrp_size microcode + * dwords using AHB writes. This small microcode (the + * dispatcher + booter) enables the CP to understand the + * CP_BOOTSTRAP_UCODE packet in _ringbuffer_bootstrap_ucode; + * the CP_BOOTSTRAP_UCODE packet then loads the rest of the + * microcode.
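+ * + * The same _ringbuffer_bootstrap_ucode() helper is reused for the warm + * start path below, where only the CP jump tables are reloaded.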
+ */ + + status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, + adreno_dev->gpucore->pm4_bstrp_size+1, 0); + if (status != 0) + return status; + + status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, + adreno_dev->gpucore->pfp_bstrp_size+1, 0); + if (status != 0) + return status; + + /* Bootstrap rest of the ucode here */ + status = _ringbuffer_bootstrap_ucode(rb, 0); + if (status != 0) + return status; + + } else { + /* load the CP ucode using AHB writes */ + status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, + adreno_dev->pm4_fw_size, 0); + if (status != 0) + return status; + + /* load the prefetch parser ucode using AHB writes */ + status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, + adreno_dev->pfp_fw_size, 0); + if (status != 0) + return status; + } + } else if (start_type == ADRENO_START_WARM) { + /* If bootstrapping if supported to load jump tables */ + if (adreno_bootstrap_ucode(adreno_dev)) { + status = _ringbuffer_bootstrap_ucode(rb, 1); + if (status != 0) + return status; + + } else { + /* load the CP jump tables using AHB writes */ + status = adreno_ringbuffer_load_pm4_ucode(device, + adreno_dev->gpucore->pm4_jt_idx, + adreno_dev->pm4_fw_size, + adreno_dev->gpucore->pm4_jt_addr); + if (status != 0) + return status; + + /* + * load the prefetch parser jump tables using AHB writes + */ + status = adreno_ringbuffer_load_pfp_ucode(device, + adreno_dev->gpucore->pfp_jt_idx, + adreno_dev->pfp_fw_size, + adreno_dev->gpucore->pfp_jt_addr); + if (status != 0) + return status; + } + } else + return -EINVAL; + + return 0; +} + +struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_offsets = &a3xx_reg_offsets, + .ft_perf_counters = a3xx_ft_perf_counters, + .ft_perf_counters_count = ARRAY_SIZE(a3xx_ft_perf_counters), + .perfcounters = &a3xx_perfcounters, + .irq = &a3xx_irq, + .irq_trace = trace_kgsl_a3xx_irq_status, + .snapshot_data = &a3xx_snapshot_data, + .num_prio_levels = 1, + .vbif_xin_halt_ctrl0_mask = A3XX_VBIF_XIN_HALT_CTRL0_MASK, + .platform_setup = a3xx_platform_setup, + .rb_init = a3xx_rb_init, + .microcode_read = a3xx_microcode_read, + .microcode_load = a3xx_microcode_load, + .perfcounter_init = a3xx_perfcounter_init, + .perfcounter_close = a3xx_perfcounter_close, + .start = a3xx_start, + .snapshot = a3xx_snapshot, + .coresight = &a3xx_coresight, +}; diff --git a/drivers/gpu/msm/adreno_a3xx.h b/drivers/gpu/msm/adreno_a3xx.h new file mode 100644 index 000000000000..4ab1236020e8 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __A3XX_H +#define __A3XX_H + +unsigned int a3xx_irq_pending(struct adreno_device *adreno_dev); + +int a3xx_microcode_read(struct adreno_device *adreno_dev); +int a3xx_microcode_load(struct adreno_device *adreno_dev, + unsigned int start_type); +int a3xx_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable); +uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter); + +void a3xx_a4xx_err_callback(struct adreno_device *adreno_dev, int bit); + +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +#endif /*__A3XX_H */ diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c b/drivers/gpu/msm/adreno_a3xx_snapshot.c new file mode 100644 index 000000000000..c4d415adc9f0 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c @@ -0,0 +1,370 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/io.h> +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_snapshot.h" +#include "a3xx_reg.h" +#include "adreno_snapshot.h" +#include "adreno_a3xx.h" + +/* + * Set of registers to dump for A3XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a3xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, + 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, + 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, + 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, + 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, + 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9, + 0x01fc, 0x01ff, + 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, + 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, + 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, + 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, + 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, + 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, + 0x0e41, 0x0e45, 0x0e64, 0x0e65, + 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, + 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, + 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, + 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, + 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, + 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, + 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, + 0x2240, 0x227e, + 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, + 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, + 0x22ff, 0x22ff, 0x2340, 0x2343, + 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, + 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, + 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, + 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 
0x2510, 0x2511, + 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, + 0x25f0, 0x25f0, + 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, + 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, + 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, + 0x300C, 0x300E, 0x301C, 0x301D, + 0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036, + 0x303C, 0x303C, 0x305E, 0x305F, +}; + +/* Removed the following HLSQ register ranges from being read during + * fault tolerance since reading the registers may cause the device to hang: + */ +static const unsigned int a3xx_hlsq_registers[] = { + 0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, + 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, + 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, +}; + +/* The set of additional registers to be dumped for A330 */ + +static const unsigned int a330_registers[] = { + 0x1d0, 0x1d0, 0x1d4, 0x1d4, 0x453, 0x453, +}; + +/* Shader memory size in words */ +#define SHADER_MEMORY_SIZE 0x4000 + +/** + * _rbbm_debug_bus_read - Helper function to read data from the RBBM + * debug bus. + * @device - GPU device to read/write registers + * @block_id - Debug bus block to read from + * @index - Index in the debug bus block to read + * @ret - Value of the register read + */ +static void _rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int block = (block_id << 8) | 1 << 16; + kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); + kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); +} + +/** + * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader + * memory to the snapshot buffer. + * @device: GPU device whose shader memory is to be dumped + * @buf: Pointer to binary snapshot data blob being made + * @remain: Number of remaining bytes in the snapshot blob + * @priv: Unused parameter + * + */ +static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int shader_read_len = SHADER_MEMORY_SIZE; + + if (shader_read_len > (device->shader_mem_len >> 2)) + shader_read_len = (device->shader_mem_len >> 2); + + if (remain < DEBUG_SECTION_SZ(shader_read_len)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; + header->size = shader_read_len; + + /* Map shader memory to kernel, for dumping */ + if (device->shader_mem_virt == NULL) + device->shader_mem_virt = devm_ioremap(device->dev, + device->shader_mem_phys, + device->shader_mem_len); + + if (device->shader_mem_virt == NULL) { + KGSL_DRV_ERR(device, + "Unable to map shader memory region\n"); + return 0; + } + + /* Now, dump shader memory to snapshot */ + for (i = 0; i < shader_read_len; i++) + adreno_shadermem_regread(device, i, &data[i]); + + + return DEBUG_SECTION_SZ(shader_read_len); +} + +static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + struct kgsl_snapshot_debugbus *header + = (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + /* + * For A305 and A320 all debug bus regions are the same 
size (0x40). For + * A330, they can be different sizes - most are still 0x40, but some + * like CP are larger + */ + + dwords = (adreno_is_a330(adreno_dev) || + adreno_is_a305b(adreno_dev)) ? + block->dwords : 0x40; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords; + + for (i = 0; i < dwords; i++) + _rbbm_debug_bus_read(device, block->block_id, i, &data[i]); + + return size; +} + +static struct adreno_debugbus_block debugbus_blocks[] = { + { RBBM_BLOCK_ID_CP, 0x52, }, + { RBBM_BLOCK_ID_RBBM, 0x40, }, + { RBBM_BLOCK_ID_VBIF, 0x40, }, + { RBBM_BLOCK_ID_HLSQ, 0x40, }, + { RBBM_BLOCK_ID_UCHE, 0x40, }, + { RBBM_BLOCK_ID_PC, 0x40, }, + { RBBM_BLOCK_ID_VFD, 0x40, }, + { RBBM_BLOCK_ID_VPC, 0x40, }, + { RBBM_BLOCK_ID_TSE, 0x40, }, + { RBBM_BLOCK_ID_RAS, 0x40, }, + { RBBM_BLOCK_ID_VSC, 0x40, }, + { RBBM_BLOCK_ID_SP_0, 0x40, }, + { RBBM_BLOCK_ID_SP_1, 0x40, }, + { RBBM_BLOCK_ID_SP_2, 0x40, }, + { RBBM_BLOCK_ID_SP_3, 0x40, }, + { RBBM_BLOCK_ID_TPL1_0, 0x40, }, + { RBBM_BLOCK_ID_TPL1_1, 0x40, }, + { RBBM_BLOCK_ID_TPL1_2, 0x40, }, + { RBBM_BLOCK_ID_TPL1_3, 0x40, }, + { RBBM_BLOCK_ID_RB_0, 0x40, }, + { RBBM_BLOCK_ID_RB_1, 0x40, }, + { RBBM_BLOCK_ID_RB_2, 0x40, }, + { RBBM_BLOCK_ID_RB_3, 0x40, }, + { RBBM_BLOCK_ID_MARB_0, 0x40, }, + { RBBM_BLOCK_ID_MARB_1, 0x40, }, + { RBBM_BLOCK_ID_MARB_2, 0x40, }, + { RBBM_BLOCK_ID_MARB_3, 0x40, }, +}; + +static void a3xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, + a3xx_snapshot_debugbus_block, + (void *) &debugbus_blocks[i]); + } +} + +static void _snapshot_hlsq_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* + * Trying to read HLSQ registers when the HLSQ block is busy + * will cause the device to hang. The RBBM_DEBUG_BUS has information + * that will tell us if the HLSQ block is busy or not. Read values + * from the debug bus to ensure the HLSQ block is not busy (this + * is hardware dependent). If the HLSQ block is busy do not + * dump the registers, otherwise dump the HLSQ registers. 
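+ * + * The debug bus index and bit mask used for this check depend on the + * target: A330 samples the stall_ctxt_full status bit, while earlier + * parts sample the tpif and spif state machine values.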
+ */ + + if (adreno_is_a330(adreno_dev)) { + /* + * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27] + * + * if (!stall_context_full) + * then dump HLSQ registers + */ + unsigned int stall_context_full = 0; + + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49, + &stall_context_full); + stall_context_full &= 0x08000000; + + if (stall_context_full) + return; + } else { + /* + * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] + * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] + * + * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) + * then dump HLSQ registers + */ + unsigned int next_pif = 0; + + /* check tpif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); + next_pif &= 0x1f; + if (next_pif != 0 && next_pif != 1 && next_pif != 28) + return; + + /* check spif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); + next_pif &= 0x3f; + if (next_pif != 0 && next_pif != 1 && next_pif != 10) + return; + } + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers); +} + +/* + * a3xx_snapshot() - A3XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Snapshot meta data + * @remain: Amount of space left in snapshot memory + * + * This is where all of the A3XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; + unsigned int reg; + + /* Disable Clock gating temporarily for the debug bus to work */ + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_CLOCK_CTL, 0x00); + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers); + + _snapshot_hlsq_regs(device, snapshot); + + if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a330_registers); + + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, + 0x0, snap_data->sect_sizes->cp_pfp); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44); + + /* VPC memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_vpc_memory, + &snap_data->sect_sizes->vpc_mem); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, + adreno_snapshot_cp_meq, &snap_data->sect_sizes->cp_meq); + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_shader_memory, + &snap_data->sect_sizes->shader_mem); + + + /* CP PFP and PM4 */ + + /* + * Reading the microcode while the CP is running will + * basically move the CP instruction pointer to + * whatever address we read. Big badaboom ensues. Stop the CP + * (if it isn't already stopped) to ensure that we are safe. + * We do this here and not earlier to avoid corrupting the RBBM + * status and CP registers - by the time we get here we don't + * care about the contents of the CP anymore. 
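+ * + * The stop is done with a read-modify-write of CP_ME_CNTL below, setting + * the two halt bits while leaving the rest of the register intact.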
+ */ + + adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pfp_ram, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pm4_ram, NULL); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_roq, &snap_data->sect_sizes->roq); + + if (snap_data->sect_sizes->cp_merciu) { + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_merciu, + &snap_data->sect_sizes->cp_merciu); + } + + a3xx_snapshot_debugbus(device, snapshot); +} diff --git a/drivers/gpu/msm/adreno_a4xx.c b/drivers/gpu/msm/adreno_a4xx.c new file mode 100644 index 000000000000..99a331d7a470 --- /dev/null +++ b/drivers/gpu/msm/adreno_a4xx.c @@ -0,0 +1,2167 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/msm_kgsl.h> + +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "a4xx_reg.h" +#include "adreno_a3xx.h" +#include "adreno_a4xx.h" +#include "adreno_cp_parser.h" +#include "adreno_trace.h" +#include "adreno_pm4types.h" +#include "adreno_perfcounter.h" + +#define SP_TP_PWR_ON BIT(20) + +/* + * Define registers for a4xx that contain addresses used by the + * cp parser logic + */ +const unsigned int a4xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + A4XX_VSC_PIPE_DATA_ADDRESS_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + A4XX_VSC_PIPE_DATA_LENGTH_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + A4XX_VSC_PIPE_DATA_ADDRESS_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + A4XX_VSC_PIPE_DATA_LENGTH_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + A4XX_VSC_PIPE_DATA_ADDRESS_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + A4XX_VSC_PIPE_DATA_LENGTH_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + A4XX_VSC_PIPE_DATA_ADDRESS_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + A4XX_VSC_PIPE_DATA_LENGTH_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + A4XX_VSC_PIPE_DATA_ADDRESS_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + A4XX_VSC_PIPE_DATA_LENGTH_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + A4XX_VSC_PIPE_DATA_ADDRESS_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + A4XX_VSC_PIPE_DATA_LENGTH_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + A4XX_VSC_PIPE_DATA_ADDRESS_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + A4XX_VSC_PIPE_DATA_LENGTH_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + A4XX_VSC_PIPE_DATA_ADDRESS_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + A4XX_VSC_PIPE_DATA_LENGTH_7), + 
ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + A4XX_VFD_FETCH_INSTR_1_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + A4XX_VFD_FETCH_INSTR_1_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + A4XX_VFD_FETCH_INSTR_1_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + A4XX_VFD_FETCH_INSTR_1_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + A4XX_VFD_FETCH_INSTR_1_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + A4XX_VFD_FETCH_INSTR_1_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + A4XX_VFD_FETCH_INSTR_1_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + A4XX_VFD_FETCH_INSTR_1_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + A4XX_VFD_FETCH_INSTR_1_8), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + A4XX_VFD_FETCH_INSTR_1_9), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + A4XX_VFD_FETCH_INSTR_1_10), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + A4XX_VFD_FETCH_INSTR_1_11), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + A4XX_VFD_FETCH_INSTR_1_12), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + A4XX_VFD_FETCH_INSTR_1_13), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + A4XX_VFD_FETCH_INSTR_1_14), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + A4XX_VFD_FETCH_INSTR_1_15), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, + A4XX_VFD_FETCH_INSTR_1_16), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17, + A4XX_VFD_FETCH_INSTR_1_17), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18, + A4XX_VFD_FETCH_INSTR_1_18), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19, + A4XX_VFD_FETCH_INSTR_1_19), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20, + A4XX_VFD_FETCH_INSTR_1_20), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21, + A4XX_VFD_FETCH_INSTR_1_21), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22, + A4XX_VFD_FETCH_INSTR_1_22), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23, + A4XX_VFD_FETCH_INSTR_1_23), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24, + A4XX_VFD_FETCH_INSTR_1_24), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25, + A4XX_VFD_FETCH_INSTR_1_25), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26, + A4XX_VFD_FETCH_INSTR_1_26), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27, + A4XX_VFD_FETCH_INSTR_1_27), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28, + A4XX_VFD_FETCH_INSTR_1_28), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29, + A4XX_VFD_FETCH_INSTR_1_29), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30, + A4XX_VFD_FETCH_INSTR_1_30), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31, + A4XX_VFD_FETCH_INSTR_1_31), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + A4XX_VSC_SIZE_ADDRESS), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + A4XX_SP_VS_PVT_MEM_ADDR), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + A4XX_SP_FS_PVT_MEM_ADDR), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + A4XX_SP_VS_OBJ_START), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + A4XX_SP_FS_OBJ_START), + ADRENO_REG_DEFINE(ADRENO_CP_UCHE_INVALIDATE0, + A4XX_UCHE_INVALIDATE0), + ADRENO_REG_DEFINE(ADRENO_CP_UCHE_INVALIDATE1, + A4XX_UCHE_INVALIDATE1), +}; + +static const struct adreno_vbif_data a405_vbif[] = { + { A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a420_vbif[] = { + { A4XX_VBIF_ABIT_SORT, 0x0001001F }, + { 
A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + { A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 }, + { A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a430_vbif[] = { + { A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 }, + { A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 }, + {0, 0}, +}; + +static const struct adreno_vbif_platform a4xx_vbif_platforms[] = { + { adreno_is_a405, a405_vbif }, + { adreno_is_a420, a420_vbif }, + { adreno_is_a430, a430_vbif }, + { adreno_is_a418, a430_vbif }, +}; + +/* a4xx_preemption_start() - Setup state to start preemption */ +static void a4xx_preemption_start(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = &adreno_dev->dev; + uint32_t val; + + /* + * Setup scratch registers from which the GPU will program the + * registers required to start execution of new ringbuffer + * set ringbuffer address + */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG8, + rb->buffer_desc.gpuaddr); + kgsl_regread(device, A4XX_CP_RB_CNTL, &val); + /* scratch REG9 corresponds to CP_RB_CNTL register */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG9, val); + /* scratch REG10 corresponds to rptr address */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG10, 0); + /* scratch REG11 corresponds to rptr */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG11, rb->rptr); + /* scratch REG12 corresponds to wptr */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG12, rb->wptr); + /* + * scratch REG13 corresponds to IB1_BASE, + * 0 since we do not do switches in between IB's + */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG13, 0); + /* scratch REG14 corresponds to IB1_BUFSZ */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG14, 0); + /* scratch REG15 corresponds to IB2_BASE */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG15, 0); + /* scratch REG16 corresponds to IB2_BUFSZ */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG16, 0); + /* scratch REG17 corresponds to GPR11 */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG17, rb->gpr11); +} + +/* a4xx_preemption_save() - Save the state after preemption is done */ +static void a4xx_preemption_save(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_regread(device, A4XX_CP_SCRATCH_REG18, &rb->rptr); + kgsl_regread(device, A4XX_CP_SCRATCH_REG23, &rb->gpr11); +} + +static int a4xx_preemption_token(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + uint64_t gpuaddr) +{ + unsigned int *cmds_orig = cmds; + + /* Turn on preemption flag */ + /* preemption token - fill when pt switch command size is known */ + *cmds++ = cp_type3_packet(CP_PREEMPT_TOKEN, 3); + *cmds++ = (uint)gpuaddr; + *cmds++ = 1; + /* generate interrupt on preemption completion */ + *cmds++ = 1 << CP_PREEMPT_ORDINAL_INTERRUPT; + + return cmds - cmds_orig; + +} + +static int a4xx_preemption_pre_ibsubmit( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context, uint64_t cond_addr, + struct kgsl_memobj_node *ib) +{ + unsigned int *cmds_orig = cmds; + int exec_ib = 0; + + cmds += a4xx_preemption_token(adreno_dev, rb, cmds, + 
rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, preempted)); + + if (ib) + exec_ib = 1; + + *cmds++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmds++ = cond_addr; + *cmds++ = cond_addr; + *cmds++ = 1; + *cmds++ = 7 + exec_ib * 3; + if (exec_ib) { + *cmds++ = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + *cmds++ = ib->gpuaddr; + *cmds++ = (unsigned int) ib->size >> 2; + } + /* clear preemption flag */ + *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2); + *cmds++ = cond_addr; + *cmds++ = 0; + *cmds++ = cp_type3_packet(CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); + *cmds++ = 0; + + return cmds - cmds_orig; +} + +/* + * a4xx_is_sptp_idle() - A430 SP/TP should be off to be considered idle + * @adreno_dev: The adreno device pointer + */ +static bool a4xx_is_sptp_idle(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC)) + return true; + + /* If SP/TP pc isn't enabled, don't worry about power */ + kgsl_regread(device, A4XX_CP_POWER_COLLAPSE_CNTL, ®); + if (!(reg & 0x10)) + return true; + + /* Check that SP/TP is off */ + kgsl_regread(device, A4XX_RBBM_POWER_STATUS, ®); + return !(reg & SP_TP_PWR_ON); +} + +/* + * a4xx_regulator_enable() - Enable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly enabled + * on a restart. Clocks must be on during this call. + */ +static int a4xx_regulator_enable(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + if (!(adreno_is_a430(adreno_dev) || adreno_is_a418(adreno_dev))) + return 0; + + /* Set the default register values; set SW_COLLAPSE to 0 */ + kgsl_regwrite(device, A4XX_RBBM_POWER_CNTL_IP, 0x778000); + do { + udelay(5); + kgsl_regread(device, A4XX_RBBM_POWER_STATUS, ®); + } while (!(reg & SP_TP_PWR_ON)); + return 0; +} + +/* + * a4xx_regulator_disable() - Disable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly disabled + * on a power down to prevent current spikes. Clocks must be on + * during this call. + */ +static void a4xx_regulator_disable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + if (!(adreno_is_a430(adreno_dev) || adreno_is_a418(adreno_dev))) + return; + + /* Set the default register values; set SW_COLLAPSE to 1 */ + kgsl_regwrite(device, A4XX_RBBM_POWER_CNTL_IP, 0x778001); +} + +/* + * a4xx_enable_pc() - Enable the SP/TP block power collapse + * @adreno_dev: The adreno device pointer + */ +static void a4xx_enable_pc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + kgsl_regwrite(device, A4XX_CP_POWER_COLLAPSE_CNTL, 0x00400010); + trace_adreno_sp_tp((unsigned long) __builtin_return_address(0)); +}; + +/* + * a4xx_enable_ppd() - Enable the Peak power detect logic in the h/w + * @adreno_dev: The adreno device pointer + * + * A430 can detect peak current conditions inside h/w and throttle + * the workload to ALUs to mitigate it. 
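+ * + * The thresholds below are only programmed on A430v2 when the ADRENO_PPD + * feature and the ADRENO_PPD_CTRL pwrctrl flag are set; on any other + * configuration this function is a no-op.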
+ */ +static void a4xx_enable_ppd(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD) || + !test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag) || + !adreno_is_a430v2(adreno_dev)) + return; + + /* Program thresholds */ + kgsl_regwrite(device, A4XX_RBBM_PPD_EPOCH_INTER_TH_HIGH_CLEAR_THR, + 0x003F0101); + kgsl_regwrite(device, A4XX_RBBM_PPD_EPOCH_INTER_TH_LOW, 0x00000101); + kgsl_regwrite(device, A4XX_RBBM_PPD_V2_SP_PWR_WEIGHTS, 0x00085014); + kgsl_regwrite(device, A4XX_RBBM_PPD_V2_SP_RB_EPOCH_TH, 0x00000B46); + kgsl_regwrite(device, A4XX_RBBM_PPD_V2_TP_CONFIG, 0xE4525111); + kgsl_regwrite(device, A4XX_RBBM_PPD_RAMP_V2_CONTROL, 0x0000000B); + + /* Enable PPD*/ + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40C); +}; + +/* + * a4xx_pwrlevel_change_settings() - Program the hardware during power level + * transitions + * @adreno_dev: The adreno device pointer + * @prelevel: The previous power level + * @postlevel: The new power level + * @post: True if called after the clock change has taken effect + */ +static void a4xx_pwrlevel_change_settings(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post) +{ + struct kgsl_device *device = &adreno_dev->dev; + static int pre; + + /* PPD programming only for A430v2 */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD) || + !test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag) || + !adreno_is_a430v2(adreno_dev)) + return; + + /* if this is a real pre, or a post without a previous pre, set pre */ + if ((post == 0) || (pre == 0 && post == 1)) + pre = 1; + else if (post == 1) + pre = 0; + + if ((prelevel == 0) && pre) { + /* Going to Non-Turbo mode - mask the throttle and reset */ + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40E); + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40C); + } else if ((postlevel == 0) && post) { + /* Going to Turbo mode - unmask the throttle and reset */ + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40A); + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E408); + } + + if (post) + pre = 0; +} + +/* + * a4xx_enable_hwcg() - Program the clock control registers + * @device: The adreno device pointer + */ +static void a4xx_enable_hwcg(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP0, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP1, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP2, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP3, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP0, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP1, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP2, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP3, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP0, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP1, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP2, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP3, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP0, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP1, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP2, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP3, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP0, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP1, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP2, 0x22222222); + 
kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP3, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP0, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP1, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP2, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP3, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP0, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP1, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP2, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP3, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP0, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP1, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP2, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP3, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB0, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB1, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB2, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB3, 0x22222222); + /* Disable L1 clocking in A420 due to CCU issues with it */ + if (adreno_is_a420(adreno_dev)) { + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB0, 0x00002020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB1, 0x00002020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB2, 0x00002020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB3, 0x00002020); + } else { + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB0, 0x00022020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB1, 0x00022020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB2, 0x00022020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB3, 0x00022020); + } + /* No CCU for A405 */ + if (!adreno_is_a405(adreno_dev)) { + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU0, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU1, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU2, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU3, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU0, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU1, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU2, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU3, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_0, + 0x00000001); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_1, + 0x00000001); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_2, + 0x00000001); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_3, + 0x00000001); + } + kgsl_regwrite(device, A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222); + kgsl_regwrite(device, 
A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); + /* + * Due to a HW timing issue, top level HW clock gating is causing + * register read/writes to be dropped in adreno a430. + * This timing issue started happening because of SP/TP power collapse. + * On targets that do not have SP/TP PC there is no timing issue. + * The HW timing issue could be fixed by + * a) disabling SP/TP power collapse + * b) or disabling HW clock gating. + * Disabling HW clock gating + NAP enabled combination has + * minimal power impact. So this option is chosen over disabling + * SP/TP power collapse. + * Revisions of A430 which chipid 2 and above do not have the issue. + */ + if (adreno_is_a430(adreno_dev) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) < 2)) + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0); + else + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2, 0); +} + +/** + * a4xx_protect_init() - Initializes register protection on a4xx + * @adreno_dev: Pointer to the device structure + * Performs register writes to enable protected access to sensitive + * registers + */ +static void a4xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int index = 0; + struct kgsl_protected_registers *iommu_regs; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A4XX_CP_PROTECT_CTRL, 0x00000007); + /* RBBM registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x4, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x8, 3); + adreno_set_protected_registers(adreno_dev, &index, 0x10, 4); + adreno_set_protected_registers(adreno_dev, &index, 0x20, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x40, 6); + adreno_set_protected_registers(adreno_dev, &index, 0x80, 4); + + /* Content protection registers */ + if (kgsl_mmu_is_secured(&device->mmu)) { + adreno_set_protected_registers(adreno_dev, &index, + A4XX_RBBM_SECVID_TSB_TRUSTED_BASE, 3); + adreno_set_protected_registers(adreno_dev, &index, + A4XX_RBBM_SECVID_TRUST_CONTROL, 1); + } + + /* CP registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x200, 7); + adreno_set_protected_registers(adreno_dev, &index, 0x580, 4); + adreno_set_protected_registers(adreno_dev, &index, A4XX_CP_PREEMPT, 1); + /* RB registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0); + + /* HLSQ registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE00, 0); + + /* VPC registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE60, 1); + + if (adreno_is_a430(adreno_dev) || adreno_is_a420(adreno_dev) || + adreno_is_a418(adreno_dev)) { + /* + * Protect registers that might cause XPU violation if + * accessed by GPU + */ + adreno_set_protected_registers(adreno_dev, &index, 0x2c00, 10); + adreno_set_protected_registers(adreno_dev, &index, 0x3300, 8); + adreno_set_protected_registers(adreno_dev, &index, 0x3400, 10); + } + + /* SMMU registers */ + iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu); + if (iommu_regs) + adreno_set_protected_registers(adreno_dev, &index, + iommu_regs->base, iommu_regs->range); +} + +static struct adreno_snapshot_sizes a4xx_snap_sizes = { + .cp_pfp = 0x14, + .vpc_mem = 2048, + .cp_meq = 64, + .shader_mem = 
0x4000, + .cp_merciu = 64, + .roq = 512, +}; + + +static void a4xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int cp_debug = A4XX_CP_DEBUG_DEFAULT; + + adreno_vbif_start(adreno_dev, a4xx_vbif_platforms, + ARRAY_SIZE(a4xx_vbif_platforms)); + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A4XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Tune the hystersis counters for SP and CP idle detection */ + kgsl_regwrite(device, A4XX_RBBM_SP_HYST_CNT, 0x10); + kgsl_regwrite(device, A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + if (adreno_is_a430(adreno_dev)) + kgsl_regwrite(device, A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); + + /* + * Enable the RBBM error reporting bits. This lets us get + * useful information on failure + */ + + kgsl_regwrite(device, A4XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + kgsl_regwrite(device, A4XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + kgsl_regwrite(device, A4XX_RBBM_RBBM_CTL, 0x00000030); + + /* + * Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang + */ + set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + gpudev->irq->mask |= (1 << A4XX_INT_MISC_HANG_DETECT); + kgsl_regwrite(device, A4XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 30) | 0xFFFF); + + /* Set the GMEM/OCMEM base address for A4XX */ + kgsl_regwrite(device, A4XX_RB_GMEM_BASE_ADDR, + (unsigned int)(adreno_dev->gmem_base >> 14)); + + /* Turn on performance counters */ + kgsl_regwrite(device, A4XX_RBBM_PERFCTR_CTL, 0x01); + + /* Enable VFD to access most of the UCHE (7 ways out of 8) */ + kgsl_regwrite(device, A4XX_UCHE_CACHE_WAYS_VFD, 0x07); + + /* Disable L2 bypass to avoid UCHE out of bounds errors */ + kgsl_regwrite(device, UCHE_TRAP_BASE_LO, 0xffff0000); + kgsl_regwrite(device, UCHE_TRAP_BASE_HI, 0xffff0000); + + /* On A420 cores turn on SKIP_IB2_DISABLE in addition to the default */ + if (adreno_is_a420(adreno_dev)) + cp_debug |= (1 << 29); + /* + * Set chicken bit to disable the speed up of bootstrap on A430 + * and its derivatives + */ + else + cp_debug |= (1 << 14); + + kgsl_regwrite(device, A4XX_CP_DEBUG, cp_debug); + + /* On A430 enable SP regfile sleep for power savings */ + if (!adreno_is_a420(adreno_dev)) { + kgsl_regwrite(device, A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, + 0x00000441); + kgsl_regwrite(device, A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, + 0x00000441); + } + + a4xx_enable_hwcg(device); + /* + * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2 + * due to timing issue with HLSQ_TP_CLK_EN + */ + if (adreno_is_a420(adreno_dev)) { + unsigned int val; + kgsl_regread(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, &val); + val &= ~A4XX_CGC_HLSQ_TP_EARLY_CYC_MASK; + val |= 2 << A4XX_CGC_HLSQ_TP_EARLY_CYC_SHIFT; + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, val); + } + + /* A430 and derivatives offers bigger chunk of CP_STATE_DEBUG regs */ + if (!adreno_is_a420(adreno_dev)) + a4xx_snap_sizes.cp_pfp = 0x34; + + if (adreno_is_a405(adreno_dev)) + gpudev->vbif_xin_halt_ctrl0_mask = + A405_VBIF_XIN_HALT_CTRL0_MASK; + + a4xx_protect_init(adreno_dev); +} + +/* + * a4xx_err_callback() - Callback for a4xx error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a4xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + switch (bit) { + case 
A4XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A4XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A4XX_RBBM_AHB_CMD, (1 << 4)); + return; + } + case A4XX_INT_RBBM_REG_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: AHB register timeout\n"); + break; + case A4XX_INT_RBBM_ME_MS_TIMEOUT: + kgsl_regread(device, A4XX_RBBM_AHB_ME_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | ME master split timeout | status=%x\n", reg); + break; + case A4XX_INT_RBBM_PFP_MS_TIMEOUT: + kgsl_regread(device, A4XX_RBBM_AHB_PFP_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | PFP master split timeout | status=%x\n", reg); + break; + case A4XX_INT_RBBM_ETS_MS_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM: ME master split timeout\n"); + break; + case A4XX_INT_RBBM_ASYNC_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ASYNC overflow\n"); + break; + case A4XX_INT_CP_OPCODE_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer opcode error interrupt\n"); + break; + case A4XX_INT_CP_RESERVED_BIT_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer reserved bit error interrupt\n"); + break; + case A4XX_INT_CP_HW_FAULT: + { + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + kgsl_regread(device, A4XX_CP_HW_FAULT, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Ringbuffer HW fault | status=%x\n", reg); + /* + * mask off this interrupt since it can spam, it will be + * turned on again when device resets + */ + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, + gpudev->irq->mask & ~(1 << A4XX_INT_CP_HW_FAULT)); + break; + } + case A4XX_INT_CP_REG_PROTECT_FAULT: + kgsl_regread(device, A4XX_CP_PROTECT_STATUS, ®); + KGSL_DRV_CRIT(device, + "CP | Protected mode error| %s | addr=%x\n", + reg & (1 << 24) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + return; + case A4XX_INT_CP_AHB_ERROR_HALT: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer AHB error interrupt\n"); + break; + case A4XX_INT_RBBM_ATB_BUS_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n"); + break; + case A4XX_INT_UCHE_OOB_ACCESS: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n"); + break; + case A4XX_INT_RBBM_DPM_CALC_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm calc error\n"); + break; + case A4XX_INT_RBBM_DPM_EPOCH_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm epoch error\n"); + break; + case A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm thermal yellow\n"); + break; + case A4XX_INT_RBBM_DPM_THERMAL_RED_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm thermal red\n"); + break; + default: + KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt\n"); + } +} + +/* Register offset defines for A4XX, in order of enum adreno_regs */ +static unsigned int a4xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_WADDR, A4XX_CP_ME_RAM_WADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_DATA, A4XX_CP_ME_RAM_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_DATA, A4XX_CP_PFP_UCODE_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_ADDR, A4XX_CP_PFP_UCODE_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A4XX_CP_WFI_PEND_CTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A4XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A4XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A4XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A4XX_CP_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A4XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A4XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A4XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A4XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A4XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A4XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_RADDR, A4XX_CP_ME_RAM_RADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A4XX_CP_ROQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A4XX_CP_ROQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A4XX_CP_MERCIU_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A4XX_CP_MERCIU_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, A4XX_CP_MERCIU_DATA2), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A4XX_CP_MEQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A4XX_CP_MEQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_HW_FAULT, A4XX_CP_HW_FAULT), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_STATUS, A4XX_CP_PROTECT_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A4XX_CP_SCRATCH_REG6), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A4XX_CP_SCRATCH_REG7), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A4XX_CP_PREEMPT), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, A4XX_CP_PREEMPT_DEBUG), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, + A4XX_CP_PREEMPT_DISABLE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A4XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A4XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A4XX_RBBM_PERFCTR_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + A4XX_RBBM_PERFCTR_LOAD_CMD0), + 
ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + A4XX_RBBM_PERFCTR_LOAD_CMD1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + A4XX_RBBM_PERFCTR_LOAD_CMD2), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + A4XX_RBBM_PERFCTR_PWR_1_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A4XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A4XX_RBBM_INT_0_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A4XX_RBBM_CLOCK_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_SEL, + A4XX_VPC_DEBUG_RAM_SEL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_READ, + A4XX_VPC_DEBUG_RAM_READ), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD, + A4XX_RBBM_INT_CLEAR_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A4XX_RBBM_RBBM_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A4XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A4XX_UCHE_INVALIDATE0), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE1, A4XX_UCHE_INVALIDATE1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + A4XX_RBBM_PERFCTR_LOAD_VALUE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + A4XX_RBBM_PERFCTR_LOAD_VALUE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONTROL, + A4XX_RBBM_SECVID_TRUST_CONTROL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + A4XX_RBBM_ALWAYSON_COUNTER_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + A4XX_RBBM_ALWAYSON_COUNTER_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, + A4XX_RBBM_SECVID_TRUST_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_CONTROL, + A4XX_RBBM_SECVID_TSB_CONTROL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + A4XX_RBBM_SECVID_TSB_TRUSTED_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + A4XX_RBBM_SECVID_TSB_TRUSTED_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0, + A4XX_VBIF_XIN_HALT_CTRL0), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1, + A4XX_VBIF_XIN_HALT_CTRL1), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_VERSION, + A4XX_VBIF_VERSION), +}; + +static const struct adreno_reg_offsets a4xx_reg_offsets = { + .offsets = a4xx_register_offsets, + .offset_0 = ADRENO_REG_REGISTER_MAX, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_0_LO, + A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO, + A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_2_LO, + A4XX_RBBM_PERFCTR_CP_2_HI, 2, A4XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO, + A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_4_LO, + A4XX_RBBM_PERFCTR_CP_4_HI, 4, A4XX_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO, + A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_6_LO, + A4XX_RBBM_PERFCTR_CP_6_HI, 6, A4XX_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO, + A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_7 }, +}; + +/* + * Special list of CP registers for 420 to account for flaws. 
This array is + * inserted into the tables during perfcounter init + */ +static struct adreno_perfcount_register a420_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_0_LO, + A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO, + A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 }, + /* + * The selector registers for 3, 5, and 7 are swizzled on the hardware. + * CP_4 and CP_6 are duped to SEL_2 and SEL_3 so we don't enable them + * here + */ + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO, + A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO, + A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO, + A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_4 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_0_LO, + A4XX_RBBM_PERFCTR_RBBM_0_HI, 8, A4XX_RBBM_PERFCTR_RBBM_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_1_LO, + A4XX_RBBM_PERFCTR_RBBM_1_HI, 9, A4XX_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_2_LO, + A4XX_RBBM_PERFCTR_RBBM_2_HI, 10, A4XX_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_3_LO, + A4XX_RBBM_PERFCTR_RBBM_3_HI, 11, A4XX_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_0_LO, + A4XX_RBBM_PERFCTR_PC_0_HI, 12, A4XX_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_1_LO, + A4XX_RBBM_PERFCTR_PC_1_HI, 13, A4XX_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_2_LO, + A4XX_RBBM_PERFCTR_PC_2_HI, 14, A4XX_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_3_LO, + A4XX_RBBM_PERFCTR_PC_3_HI, 15, A4XX_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_4_LO, + A4XX_RBBM_PERFCTR_PC_4_HI, 16, A4XX_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_5_LO, + A4XX_RBBM_PERFCTR_PC_5_HI, 17, A4XX_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_6_LO, + A4XX_RBBM_PERFCTR_PC_6_HI, 18, A4XX_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_7_LO, + A4XX_RBBM_PERFCTR_PC_7_HI, 19, A4XX_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_0_LO, + A4XX_RBBM_PERFCTR_VFD_0_HI, 20, A4XX_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_1_LO, + A4XX_RBBM_PERFCTR_VFD_1_HI, 21, A4XX_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_2_LO, + A4XX_RBBM_PERFCTR_VFD_2_HI, 22, A4XX_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_3_LO, + A4XX_RBBM_PERFCTR_VFD_3_HI, 23, A4XX_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_4_LO, + A4XX_RBBM_PERFCTR_VFD_4_HI, 24, A4XX_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_5_LO, + A4XX_RBBM_PERFCTR_VFD_5_HI, 25, A4XX_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_6_LO, + A4XX_RBBM_PERFCTR_VFD_6_HI, 26, 
A4XX_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_7_LO, + A4XX_RBBM_PERFCTR_VFD_7_HI, 27, A4XX_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_0_LO, + A4XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_1_LO, + A4XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_2_LO, + A4XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_3_LO, + A4XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_4_LO, + A4XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_5_LO, + A4XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_6_LO, + A4XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_7_LO, + A4XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_0_LO, + A4XX_RBBM_PERFCTR_VPC_0_HI, 36, A4XX_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_1_LO, + A4XX_RBBM_PERFCTR_VPC_1_HI, 37, A4XX_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_2_LO, + A4XX_RBBM_PERFCTR_VPC_2_HI, 38, A4XX_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_3_LO, + A4XX_RBBM_PERFCTR_VPC_3_HI, 39, A4XX_VPC_PERFCTR_VPC_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_0_LO, + A4XX_RBBM_PERFCTR_CCU_0_HI, 40, A4XX_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_1_LO, + A4XX_RBBM_PERFCTR_CCU_1_HI, 41, A4XX_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_2_LO, + A4XX_RBBM_PERFCTR_CCU_2_HI, 42, A4XX_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_3_LO, + A4XX_RBBM_PERFCTR_CCU_3_HI, 43, A4XX_RB_PERFCTR_CCU_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_0_LO, + A4XX_RBBM_PERFCTR_TSE_0_HI, 44, A4XX_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_1_LO, + A4XX_RBBM_PERFCTR_TSE_1_HI, 45, A4XX_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_2_LO, + A4XX_RBBM_PERFCTR_TSE_2_HI, 46, A4XX_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_3_LO, + A4XX_RBBM_PERFCTR_TSE_3_HI, 47, A4XX_GRAS_PERFCTR_TSE_SEL_3 }, +}; + + +static struct adreno_perfcount_register a4xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_0_LO, + A4XX_RBBM_PERFCTR_RAS_0_HI, 48, A4XX_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_1_LO, + A4XX_RBBM_PERFCTR_RAS_1_HI, 49, A4XX_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_2_LO, + A4XX_RBBM_PERFCTR_RAS_2_HI, 50, A4XX_GRAS_PERFCTR_RAS_SEL_2 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_3_LO, + A4XX_RBBM_PERFCTR_RAS_3_HI, 51, A4XX_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_0_LO, + A4XX_RBBM_PERFCTR_UCHE_0_HI, 52, A4XX_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_1_LO, + A4XX_RBBM_PERFCTR_UCHE_1_HI, 53, A4XX_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_2_LO, + A4XX_RBBM_PERFCTR_UCHE_2_HI, 54, A4XX_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_3_LO, + A4XX_RBBM_PERFCTR_UCHE_3_HI, 55, A4XX_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_4_LO, + A4XX_RBBM_PERFCTR_UCHE_4_HI, 56, A4XX_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_5_LO, + A4XX_RBBM_PERFCTR_UCHE_5_HI, 57, A4XX_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_6_LO, + A4XX_RBBM_PERFCTR_UCHE_6_HI, 58, A4XX_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_7_LO, + A4XX_RBBM_PERFCTR_UCHE_7_HI, 59, A4XX_UCHE_PERFCTR_UCHE_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_0_LO, + A4XX_RBBM_PERFCTR_TP_0_HI, 60, A4XX_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_1_LO, + A4XX_RBBM_PERFCTR_TP_1_HI, 61, A4XX_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_2_LO, + A4XX_RBBM_PERFCTR_TP_2_HI, 62, A4XX_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_3_LO, + A4XX_RBBM_PERFCTR_TP_3_HI, 63, A4XX_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_4_LO, + A4XX_RBBM_PERFCTR_TP_4_HI, 64, A4XX_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_5_LO, + A4XX_RBBM_PERFCTR_TP_5_HI, 65, A4XX_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_6_LO, + A4XX_RBBM_PERFCTR_TP_6_HI, 66, A4XX_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_7_LO, + A4XX_RBBM_PERFCTR_TP_7_HI, 67, A4XX_TPL1_PERFCTR_TP_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_0_LO, + A4XX_RBBM_PERFCTR_SP_0_HI, 68, A4XX_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_1_LO, + A4XX_RBBM_PERFCTR_SP_1_HI, 69, A4XX_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_2_LO, + A4XX_RBBM_PERFCTR_SP_2_HI, 70, A4XX_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_3_LO, + A4XX_RBBM_PERFCTR_SP_3_HI, 71, A4XX_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_4_LO, + A4XX_RBBM_PERFCTR_SP_4_HI, 72, A4XX_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_5_LO, + A4XX_RBBM_PERFCTR_SP_5_HI, 73, A4XX_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_6_LO, + A4XX_RBBM_PERFCTR_SP_6_HI, 74, A4XX_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_7_LO, + A4XX_RBBM_PERFCTR_SP_7_HI, 75, A4XX_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_8_LO, + A4XX_RBBM_PERFCTR_SP_8_HI, 76, A4XX_SP_PERFCTR_SP_SEL_8 }, 
+ { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_9_LO, + A4XX_RBBM_PERFCTR_SP_9_HI, 77, A4XX_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_10_LO, + A4XX_RBBM_PERFCTR_SP_10_HI, 78, A4XX_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_11_LO, + A4XX_RBBM_PERFCTR_SP_11_HI, 79, A4XX_SP_PERFCTR_SP_SEL_11 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_0_LO, + A4XX_RBBM_PERFCTR_RB_0_HI, 80, A4XX_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_1_LO, + A4XX_RBBM_PERFCTR_RB_1_HI, 81, A4XX_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_2_LO, + A4XX_RBBM_PERFCTR_RB_2_HI, 82, A4XX_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_3_LO, + A4XX_RBBM_PERFCTR_RB_3_HI, 83, A4XX_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_4_LO, + A4XX_RBBM_PERFCTR_RB_4_HI, 84, A4XX_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_5_LO, + A4XX_RBBM_PERFCTR_RB_5_HI, 85, A4XX_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_6_LO, + A4XX_RBBM_PERFCTR_RB_6_HI, 86, A4XX_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_7_LO, + A4XX_RBBM_PERFCTR_RB_7_HI, 87, A4XX_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VSC_0_LO, + A4XX_RBBM_PERFCTR_VSC_0_HI, 88, A4XX_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VSC_1_LO, + A4XX_RBBM_PERFCTR_VSC_1_HI, 89, A4XX_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PWR_0_LO, + A4XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PWR_1_LO, + A4XX_RBBM_PERFCTR_PWR_1_HI, -1, 0}, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW0, + A4XX_VBIF_PERF_CNT_HIGH0, -1, A4XX_VBIF_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW1, + A4XX_VBIF_PERF_CNT_HIGH1, -1, A4XX_VBIF_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW2, + A4XX_VBIF_PERF_CNT_HIGH2, -1, A4XX_VBIF_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW3, + A4XX_VBIF_PERF_CNT_HIGH3, -1, A4XX_VBIF_PERF_CNT_SEL3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW0, + A4XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A4XX_VBIF_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW1, + A4XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A4XX_VBIF_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW2, + A4XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A4XX_VBIF_PERF_PWR_CNT_EN2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW3, + A4XX_VBIF_PERF_PWR_CNT_HIGH3, -1, A4XX_VBIF_PERF_PWR_CNT_EN3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_ALWAYSON_COUNTER_LO, + A4XX_RBBM_ALWAYSON_COUNTER_HI, -1 }, +}; + +#define A4XX_PERFCOUNTER_GROUP(offset, name) \ + ADRENO_PERFCOUNTER_GROUP(a4xx, offset, name) + +#define 
A4XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a4xx, offset, name, flags) + +static struct adreno_perfcount_group a4xx_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + A4XX_PERFCOUNTER_GROUP(CP, cp), + A4XX_PERFCOUNTER_GROUP(RBBM, rbbm), + A4XX_PERFCOUNTER_GROUP(PC, pc), + A4XX_PERFCOUNTER_GROUP(VFD, vfd), + A4XX_PERFCOUNTER_GROUP(HLSQ, hlsq), + A4XX_PERFCOUNTER_GROUP(VPC, vpc), + A4XX_PERFCOUNTER_GROUP(CCU, ccu), + A4XX_PERFCOUNTER_GROUP(TSE, tse), + A4XX_PERFCOUNTER_GROUP(RAS, ras), + A4XX_PERFCOUNTER_GROUP(UCHE, uche), + A4XX_PERFCOUNTER_GROUP(TP, tp), + A4XX_PERFCOUNTER_GROUP(SP, sp), + A4XX_PERFCOUNTER_GROUP(RB, rb), + A4XX_PERFCOUNTER_GROUP(VSC, vsc), + A4XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A4XX_PERFCOUNTER_GROUP(VBIF, vbif), + A4XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A4XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED), +}; + +static struct adreno_perfcounters a4xx_perfcounters = { + a4xx_perfcounter_groups, + ARRAY_SIZE(a4xx_perfcounter_groups), +}; + +static struct adreno_ft_perf_counters a4xx_ft_perf_counters[] = { + {KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP_ALU_ACTIVE_CYCLES}, + {KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP0_ICL1_MISSES}, + {KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP_FS_CFLOW_INSTRUCTIONS}, + {KGSL_PERFCOUNTER_GROUP_TSE, A4XX_TSE_INPUT_PRIM_NUM}, +}; + +/* + * On A420 a number of perfcounters are un-usable. The following defines the + * array of countables that do not work and should not be used + */ +static const unsigned int a420_pc_invalid_countables[] = { + PC_INSTANCES, PC_VERTEX_HITS, PC_GENERATED_FIBERS, PC_GENERATED_WAVES, +}; + +static const unsigned int a420_vfd_invalid_countables[] = { + VFD_VPC_BYPASS_TRANS, VFD_UPPER_SHADER_FIBERS, VFD_LOWER_SHADER_FIBERS, +}; + +static const unsigned int a420_hlsq_invalid_countables[] = { + HLSQ_SP_VS_STAGE_CONSTANT, HLSQ_SP_VS_STAGE_INSTRUCTIONS, + HLSQ_SP_FS_STAGE_CONSTANT, HLSQ_SP_FS_STAGE_INSTRUCTIONS, + HLSQ_FS_STAGE_16_WAVES, HLSQ_FS_STAGE_32_WAVES, HLSQ_FS_STAGE_64_WAVES, + HLSQ_VS_STAGE_16_WAVES, HLSQ_VS_STAGE_32_WAVES, +}; + +static const unsigned int a420_uche_invalid_countables[] = { + UCHE_READ_REQUESTS_MARB, UCHE_READ_REQUESTS_SP, + UCHE_WRITE_REQUESTS_MARB, UCHE_WRITE_REQUESTS_SP, + UCHE_WRITE_REQUESTS_VPC +}; + +static const unsigned int a420_tp_invalid_countables[] = { + TP_OUTPUT_TEXELS_POINT, TP_OUTPUT_TEXELS_BILINEAR, TP_OUTPUT_TEXELS_MIP, + TP_OUTPUT_TEXELS_ANISO, TP_OUTPUT_TEXELS_OPS16, TP_OUTPUT_TEXELS_OPS32, + TP_ZERO_LOD, TP_LATENCY, TP_LATENCY_TRANS, +}; + +static const unsigned int a420_sp_invalid_countables[] = { + SP_FS_STAGE_BARY_INSTRUCTIONS, +}; + +static const unsigned int a420_rb_invalid_countables[] = { + RB_VALID_SAMPLES, RB_Z_FAIL, RB_S_FAIL, +}; + +static const unsigned int a420_ccu_invalid_countables[] = { + CCU_VBIF_STALL, CCU_VBIF_LATENCY_CYCLES, CCU_VBIF_LATENCY_SAMPLES, + CCU_Z_READ, CCU_Z_WRITE, CCU_C_READ, CCU_C_WRITE, +}; + +static const struct adreno_invalid_countables + a420_perfctr_invalid_countables[KGSL_PERFCOUNTER_GROUP_MAX] = { + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_pc, PC), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_vfd, VFD), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_hlsq, HLSQ), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_tp, TP), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_sp, SP), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_rb, RB), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_ccu, CCU), + 
ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_uche, UCHE), +}; + +static struct adreno_coresight_register a4xx_coresight_registers[] = { + { A4XX_RBBM_CFG_DEBBUS_CTLT }, + { A4XX_RBBM_CFG_DEBBUS_SEL_A }, + { A4XX_RBBM_CFG_DEBBUS_SEL_B }, + { A4XX_RBBM_CFG_DEBBUS_SEL_C }, + { A4XX_RBBM_CFG_DEBBUS_SEL_D }, + { A4XX_RBBM_CFG_DEBBUS_OPL }, + { A4XX_RBBM_CFG_DEBBUS_OPE }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_0 }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_1 }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_2 }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_3 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_0 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_1 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_2 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_3 }, + { A4XX_RBBM_CFG_DEBBUS_BYTEL_0 }, + { A4XX_RBBM_CFG_DEBBUS_BYTEL_1 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_0 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_1 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_2 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_3 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_0 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_1 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_2 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_3 }, + { A4XX_RBBM_CFG_DEBBUS_NIBBLEE }, + { A4XX_RBBM_CFG_DEBBUS_PTRC0 }, + { A4XX_RBBM_CFG_DEBBUS_PTRC1 }, + { A4XX_RBBM_CFG_DEBBUS_CLRC }, + { A4XX_RBBM_CFG_DEBBUS_LOADIVT }, + { A4XX_RBBM_CFG_DEBBUS_IDX }, + { A4XX_RBBM_CFG_DEBBUS_LOADREG }, + { A4XX_RBBM_EXT_TRACE_BUS_CTL }, + { A4XX_RBBM_CFG_DEBBUS_CTLM }, +}; + +static void a4xx_perfcounter_init(struct adreno_device *adreno_dev) +{ + if (adreno_is_a420(adreno_dev)) { + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_perfcounters *counters = gpudev->perfcounters; + + /* + * The CP counters on A420 are... special. Some of the counters + * are swizzled so only a subset of them are usable + */ + + if (counters != NULL) { + counters->groups[KGSL_PERFCOUNTER_GROUP_CP].regs = + a420_perfcounters_cp; + counters->groups[KGSL_PERFCOUNTER_GROUP_CP].reg_count = + ARRAY_SIZE(a420_perfcounters_cp); + } + + /* + * Also on A420 a number of the countables are not functional so + * we maintain a blacklist of countables to protect the user + */ + + gpudev->invalid_countables = a420_perfctr_invalid_countables; + } + + /* + * Enable the GPU busy count counter. 
This is a fixed counter on + * A4XX so we don't need to bother checking the return value + */ + + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + NULL, NULL, PERFCOUNTER_FLAG_KERNEL); +} + +static void a4xx_perfcounter_close(struct adreno_device *adreno_dev) +{ + adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + PERFCOUNTER_FLAG_KERNEL); +} + +static const unsigned int _a4xx_pwron_fixup_fs_instructions[] = { + 0x00000000, 0x304CC300, 0x00000000, 0x304CC304, + 0x00000000, 0x304CC308, 0x00000000, 0x304CC30C, + 0x00000000, 0x304CC310, 0x00000000, 0x304CC314, + 0x00000000, 0x304CC318, 0x00000000, 0x304CC31C, + 0x00000000, 0x304CC320, 0x00000000, 0x304CC324, + 0x00000000, 0x304CC328, 0x00000000, 0x304CC32C, + 0x00000000, 0x304CC330, 0x00000000, 0x304CC334, + 0x00000000, 0x304CC338, 0x00000000, 0x304CC33C, + 0x00000000, 0x00000400, 0x00020000, 0x63808003, + 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, + 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, + 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, + 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, + 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, + 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, + 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, + 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, + 0x00000003, 0x80D00003, 0x00000007, 0x80D00007, + 0x0000000B, 0x80D0000B, 0x0000000F, 0x80D0000F, + 0x00000013, 0x80D00013, 0x00000017, 0x80D00017, + 0x0000001B, 0x80D0001B, 0x0000001F, 0x80D0001F, + 0x00000023, 0x80D00023, 0x00000027, 0x80D00027, + 0x0000002B, 0x80D0002B, 0x0000002F, 0x80D0002F, + 0x00000033, 0x80D00033, 0x00000037, 0x80D00037, + 0x0000003B, 0x80D0003B, 0x0000003F, 0x80D0003F, + 0x00000000, 0x00000400, 0xFFFFFFFF, 0x304CC300, + 0xFFFFFFFF, 0x304CC304, 0xFFFFFFFF, 0x304CC308, + 0xFFFFFFFF, 0x304CC30C, 0xFFFFFFFF, 0x304CC310, + 0xFFFFFFFF, 0x304CC314, 0xFFFFFFFF, 0x304CC318, + 0xFFFFFFFF, 0x304CC31C, 0xFFFFFFFF, 0x304CC320, + 0xFFFFFFFF, 0x304CC324, 0xFFFFFFFF, 0x304CC328, + 0xFFFFFFFF, 0x304CC32C, 0xFFFFFFFF, 0x304CC330, + 0xFFFFFFFF, 0x304CC334, 0xFFFFFFFF, 0x304CC338, + 0xFFFFFFFF, 0x304CC33C, 0x00000000, 0x00000400, + 0x00020000, 0x63808003, 0x00060004, 0x63828007, + 0x000A0008, 0x6384800B, 0x000E000C, 0x6386800F, + 0x00120010, 0x63888013, 0x00160014, 0x638A8017, + 0x001A0018, 0x638C801B, 0x001E001C, 0x638E801F, + 0x00220020, 0x63908023, 0x00260024, 0x63928027, + 0x002A0028, 0x6394802B, 0x002E002C, 0x6396802F, + 0x00320030, 0x63988033, 0x00360034, 0x639A8037, + 0x003A0038, 0x639C803B, 0x003E003C, 0x639E803F, + 0x00000000, 0x00000400, 0x00000003, 0x80D00003, + 0x00000007, 0x80D00007, 0x0000000B, 0x80D0000B, + 0x0000000F, 0x80D0000F, 0x00000013, 0x80D00013, + 0x00000017, 0x80D00017, 0x0000001B, 0x80D0001B, + 0x0000001F, 0x80D0001F, 0x00000023, 0x80D00023, + 0x00000027, 0x80D00027, 0x0000002B, 0x80D0002B, + 0x0000002F, 0x80D0002F, 0x00000033, 0x80D00033, + 0x00000037, 0x80D00037, 0x0000003B, 0x80D0003B, + 0x0000003F, 0x80D0003F, 0x00000000, 0x03000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/** + * adreno_a4xx_pwron_fixup_init() - Initalize a special command buffer to run a + * post-power collapse shader workaround + * @adreno_dev: Pointer to a adreno_device struct + * + * Some targets require a special workaround shader to be executed after + * power-collapse. 
Construct the IB once at init time and keep it + * handy + * + * Returns: 0 on success or negative on error + */ +int adreno_a4xx_pwron_fixup_init(struct adreno_device *adreno_dev) +{ + unsigned int *cmds; + unsigned int count = ARRAY_SIZE(_a4xx_pwron_fixup_fs_instructions); + unsigned int num_units = count >> 5; + int ret; + + /* Return if the fixup is already in place */ + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + return 0; + + ret = kgsl_allocate_global(&adreno_dev->dev, + &adreno_dev->pwron_fixup, PAGE_SIZE, + KGSL_MEMFLAGS_GPUREADONLY, 0); + + if (ret) + return ret; + + cmds = adreno_dev->pwron_fixup.hostptr; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_SP_MODE_CONTROL, 1); + *cmds++ = 0x00000018; + *cmds++ = cp_type0_packet(A4XX_TPL1_TP_MODE_CONTROL, 1); + *cmds++ = 0x00000002; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4xx_HLSQ_CONTROL_0, 5); + *cmds++ = 0x800001a0; + *cmds++ = 0xfcfc0000; + *cmds++ = 0xcff3f3f0; + *cmds++ = 0xfcfcfcfc; + *cmds++ = 0xccfcfcfc; + *cmds++ = cp_type0_packet(A4XX_SP_FS_CTRL_1, 1); + *cmds++ = 0x80000000; + *cmds++ = cp_type0_packet(A4XX_HLSQ_UPDATE_CONTROL, 1); + *cmds++ = 0x00000038; + *cmds++ = cp_type0_packet(A4XX_HLSQ_MODE_CONTROL, 1); + *cmds++ = 0x00000003; + *cmds++ = cp_type0_packet(A4XX_HLSQ_UPDATE_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_TPL1_TP_TEX_TSIZE_1, 1); + *cmds++ = 0x00008000; + *cmds++ = cp_type0_packet(A4xx_HLSQ_CONTROL_0, 2); + *cmds++ = 0x800001a0; + *cmds++ = 0xfcfc0000; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CS_CONTROL, 1); + *cmds++ = 0x00018030 | (num_units << 24); + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_NDRANGE_0, 7); + *cmds++ = 0x000000fd; + *cmds++ = 0x00000040; + *cmds++ = 0x00000000; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_CONTROL_0, 2); + *cmds++ = 0x0001201f; + *cmds++ = 0x0000f003; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_KERNEL_CONST, 1); + *cmds++ = 0x0001800b; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_KERNEL_GROUP_X, 3); + *cmds++ = 0x00000001; + *cmds++ = 0x00000001; + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_WG_OFFSET, 1); + *cmds++ = 0x00000022; + *cmds++ = cp_type0_packet(A4XX_UCHE_INVALIDATE0, 2); + *cmds++ = 0x00000000; + *cmds++ = 0x00000012; + *cmds++ = cp_type0_packet(A4XX_HLSQ_MODE_CONTROL, 1); + *cmds++ = 0x00000003; + *cmds++ = cp_type0_packet(A4XX_SP_SP_CTRL, 1); + *cmds++ = 0x00920000; + *cmds++ = cp_type0_packet(A4XX_SP_INSTR_CACHE_CTRL, 1); + *cmds++ = 0x00000260; + *cmds++ = cp_type0_packet(A4XX_SP_CS_CTRL_0, 1); + *cmds++ = 0x00200400; + *cmds++ = cp_type0_packet(A4XX_SP_CS_OBJ_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_SP_CS_OBJ_START, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_SP_CS_LENGTH, 1); + *cmds++ = num_units; + *cmds++ = cp_type0_packet(A4XX_SP_MODE_CONTROL, 1); + *cmds++ = 0x00000018; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); + *cmds++ = 0x00340000 | (num_units << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = 0x00000000; + + memcpy(cmds, _a4xx_pwron_fixup_fs_instructions, count << 2); + cmds += count; + + *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Remember the number of dwords in the command buffer for when we + * program the indirect buffer 
call in the ringbuffer + */ + adreno_dev->pwron_fixup_dwords = + (cmds - (unsigned int *) adreno_dev->pwron_fixup.hostptr); + + /* Mark the flag in ->priv to show that we have the fix */ + set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + return 0; +} + +static int a4xx_hw_init(struct adreno_device *adreno_dev) +{ + a4xx_enable_pc(adreno_dev); + a4xx_enable_ppd(adreno_dev); + + return 0; +} + +/* + * a4xx_rb_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, common function shared between + * a4xx devices + */ +static int a4xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 20); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + *cmds++ = cp_type3_packet(CP_ME_INIT, 17); + + /* + * Ordinal 2 of ME_INIT packet, the bits being set are: + * Ordinal 3, 4, 5-12, 14, 15, 16, 17, 18 are present + * Microcode Default Reset Control = 3 + */ + *cmds++ = 0x000003f7; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000080; + *cmds++ = 0x00000100; + *cmds++ = 0x00000180; + *cmds++ = 0x00006600; + *cmds++ = 0x00000150; + *cmds++ = 0x0000014e; + *cmds++ = 0x00000154; + /* MAX Context */ + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* Enable protected mode registers for A3XX/A4XX */ + *cmds++ = 0x20000000; + + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_PREEMPT_ENABLE, 1); + *cmds++ = 1; + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "CP initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } + + return ret; +} + +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrlt, &a4xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_sela, &a4xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_selb, &a4xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_selc, &a4xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_seld, &a4xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_opl, &a4xx_coresight_registers[5]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ope, &a4xx_coresight_registers[6]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl0, &a4xx_coresight_registers[7]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl1, &a4xx_coresight_registers[8]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl2, &a4xx_coresight_registers[9]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl3, &a4xx_coresight_registers[10]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl0, &a4xx_coresight_registers[11]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl1, &a4xx_coresight_registers[12]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl2, &a4xx_coresight_registers[13]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl3, &a4xx_coresight_registers[14]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_bytel0, &a4xx_coresight_registers[15]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_bytel1, &a4xx_coresight_registers[16]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte0, &a4xx_coresight_registers[17]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte1, &a4xx_coresight_registers[18]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte2, &a4xx_coresight_registers[19]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte3, 
&a4xx_coresight_registers[20]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske0, &a4xx_coresight_registers[21]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske1, &a4xx_coresight_registers[22]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske2, &a4xx_coresight_registers[23]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske3, &a4xx_coresight_registers[24]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_nibblee, &a4xx_coresight_registers[25]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ptrc0, &a4xx_coresight_registers[26]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ptrc1, &a4xx_coresight_registers[27]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_clrc, &a4xx_coresight_registers[28]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_loadivt, &a4xx_coresight_registers[29]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_idx, &a4xx_coresight_registers[30]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_loadreg, &a4xx_coresight_registers[31]); +static ADRENO_CORESIGHT_ATTR(ext_tracebus_ctl, &a4xx_coresight_registers[32]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrlm, &a4xx_coresight_registers[33]); + + +static struct attribute *a4xx_coresight_attrs[] = { + &coresight_attr_cfg_debbus_ctrlt.attr.attr, + &coresight_attr_cfg_debbus_sela.attr.attr, + &coresight_attr_cfg_debbus_selb.attr.attr, + &coresight_attr_cfg_debbus_selc.attr.attr, + &coresight_attr_cfg_debbus_seld.attr.attr, + &coresight_attr_cfg_debbus_opl.attr.attr, + &coresight_attr_cfg_debbus_ope.attr.attr, + &coresight_attr_cfg_debbus_ivtl0.attr.attr, + &coresight_attr_cfg_debbus_ivtl1.attr.attr, + &coresight_attr_cfg_debbus_ivtl2.attr.attr, + &coresight_attr_cfg_debbus_ivtl3.attr.attr, + &coresight_attr_cfg_debbus_maskl0.attr.attr, + &coresight_attr_cfg_debbus_maskl1.attr.attr, + &coresight_attr_cfg_debbus_maskl2.attr.attr, + &coresight_attr_cfg_debbus_maskl3.attr.attr, + &coresight_attr_cfg_debbus_bytel0.attr.attr, + &coresight_attr_cfg_debbus_bytel1.attr.attr, + &coresight_attr_cfg_debbus_ivte0.attr.attr, + &coresight_attr_cfg_debbus_ivte1.attr.attr, + &coresight_attr_cfg_debbus_ivte2.attr.attr, + &coresight_attr_cfg_debbus_ivte3.attr.attr, + &coresight_attr_cfg_debbus_maske0.attr.attr, + &coresight_attr_cfg_debbus_maske1.attr.attr, + &coresight_attr_cfg_debbus_maske2.attr.attr, + &coresight_attr_cfg_debbus_maske3.attr.attr, + &coresight_attr_cfg_debbus_nibblee.attr.attr, + &coresight_attr_cfg_debbus_ptrc0.attr.attr, + &coresight_attr_cfg_debbus_ptrc1.attr.attr, + &coresight_attr_cfg_debbus_clrc.attr.attr, + &coresight_attr_cfg_debbus_loadivt.attr.attr, + &coresight_attr_cfg_debbus_idx.attr.attr, + &coresight_attr_cfg_debbus_loadreg.attr.attr, + &coresight_attr_ext_tracebus_ctl.attr.attr, + &coresight_attr_cfg_debbus_ctrlm.attr.attr, + NULL, +}; + +static const struct attribute_group a4xx_coresight_group = { + .attrs = a4xx_coresight_attrs, +}; + +static const struct attribute_group *a4xx_coresight_groups[] = { + &a4xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a4xx_coresight = { + .registers = a4xx_coresight_registers, + .count = ARRAY_SIZE(a4xx_coresight_registers), + .groups = a4xx_coresight_groups, +}; + +#define A4XX_INT_MASK \ + ((1 << A4XX_INT_RBBM_AHB_ERROR) | \ + (1 << A4XX_INT_RBBM_REG_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_ETS_MS_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_ASYNC_OVERFLOW) | \ + (1 << A4XX_INT_CP_SW) | \ + (1 << A4XX_INT_CP_OPCODE_ERROR) | \ + (1 << A4XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A4XX_INT_CP_HW_FAULT) | \ + (1 << A4XX_INT_CP_IB1_INT) | \ + 
(1 << A4XX_INT_CP_IB2_INT) | \ + (1 << A4XX_INT_CP_RB_INT) | \ + (1 << A4XX_INT_CACHE_FLUSH_TS) | \ + (1 << A4XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A4XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A4XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A4XX_INT_UCHE_OOB_ACCESS) | \ + (1 << A4XX_INT_RBBM_DPM_CALC_ERR) | \ + (1 << A4XX_INT_RBBM_DPM_EPOCH_ERR) | \ + (1 << A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR) |\ + (1 << A4XX_INT_RBBM_DPM_THERMAL_RED_ERR)) + + +static struct adreno_irq_funcs a4xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */ + /* 3 - RBBM_ME_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + /* 4 - RBBM_PFP_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 5 - RBBM_ETS_MS_TIMEOUT */ + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 7 - RBBM_GPC_ERR */ + ADRENO_IRQ_CALLBACK(adreno_dispatcher_preempt_callback), /* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + /* 10 - CP_RESERVED_BIT_ERROR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 11 - CP_HW_FAULT */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CP_REG_PROTECT_FAULT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - CP_AHB_ERROR_FAULT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */ + /* 24 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - Unused */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - RBBM_TRACE_MISR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 28 - RBBM_DPM_CALC_ERR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 29 - RBBM_DPM_EPOCH_ERR */ + /* 30 - RBBM_DPM_THERMAL_YELLOW_ERR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + /* 31 - RBBM_DPM_THERMAL_RED_ERR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), +}; + +static struct adreno_irq a4xx_irq = { + .funcs = a4xx_irq_funcs, + .mask = A4XX_INT_MASK, +}; + +static struct adreno_snapshot_data a4xx_snapshot_data = { + .sect_sizes = &a4xx_snap_sizes, +}; + +/** + * a4xx_preempt_trig_state() - Schedule preemption in TRIGGERRED + * state + * @adreno_dev: Device which is in TRIGGERRED state + */ +static void a4xx_preempt_trig_state( + struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + unsigned int rbbase, val; + + /* + * Hardware not yet idle means that preemption interrupt + * may still occur, nothing to do here until interrupt signals + * completion of preemption, just return here + */ + if (!adreno_hw_isidle(adreno_dev)) + return; + + /* + * We just changed states, reschedule dispatcher to change + * preemption states + */ + if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED != 
+ atomic_read(&dispatcher->preemption_state)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * H/W is idle and we did not get a preemption interrupt, may + * be device went idle w/o encountering any preempt token or + * we already preempted w/o interrupt + */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase); + /* Did preemption occur, if so then change states and return */ + if (rbbase != adreno_dev->cur_rb->buffer_desc.gpuaddr) { + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val); + if (val && rbbase == adreno_dev->next_rb->buffer_desc.gpuaddr) { + KGSL_DRV_INFO(device, + "Preemption completed without interrupt\n"); + trace_adreno_hw_preempt_trig_to_comp(adreno_dev->cur_rb, + adreno_dev->next_rb); + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(device); + return; + } + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + /* reschedule dispatcher to take care of the fault */ + adreno_dispatcher_schedule(device); + return; + } + /* + * Check if preempt token was submitted after preemption trigger, if so + * then preemption should have occurred, since device is already idle it + * means something went wrong - trigger FT + */ + if (dispatcher->preempt_token_submit) { + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + /* reschedule dispatcher to take care of the fault */ + adreno_dispatcher_schedule(device); + return; + } + /* + * Preempt token was not submitted after preemption trigger so device + * may have gone idle before preemption could occur, if there are + * commands that got submitted to current RB after triggering preemption + * then submit them as those commands may have a preempt token in them + */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &adreno_dev->cur_rb->rptr); + if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) { + /* + * Memory barrier before informing the + * hardware of new commands + */ + mb(); + kgsl_pwrscale_busy(device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, + adreno_dev->cur_rb->wptr); + return; + } + + /* Submit preempt token to make preemption happen */ + if (adreno_drawctxt_switch(adreno_dev, adreno_dev->cur_rb, NULL, 0)) + BUG(); + if (adreno_ringbuffer_submit_preempt_token(adreno_dev->cur_rb, + adreno_dev->next_rb)) + BUG(); + dispatcher->preempt_token_submit = 1; + adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr; + trace_adreno_hw_preempt_token_submit(adreno_dev->cur_rb, + adreno_dev->next_rb); +} + +/** + * a4xx_preempt_clear_state() - Schedule preemption in + * CLEAR state. Preemption can be issued in this state. 
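+ *
+ * A rough sketch of the flow implemented below: pick the highest
+ * priority ringbuffer that has work pending, program the CP for the
+ * incoming ringbuffer and fire the preemption trigger, which moves
+ * the state machine to TRIGGERED:
+ *
+ *	rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev);
+ *	if (rb && rb != adreno_dev->cur_rb) {
+ *		a4xx_preemption_start(adreno_dev, rb);
+ *		adreno_dev->next_rb = rb;
+ *		adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
+ *	}
+ *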
+ * @adreno_dev: Device which is in CLEAR state + */ +static void a4xx_preempt_clear_state( + struct adreno_device *adreno_dev) + +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher_cmdqueue *dispatch_tempq; + struct kgsl_cmdbatch *cmdbatch; + struct adreno_ringbuffer *highest_busy_rb; + int switch_low_to_high; + int ret; + + /* Device not awake means there is nothing to do */ + if (!kgsl_state_is_awake(device)) + return; + + /* keep updating the current rptr when preemption is clear */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &(adreno_dev->cur_rb->rptr)); + + highest_busy_rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev); + if (!highest_busy_rb) + return; + + switch_low_to_high = adreno_compare_prio_level( + highest_busy_rb->id, + adreno_dev->cur_rb->id); + + /* already current then return */ + if (!switch_low_to_high) + return; + + if (switch_low_to_high < 0) { + /* + * if switching to lower priority make sure that the rptr and + * wptr are equal, when the lower rb is not starved + */ + if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) + return; + /* + * switch to default context because when we switch back + * to higher context then its not known which pt will + * be current, so by making it default here the next + * commands submitted will set the right pt + */ + ret = adreno_drawctxt_switch(adreno_dev, + adreno_dev->cur_rb, + NULL, 0); + /* + * lower priority RB has to wait until space opens up in + * higher RB + */ + if (ret) + return; + + adreno_writereg(adreno_dev, + ADRENO_REG_CP_PREEMPT_DISABLE, 1); + } + + /* + * setup registers to do the switch to highest priority RB + * which is not empty or may be starving away(poor thing) + */ + a4xx_preemption_start(adreno_dev, highest_busy_rb); + + /* turn on IOMMU as the preemption may trigger pt switch */ + kgsl_mmu_enable_clk(&device->mmu); + + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_TRIGGERED); + + adreno_dev->next_rb = highest_busy_rb; + mod_timer(&dispatcher->preempt_timer, jiffies + + msecs_to_jiffies(ADRENO_DISPATCH_PREEMPT_TIMEOUT)); + + trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb, + adreno_dev->next_rb); + /* issue PREEMPT trigger */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1); + /* + * IOMMU clock can be safely switched off after the timestamp + * of the first command in the new rb + */ + dispatch_tempq = &adreno_dev->next_rb->dispatch_q; + if (dispatch_tempq->head != dispatch_tempq->tail) + cmdbatch = dispatch_tempq->cmd_q[dispatch_tempq->head]; + else + cmdbatch = NULL; + if (cmdbatch) + adreno_ringbuffer_mmu_disable_clk_on_ts(device, + adreno_dev->next_rb, + cmdbatch->global_ts); + else + adreno_ringbuffer_mmu_disable_clk_on_ts(device, + adreno_dev->next_rb, adreno_dev->next_rb->timestamp); + /* submit preempt token packet to ensure preemption */ + if (switch_low_to_high < 0) { + ret = adreno_ringbuffer_submit_preempt_token( + adreno_dev->cur_rb, adreno_dev->next_rb); + /* + * unexpected since we are submitting this when rptr = wptr, + * this was checked above already + */ + BUG_ON(ret); + dispatcher->preempt_token_submit = 1; + adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr; + } else { + dispatcher->preempt_token_submit = 0; + adreno_dispatcher_schedule(device); + adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF; + } +} + +/** + * a4xx_preempt_complete_state() - Schedule preemption in + * COMPLETE state + * @adreno_dev: Device which 
is in COMPLETE state + */ +static void a4xx_preempt_complete_state( + struct adreno_device *adreno_dev) + +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher_cmdqueue *dispatch_q; + unsigned int wptr, rbbase; + unsigned int val, val1; + + del_timer_sync(&dispatcher->preempt_timer); + + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &val); + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val1); + + if (val || !val1) { + KGSL_DRV_ERR(device, + "Invalid state after preemption CP_PREEMPT: %08x, CP_PREEMPT_DEBUG: %08x\n", + val, val1); + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_dispatcher_schedule(device); + return; + } + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase); + if (rbbase != adreno_dev->next_rb->buffer_desc.gpuaddr) { + KGSL_DRV_ERR(device, + "RBBASE incorrect after preemption, expected %x got %016llx\b", + rbbase, + adreno_dev->next_rb->buffer_desc.gpuaddr); + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_dispatcher_schedule(device); + return; + } + + a4xx_preemption_save(adreno_dev, adreno_dev->cur_rb); + + dispatch_q = &(adreno_dev->cur_rb->dispatch_q); + /* new RB is the current RB */ + trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb, + adreno_dev->cur_rb); + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->cur_rb->preempted_midway = 0; + adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF; + adreno_dev->next_rb = NULL; + if (adreno_disp_preempt_fair_sched) { + /* starved rb is now scheduled so unhalt dispatcher */ + if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED == + adreno_dev->cur_rb->starve_timer_state) + adreno_put_gpu_halt(adreno_dev); + adreno_dev->cur_rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED; + adreno_dev->cur_rb->sched_timer = jiffies; + /* + * If the outgoing RB is has commands then set the + * busy time for it + */ + if (adreno_dev->prev_rb->rptr != adreno_dev->prev_rb->wptr) { + adreno_dev->prev_rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT; + adreno_dev->prev_rb->sched_timer = jiffies; + } else { + adreno_dev->prev_rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + } + } + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_CLEAR); + if (adreno_compare_prio_level(adreno_dev->prev_rb->id, + adreno_dev->cur_rb->id) < 0) { + if (adreno_dev->prev_rb->wptr_preempt_end != + adreno_dev->prev_rb->rptr) + adreno_dev->prev_rb->preempted_midway = 1; + } else if (adreno_dev->prev_rb->wptr_preempt_end != + adreno_dev->prev_rb->rptr) { + BUG(); + } + /* submit wptr if required for new rb */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + if (adreno_dev->cur_rb->wptr != wptr) { + kgsl_pwrscale_busy(device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, + adreno_dev->cur_rb->wptr); + } + /* clear preemption register */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, 0); + adreno_preempt_process_dispatch_queue(adreno_dev, dispatch_q); +} + +static void a4xx_preemption_schedule( + struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + switch (atomic_read(&dispatcher->preemption_state)) { + case ADRENO_DISPATCHER_PREEMPT_CLEAR: + 
a4xx_preempt_clear_state(adreno_dev); + break; + case ADRENO_DISPATCHER_PREEMPT_TRIGGERED: + a4xx_preempt_trig_state(adreno_dev); + /* + * if we transitioned to next state then fall-through + * processing to next state + */ + if (!adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_COMPLETE)) + break; + case ADRENO_DISPATCHER_PREEMPT_COMPLETE: + a4xx_preempt_complete_state(adreno_dev); + break; + default: + BUG(); + } + + mutex_unlock(&device->mutex); +} + +struct adreno_gpudev adreno_a4xx_gpudev = { + .reg_offsets = &a4xx_reg_offsets, + .ft_perf_counters = a4xx_ft_perf_counters, + .ft_perf_counters_count = ARRAY_SIZE(a4xx_ft_perf_counters), + .perfcounters = &a4xx_perfcounters, + .irq = &a4xx_irq, + .irq_trace = trace_kgsl_a4xx_irq_status, + .snapshot_data = &a4xx_snapshot_data, + .num_prio_levels = ADRENO_PRIORITY_MAX_RB_LEVELS, + .vbif_xin_halt_ctrl0_mask = A4XX_VBIF_XIN_HALT_CTRL0_MASK, + + .perfcounter_init = a4xx_perfcounter_init, + .perfcounter_close = a4xx_perfcounter_close, + .rb_init = a4xx_rb_init, + .hw_init = a4xx_hw_init, + .microcode_read = a3xx_microcode_read, + .microcode_load = a3xx_microcode_load, + .coresight = &a4xx_coresight, + .start = a4xx_start, + .snapshot = a4xx_snapshot, + .is_sptp_idle = a4xx_is_sptp_idle, + .pwrlevel_change_settings = a4xx_pwrlevel_change_settings, + .regulator_enable = a4xx_regulator_enable, + .regulator_disable = a4xx_regulator_disable, + .preemption_pre_ibsubmit = a4xx_preemption_pre_ibsubmit, + .preemption_token = a4xx_preemption_token, + .preemption_schedule = a4xx_preemption_schedule, +}; diff --git a/drivers/gpu/msm/adreno_a4xx.h b/drivers/gpu/msm/adreno_a4xx.h new file mode 100644 index 000000000000..93e54e82a48c --- /dev/null +++ b/drivers/gpu/msm/adreno_a4xx.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _ADRENO_A4XX_H_ +#define _ADRENO_A4XX_H_ + +void a4xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +#endif diff --git a/drivers/gpu/msm/adreno_a4xx_snapshot.c b/drivers/gpu/msm/adreno_a4xx_snapshot.c new file mode 100644 index 000000000000..96080d3b6d9f --- /dev/null +++ b/drivers/gpu/msm/adreno_a4xx_snapshot.c @@ -0,0 +1,605 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/io.h> +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_snapshot.h" +#include "a4xx_reg.h" +#include "adreno_snapshot.h" +#include "adreno_a4xx.h" + +/* + * Set of registers to dump for A4XX on snapshot. 
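+ * (For example, the first pair below, 0x0000 and 0x0002, dumps
+ * registers 0x0000 through 0x0002.)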
+ * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a4xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026, + 0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066, + 0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF, + /* CP */ + 0x0200, 0x0226, 0x0228, 0x0233, 0x0240, 0x0258, 0x04C0, 0x04D0, + 0x04D2, 0x04DD, 0x0500, 0x050B, 0x0578, 0x058F, + /* VSC */ + 0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51, + /* GRAS */ + 0x0C80, 0x0C81, 0x0C88, 0x0C8F, + /* RB */ + 0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2, + /* PC */ + 0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23, + /* VFD */ + 0x0E40, 0x0E4A, + /* VPC */ + 0x0E60, 0x0E61, 0x0E63, 0x0E68, + /* UCHE */ + 0x0E80, 0x0E84, 0x0E88, 0x0E95, + /* GRAS CTX 0 */ + 0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E, + /* PC CTX 0 */ + 0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7, + /* VFD CTX 0 */ + 0x2200, 0x2204, 0x2208, 0x22A9, + /* GRAS CTX 1 */ + 0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E, + /* PC CTX 1 */ + 0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7, + /* VFD CTX 1 */ + 0x2600, 0x2604, 0x2608, 0x26A9, +}; + +static const unsigned int a4xx_sp_tp_registers[] = { + /* SP */ + 0x0EC0, 0x0ECF, + /* TPL1 */ + 0x0F00, 0x0F0B, + /* SP CTX 0 */ + 0x22C0, 0x22C1, 0x22C4, 0x22E5, 0x22E8, 0x22F8, 0x2300, 0x2306, + 0x230C, 0x2312, 0x2318, 0x2339, 0x2340, 0x2360, + /* TPL1 CTX 0 */ + 0x2380, 0x2382, 0x2384, 0x238F, 0x23A0, 0x23A6, + /* SP CTX 1 */+ + 0x26C0, 0x26C1, 0x26C4, 0x26E5, 0x26E8, 0x26F8, 0x2700, 0x2706, + 0x270C, 0x2712, 0x2718, 0x2739, 0x2740, 0x2760, + /* TPL1 CTX 1 */ + 0x2780, 0x2782, 0x2784, 0x278F, 0x27A0, 0x27A6, +}; + +static const unsigned int a4xx_ppd_registers[] = { + /* V2 Thresholds */ + 0x01B2, 0x01B5, + /* Control and Status */ + 0x01B9, 0x01BE, +}; + +static const unsigned int a4xx_xpu_registers[] = { + /* XPU */ + 0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20, + 0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40, + 0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95, +}; + +static const unsigned int a4xx_vbif_ver_20000000_registers[] = { + /* VBIF version 0x20000000 & IOMMU V1 */ + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022, + 0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, + 0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, + 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, + 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, + 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, + 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C, + 0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416, + 0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436, + 0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480, + 0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004, + 0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016, + 0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200, + 0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802, + 0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816, + 0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF, + 0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 
0x4925, + 0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E, + 0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00, + 0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10, + 0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60, + 0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3, + 0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B, + 0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0, + 0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6, + 0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416, + 0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780, + 0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4, + 0x67D6, 0x67D6, 0x67EE, 0x67EE, +}; + +static const unsigned int a4xx_vbif_ver_20020000_registers[] = { + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022, + 0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, + 0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, + 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, + 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, + 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, + 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x4800, 0x4802, + 0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816, + 0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF, + 0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925, + 0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E, + 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00, 0x4E80, 0x4E80, + 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10, 0x4F18, 0x4F18, + 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60, 0x4F80, 0x4F81, + 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3, 0x6000, 0x6001, + 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B, 0x61FD, 0x61FD, + 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0, 0x63C0, 0x63C1, + 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6, 0x63EE, 0x63EE, + 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416, 0x6418, 0x641B, + 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780, 0x67A0, 0x67A0, + 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4, 0x67D6, 0x67D6, + 0x67EE, 0x67EE, +}; + +static const unsigned int a4xx_vbif_ver_20050000_registers[] = { + /* VBIF version 0x20050000 and 0x20090000 */ + 0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036, + 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049, + 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D, + 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098, + 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0, + 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108, + 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125, + 0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410, +}; + +static const struct adreno_vbif_snapshot_registers + a4xx_vbif_snapshot_registers[] = { + { 0x20000000, a4xx_vbif_ver_20000000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20000000_registers)/2}, + { 0x20020000, a4xx_vbif_ver_20020000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2}, + { 0x20050000, a4xx_vbif_ver_20050000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2}, + { 0x20070000, a4xx_vbif_ver_20020000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2}, + { 0x20090000, 
a4xx_vbif_ver_20050000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2}, +}; + +#define A4XX_NUM_SHADER_BANKS 4 +#define A405_NUM_SHADER_BANKS 1 +/* Shader memory size in words */ +#define A4XX_SHADER_MEMORY_SIZE 0x4000 + +static const struct adreno_debugbus_block a4xx_debugbus_blocks[] = { + { A4XX_RBBM_DEBBUS_CP_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RBBM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VBIF_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_HLSQ_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_UCHE_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_DPM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TESS_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_PC_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VFD_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VPC_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TSE_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RAS_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VSC_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_COM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_DCOM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_SP_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_0_ID, 0x100 }, +}; + +static const struct adreno_debugbus_block a420_debugbus_blocks[] = { + { A4XX_RBBM_DEBBUS_SP_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_SP_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_SP_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_3_ID, 0x100, }, +}; + +/** + * a4xx_snapshot_shader_memory - Helper function to dump the GPU shader + * memory to the snapshot buffer. 
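+ * The section payload is one block of shader_read_len words per
+ * shader bank (A4XX_NUM_SHADER_BANKS banks, or a single bank on
+ * A405); each bank is selected through A4XX_HLSQ_SPTP_RDSEL before
+ * its contents are read out word by word.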
+ * @device: GPU device whose shader memory is to be dumped + * @buf: Pointer to binary snapshot data blob being made + * @remain: Number of remaining bytes in the snapshot blob + * @priv: Unused parameter + * + */ +static size_t a4xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int i, j; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int shader_read_len = A4XX_SHADER_MEMORY_SIZE; + unsigned int shader_banks = A4XX_NUM_SHADER_BANKS; + + if (shader_read_len > (device->shader_mem_len >> 2)) + shader_read_len = (device->shader_mem_len >> 2); + + if (adreno_is_a405(adreno_dev)) + shader_banks = A405_NUM_SHADER_BANKS; + + if (remain < DEBUG_SECTION_SZ(shader_read_len * + shader_banks)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; + header->size = shader_read_len * shader_banks; + + /* Map shader memory to kernel, for dumping */ + if (device->shader_mem_virt == NULL) + device->shader_mem_virt = devm_ioremap(device->dev, + device->shader_mem_phys, + device->shader_mem_len); + + if (device->shader_mem_virt == NULL) { + KGSL_DRV_ERR(device, + "Unable to map shader memory region\n"); + return 0; + } + + for (j = 0; j < shader_banks; j++) { + unsigned int val; + /* select the SPTP */ + kgsl_regread(device, A4XX_HLSQ_SPTP_RDSEL, &val); + val &= ~0x3; + val |= j; + kgsl_regwrite(device, A4XX_HLSQ_SPTP_RDSEL, val); + /* Now, dump shader memory to snapshot */ + for (i = 0; i < shader_read_len; i++) + adreno_shadermem_regread(device, i, + &data[i + j * shader_read_len]); + } + + + return DEBUG_SECTION_SZ(shader_read_len * shader_banks); +} + +/* + * a4xx_rbbm_debug_bus_read() - Read data from trace bus + * @device: Device whose data bus is read + * @block_id: Trace bus block ID + * @index: Index of data to read + * @val: Output parameter where data is read + */ +static void a4xx_rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg = 0; + + reg |= (block_id << A4XX_RBBM_CFG_DEBBUS_SEL_PING_BLK_SEL_SHIFT); + reg |= (index << A4XX_RBBM_CFG_DEBBUS_SEL_PING_INDEX_SHIFT); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_A, reg); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_B, reg); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_C, reg); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_D, reg); + + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_IDX, 0x3020000); + kgsl_regread(device, A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4, val); + val++; + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_IDX, 0x1000000); + kgsl_regread(device, A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4, val); +} + +/* + * a4xx_snapshot_vbif_debugbus() - Dump the VBIF debug data + * @device: Device pointer for which the debug data is dumped + * @buf: Pointer to the memory where the data is dumped + * @remain: Amout of bytes remaining in snapshot + * @priv: Pointer to debug bus block + * + * Returns the number of bytes dumped + */ +static size_t a4xx_snapshot_vbif_debugbus(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i, j; + /* + * Total number of VBIF data words considering 3 sections: + * 2 arbiter blocks of 16 words + * 5 AXI XIN blocks of 4 dwords 
each + * 5 core clock side XIN blocks of 5 dwords each + */ + unsigned int dwords = (16 * A4XX_NUM_AXI_ARB_BLOCKS) + + (4 * A4XX_NUM_XIN_BLOCKS) + (5 * A4XX_NUM_XIN_BLOCKS); + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + unsigned int reg_clk; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + header->id = block->block_id; + header->count = dwords; + + kgsl_regread(device, A4XX_VBIF_CLKON, ®_clk); + kgsl_regwrite(device, A4XX_VBIF_CLKON, reg_clk | + (A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK << + A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT)); + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL0, 0); + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS_OUT_CTRL, + (A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK << + A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT)); + for (i = 0; i < A4XX_NUM_AXI_ARB_BLOCKS; i++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL0, + (1 << (i + 16))); + for (j = 0; j < 16; j++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL1, + ((j & A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks AXI side */ + for (i = 0; i < A4XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 4; j++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL1, + ((j & A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks core clock side */ + for (i = 0; i < A4XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 5; j++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL1, + ((j & A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK) + << A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + /* restore the clock of VBIF */ + kgsl_regwrite(device, A4XX_VBIF_CLKON, reg_clk); + return size; +} + +/* + * a4xx_snapshot_debugbus_block() - Capture debug data for a gpu block + * @device: Pointer to device + * @buf: Memory where data is captured + * @remain: Number of bytes left in snapshot + * @priv: Pointer to debug bus block + * + * Returns the number of bytes written + */ +static size_t a4xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + dwords = block->dwords; + + /* For a4xx each debug bus data unit is 2 DWRODS */ + size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords * 2; + + for (i = 0; i < dwords; i++) + a4xx_rbbm_debug_bus_read(device, block->block_id, i, + &data[i*2]); + + return size; +} + +/* + * a4xx_snapshot_debugbus() - Capture debug bus data + * @device: The device for which data is captured + * @snapshot: Pointer to the snapshot instance + */ +static void a4xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int i; + + kgsl_regwrite(device, 
A4XX_RBBM_CFG_DEBBUS_CTLM, + 0xf << A4XX_RBBM_CFG_DEBBUS_CTLT_ENABLE_SHIFT); + + for (i = 0; i < ARRAY_SIZE(a4xx_debugbus_blocks); i++) { + if (A4XX_RBBM_DEBBUS_VBIF_ID == + a4xx_debugbus_blocks[i].block_id) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a4xx_snapshot_vbif_debugbus, + (void *) &a4xx_debugbus_blocks[i]); + else + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a4xx_snapshot_debugbus_block, + (void *) &a4xx_debugbus_blocks[i]); + } + + if (!adreno_is_a405(adreno_dev)) { + for (i = 0; i < ARRAY_SIZE(a420_debugbus_blocks); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a4xx_snapshot_debugbus_block, + (void *) &a420_debugbus_blocks[i]); + + } +} + +static void a4xx_reset_hlsq(struct kgsl_device *device) +{ + unsigned int val, dummy = 0; + + /* reset cp */ + kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 1 << 20); + kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy); + + /* reset hlsq */ + kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 1 << 25); + kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy); + + /* clear reset bits */ + kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 0); + kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy); + + + /* set HLSQ_TIMEOUT_THRESHOLD.cycle_timeout_limit_sp to 26 */ + kgsl_regread(device, A4XX_HLSQ_TIMEOUT_THRESHOLD, &val); + val &= (0x1F << 24); + val |= (26 << 24); + kgsl_regwrite(device, A4XX_HLSQ_TIMEOUT_THRESHOLD, val); +} + +/* + * a4xx_snapshot() - A4XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the A4XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a4xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; + + /* Disable SP clock gating for the debug bus to work */ + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP0, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP1, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP2, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP3, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP0, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP1, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP2, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP3, 0); + + /* Disable top level clock gating the debug bus to work */ + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2, 0); + + /* Turn on MMU clocks since we read MMU registers */ + kgsl_mmu_enable_clk(&device->mmu); + + /* Master set of (non debug) registers */ + + SNAPSHOT_REGISTERS(device, snapshot, a4xx_registers); + + if (adreno_is_a430(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a4xx_sp_tp_registers); + + if (adreno_is_a420(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a4xx_xpu_registers); + + if (adreno_is_a430v2(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a4xx_ppd_registers); + + adreno_snapshot_vbif_registers(device, snapshot, + a4xx_vbif_snapshot_registers, + ARRAY_SIZE(a4xx_vbif_snapshot_registers)); + + kgsl_mmu_disable_clk(&device->mmu); + + kgsl_snapshot_indexed_registers(device, snapshot, + A4XX_CP_STATE_DEBUG_INDEX, A4XX_CP_STATE_DEBUG_DATA, + 0, snap_data->sect_sizes->cp_pfp); + + /* CP_ME indexed registers */ + 
kgsl_snapshot_indexed_registers(device, snapshot, + A4XX_CP_ME_CNTL, A4XX_CP_ME_STATUS, 64, 44); + + /* VPC memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_vpc_memory, + &snap_data->sect_sizes->vpc_mem); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_meq, + &snap_data->sect_sizes->cp_meq); + + /* CP PFP and PM4 */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pfp_ram, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pm4_ram, NULL); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_roq, + &snap_data->sect_sizes->roq); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_merciu, + &snap_data->sect_sizes->cp_merciu); + + /* Debug bus */ + a4xx_snapshot_debugbus(device, snapshot); + + if (!adreno_is_a430(adreno_dev)) { + a4xx_reset_hlsq(device); + SNAPSHOT_REGISTERS(device, snapshot, a4xx_sp_tp_registers); + } + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a4xx_snapshot_shader_memory, + &snap_data->sect_sizes->shader_mem); +} diff --git a/drivers/gpu/msm/adreno_a5xx.c b/drivers/gpu/msm/adreno_a5xx.c new file mode 100644 index 000000000000..2a461700f46a --- /dev/null +++ b/drivers/gpu/msm/adreno_a5xx.c @@ -0,0 +1,3403 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/firmware.h> +#include <soc/qcom/subsystem_restart.h> +#include <soc/qcom/scm.h> +#include <linux/pm_opp.h> + +#include "adreno.h" +#include "a5xx_reg.h" +#include "adreno_a5xx.h" +#include "adreno_cp_parser.h" +#include "adreno_trace.h" +#include "adreno_pm4types.h" +#include "adreno_perfcounter.h" +#include "adreno_ringbuffer.h" +#include "kgsl_sharedmem.h" +#include "kgsl_log.h" +#include "kgsl.h" + +static int zap_ucode_loaded; + +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +static const struct adreno_vbif_data a530_vbif[] = { + {A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003}, + {0, 0}, +}; + +static const struct adreno_vbif_platform a5xx_vbif_platforms[] = { + { adreno_is_a530, a530_vbif }, + { adreno_is_a510, a530_vbif }, + { adreno_is_a505, a530_vbif }, + { adreno_is_a506, a530_vbif }, +}; + +#define PREEMPT_RECORD(_field) \ + offsetof(struct a5xx_cp_preemption_record, _field) + +#define PREEMPT_SMMU_RECORD(_field) \ + offsetof(struct a5xx_cp_smmu_info, _field) +static void a5xx_gpmu_reset(struct work_struct *work); +static int _read_fw2_block_header(uint32_t *header, uint32_t id, + uint32_t major, uint32_t minor); + +/** + * Number of times to check if the regulator enabled before + * giving up and returning failure. + */ +#define PWR_RETRY 100 + +/** + * Number of times to check if the GPMU firmware is initialized before + * giving up and returning failure. 
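+ * Each retry in a5xx_gpmu_start() is separated by a udelay(1), so 100
+ * retries gives the firmware on the order of 100 usec to report
+ * 0xBABEFACE in A5XX_GPMU_GENERAL_0, comfortably above its expected
+ * 5 to 24 usec initialization latency.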
+ */ +#define GPMU_FW_INIT_RETRY 100 + +#define GPMU_HEADER_ID 1 +#define GPMU_FIRMWARE_ID 2 +#define GPMU_SEQUENCE_ID 3 +#define GPMU_INST_RAM_SIZE 0xFFF + +#define HEADER_MAJOR 1 +#define HEADER_MINOR 2 +#define HEADER_DATE 3 +#define HEADER_TIME 4 +#define HEADER_SEQUENCE 5 + +#define MAX_HEADER_SIZE 10 + +#define LM_SEQUENCE_ID 1 +#define HWCG_SEQUENCE_ID 2 +#define MAX_SEQUENCE_ID 3 + +/* GPMU communication protocal AGC */ +#define AGC_INIT_BASE A5XX_GPMU_DATA_RAM_BASE +#define AGC_RVOUS_MAGIC (AGC_INIT_BASE + 0) +#define AGC_KMD_GPMU_ADDR (AGC_INIT_BASE + 1) +#define AGC_KMD_GPMU_BYTES (AGC_INIT_BASE + 2) +#define AGC_GPMU_KMD_ADDR (AGC_INIT_BASE + 3) +#define AGC_GPMU_KMD_BYTES (AGC_INIT_BASE + 4) +#define AGC_INIT_MSG_MAGIC (AGC_INIT_BASE + 5) +#define AGC_RESERVED (AGC_INIT_BASE + 6) +#define AGC_MSG_BASE (AGC_INIT_BASE + 7) + +#define AGC_MSG_STATE (AGC_MSG_BASE + 0) +#define AGC_MSG_COMMAND (AGC_MSG_BASE + 1) +#define AGC_MSG_RETURN (AGC_MSG_BASE + 2) +#define AGC_MSG_PAYLOAD_SIZE (AGC_MSG_BASE + 3) +#define AGC_MSG_MAX_RETURN_SIZE (AGC_MSG_BASE + 4) +#define AGC_MSG_PAYLOAD (AGC_MSG_BASE + 5) + +#define AGC_INIT_MSG_VALUE 0xBABEFACE + +#define AGC_POWER_CONFIG_PRODUCTION_ID 1 + +#define LM_DEFAULT_LIMIT 6000 + +#define A530_DEFAULT_LEAKAGE 0x004E001A + +#define A530_QFPROM_RAW_PTE_ROW0_MSB 0x134 +#define A530_QFPROM_RAW_PTE_ROW2_MSB 0x144 +#define A530_QFPROM_CORR_PTE_ROW0_LSB 0x4130 + +static void a530_efuse_leakage(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int row0, row2; + unsigned int multiplier, gfx_active, leakage_pwr_on, coeff; + + adreno_efuse_read_u32(adreno_dev, + A530_QFPROM_RAW_PTE_ROW0_MSB, &row0); + + adreno_efuse_read_u32(adreno_dev, + A530_QFPROM_RAW_PTE_ROW2_MSB, &row2); + + multiplier = (row0 >> 1) & 0x3; + gfx_active = (row2 >> 2) & 0xFF; + + if (of_property_read_u32(device->pdev->dev.of_node, + "qcom,base-leakage-coefficient", &coeff)) + return; + + leakage_pwr_on = gfx_active * (1 << multiplier); + + adreno_dev->lm_leakage = (leakage_pwr_on << 16) | + ((leakage_pwr_on * coeff) / 100); +} + +static void a530_efuse_speed_bin(struct adreno_device *adreno_dev) +{ + unsigned int val; + + adreno_efuse_read_u32(adreno_dev, + A530_QFPROM_CORR_PTE_ROW0_LSB, &val); + + adreno_dev->speed_bin = + (val & 0xE0000000) >> 29; +} + +static const struct { + int (*check)(struct adreno_device *adreno_dev); + void (*func)(struct adreno_device *adreno_dev); +} a5xx_efuse_funcs[] = { + { adreno_is_a530, a530_efuse_leakage }, + { adreno_is_a530v3, a530_efuse_speed_bin }, +}; + +static void a5xx_check_features(struct adreno_device *adreno_dev) +{ + unsigned int i; + + if (adreno_efuse_map(adreno_dev)) + return; + + for (i = 0; i < ARRAY_SIZE(a5xx_efuse_funcs); i++) { + if (a5xx_efuse_funcs[i].check(adreno_dev)) + a5xx_efuse_funcs[i].func(adreno_dev); + } + + adreno_efuse_unmap(adreno_dev); +} + +/* + * a5xx_preemption_start() - Setup state to start preemption + */ +static void a5xx_preemption_start(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = &(adreno_dev->dev); + struct kgsl_iommu *iommu = device->mmu.priv; + uint64_t ttbr0; + uint32_t contextidr; + struct kgsl_pagetable *pt; + bool switch_default_pt = true; + + kgsl_sharedmem_writel(device, &rb->preemption_desc, + PREEMPT_RECORD(wptr), rb->wptr); + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, + lower_32_bits(rb->preemption_desc.gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, + 
upper_32_bits(rb->preemption_desc.gpuaddr)); + kgsl_sharedmem_readq(&rb->pagetable_desc, &ttbr0, + offsetof(struct adreno_ringbuffer_pagetable_info, ttbr0)); + kgsl_sharedmem_readl(&rb->pagetable_desc, &contextidr, + offsetof(struct adreno_ringbuffer_pagetable_info, contextidr)); + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == ttbr0) { + switch_default_pt = false; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + if (switch_default_pt) { + ttbr0 = kgsl_mmu_pagetable_get_ttbr0( + device->mmu.defaultpagetable); + contextidr = kgsl_mmu_pagetable_get_contextidr( + device->mmu.defaultpagetable); + } + + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + offsetof(struct a5xx_cp_smmu_info, ttbr0), ttbr0); + kgsl_sharedmem_writel(device, &iommu->smmu_info, + offsetof(struct a5xx_cp_smmu_info, context_idr), contextidr); +} + +/* + * a5xx_preemption_save() - Save the state after preemption is done + */ +static void a5xx_preemption_save(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + /* save the rptr from ctxrecord here */ + kgsl_sharedmem_readl(&rb->preemption_desc, &rb->rptr, + PREEMPT_RECORD(rptr)); +} + +static int a5xx_preemption_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_iommu *iommu = device->mmu.priv; + struct adreno_ringbuffer *rb; + int ret; + unsigned int i; + uint64_t addr; + + /* We are dependent on IOMMU to make preemption go on the CP side */ + if (kgsl_mmu_get_mmutype() != KGSL_MMU_TYPE_IOMMU) + return -ENODEV; + + /* Allocate mem for storing preemption counters */ + ret = kgsl_allocate_global(device, &adreno_dev->preemption_counters, + adreno_dev->num_ringbuffers * + A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0); + if (ret) + return ret; + + addr = adreno_dev->preemption_counters.gpuaddr; + + /* Allocate mem for storing preemption switch record */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + ret = kgsl_allocate_global(&adreno_dev->dev, + &rb->preemption_desc, A5XX_CP_CTXRECORD_SIZE_IN_BYTES, + 0, KGSL_MEMDESC_PRIVILEGED); + if (ret) + return ret; + + /* Initialize the context switch record here */ + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(info), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(data), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(cntl), 0x0800000C); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + kgsl_sharedmem_writeq(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(rbase), + adreno_dev->ringbuffers[i].buffer_desc.gpuaddr); + kgsl_sharedmem_writeq(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(counter), addr); + + addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + } + + /* Allocate mem for storing preemption smmu record */ + return kgsl_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED); +} + +/* + * a5xx_preemption_token() - Preempt token on a5xx + * PM4 commands for preempt token on a5xx. These commands are + * submitted to ringbuffer to trigger preemption. 
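+ *
+ * The token is two packets: CP_YIELD_ENABLE to restrict yielding to
+ * ringbuffer level, followed by CP_CONTEXT_SWITCH_YIELD carrying the
+ * address to signal and a flag asking for an interrupt once the
+ * preemption completes. The return value is the number of dwords
+ * written, so the caller can advance its command pointer by that
+ * amount.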
+ */ +static int a5xx_preemption_token(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + uint64_t gpuaddr) +{ + unsigned int *cmds_orig = cmds; + + /* Enable yield in RB only */ + *cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + *cmds++ = 1; + /* generate interrupt on preemption completion */ + *cmds++ = 1; + + return cmds - cmds_orig; + +} + +/* + * a5xx_preemption_pre_ibsubmit() - Below PM4 commands are + * added at the beginning of every cmdbatch submission. + */ +static int a5xx_preemption_pre_ibsubmit( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context, uint64_t cond_addr, + struct kgsl_memobj_node *ib) +{ + unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = rb->preemption_desc.gpuaddr; + unsigned int preempt_style = 0; + + if (context) + preempt_style = ADRENO_PREEMPT_STYLE(context->flags); + + /* + * CP_PREEMPT_ENABLE_GLOBAL(global preemption) can only be set by KMD + * in ringbuffer. + * 1) set global preemption to 0x0 to disable global preemption. + * Only RB level preemption is allowed in this mode + * 2) Set global preemption to defer(0x2) for finegrain preemption. + * when global preemption is set to defer(0x2), + * CP_PREEMPT_ENABLE_LOCAL(local preemption) determines the + * preemption point. Local preemption + * can be enabled by both UMD(within IB) and KMD. + */ + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1); + *cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) + ? 2 : 0); + + /* Turn CP protection OFF */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + + /* + * CP during context switch will save context switch info to + * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR + */ + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1); + *cmds++ = upper_32_bits(gpuaddr); + + /* Turn CP protection ON */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + /* + * Enable local preemption for finegrain preemption in case of + * a misbehaving IB + */ + if (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) { + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 1; + } else { + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 0; + } + + return cmds - cmds_orig; +} + +/* + * a5xx_preemption_post_ibsubmit() - Below PM4 commands are + * added after every cmdbatch submission. + */ +static int a5xx_preemption_post_ibsubmit( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + unsigned int ctx_id = context ? context->id : 0; + + /* + * SRM -- set render mode (ex binning, direct render etc) + * SRM is set by UMD usually at start of IB to tell CP the type of + * preemption. + * KMD needs to set SRM to NULL to indicate CP that rendering is + * done by IB. 
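+ * The zero-filled CP_SET_RENDER_MODE packet below does exactly that,
+ * and is followed by a preemption token that yields on the context's
+ * "preempted" memstore slot.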
+ */ + *cmds++ = cp_type7_packet(CP_SET_RENDER_MODE, 5); + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + + cmds += a5xx_preemption_token(adreno_dev, rb, cmds, + rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(ctx_id, preempted)); + + return cmds - cmds_orig; +} + +static void a5xx_platform_setup(struct adreno_device *adreno_dev) +{ + uint64_t addr; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_is_a505_or_a506(adreno_dev)) { + gpudev->snapshot_data->sect_sizes->cp_meq = 32; + gpudev->snapshot_data->sect_sizes->cp_merciu = 1024; + gpudev->snapshot_data->sect_sizes->roq = 256; + + /* A505 & A506 having 3 XIN ports in VBIF */ + gpudev->vbif_xin_halt_ctrl0_mask = + A510_VBIF_XIN_HALT_CTRL0_MASK; + } else if (adreno_is_a510(adreno_dev)) { + gpudev->snapshot_data->sect_sizes->cp_meq = 32; + gpudev->snapshot_data->sect_sizes->cp_merciu = 32; + gpudev->snapshot_data->sect_sizes->roq = 256; + + /* A510 has 3 XIN ports in VBIF */ + gpudev->vbif_xin_halt_ctrl0_mask = + A510_VBIF_XIN_HALT_CTRL0_MASK; + } + + /* Calculate SP local and private mem addresses */ + addr = ALIGN(ADRENO_UCHE_GMEM_BASE + adreno_dev->gmem_size, SZ_64K); + adreno_dev->sp_local_gpuaddr = addr; + adreno_dev->sp_pvt_gpuaddr = addr + SZ_64K; + + /* Setup defaults that might get changed by the fuse bits */ + adreno_dev->lm_leakage = A530_DEFAULT_LEAKAGE; + adreno_dev->speed_bin = 0; + + /* Check efuse bits for various capabilties */ + a5xx_check_features(adreno_dev); +} + +static void a5xx_init(struct adreno_device *adreno_dev) +{ + if (adreno_is_a530(adreno_dev) && !adreno_is_a530v1(adreno_dev)) + INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset); + + a5xx_crashdump_init(adreno_dev); +} + +/** + * a5xx_protect_init() - Initializes register protection on a5xx + * @device: Pointer to the device structure + * Performs register writes to enable protected access to sensitive + * registers + */ +static void a5xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int index = 0; + struct kgsl_protected_registers *iommu_regs; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A5XX_CP_PROTECT_CNTL, 0x00000007); + + /* RBBM registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x4, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x8, 3); + adreno_set_protected_registers(adreno_dev, &index, 0x10, 4); + adreno_set_protected_registers(adreno_dev, &index, 0x20, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x40, 6); + adreno_set_protected_registers(adreno_dev, &index, 0x80, 6); + + /* Content protection registers */ + adreno_set_protected_registers(adreno_dev, &index, + A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 4); + adreno_set_protected_registers(adreno_dev, &index, + A5XX_RBBM_SECVID_TRUST_CNTL, 1); + + /* CP registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x800, 6); + adreno_set_protected_registers(adreno_dev, &index, 0x840, 3); + adreno_set_protected_registers(adreno_dev, &index, 0x880, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x0AA0, 0); + + /* RB registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0); + adreno_set_protected_registers(adreno_dev, &index, 0xCF0, 1); + + /* VPC registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE68, 3); + adreno_set_protected_registers(adreno_dev, &index, 0xE70, 4); + + /* UCHE registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE87, 4); + + /* SMMU 
registers */ + iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu); + if (iommu_regs) + adreno_set_protected_registers(adreno_dev, &index, + iommu_regs->base, iommu_regs->range); +} + +/* + * a5xx_is_sptp_idle() - A530 SP/TP/RAC should be power collapsed to be + * considered idle + * @adreno_dev: The adreno_device pointer + */ +static bool a5xx_is_sptp_idle(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + + /* If feature is not supported or enabled, no worry */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) + return true; + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + return false; + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + return !(reg & BIT(20)); +} + +/* + * _poll_gdsc_status() - Poll the GDSC status register + * @adreno_dev: The adreno device pointer + * @status_reg: Offset of the status register + * @status_value: The expected bit value + * + * Poll the status register till the power-on bit is equal to the + * expected value or the max retries are exceeded. + */ +static int _poll_gdsc_status(struct adreno_device *adreno_dev, + unsigned int status_reg, + unsigned int status_value) +{ + unsigned int reg, retry = PWR_RETRY; + struct kgsl_device *device = &adreno_dev->dev; + + /* Bit 20 is the power on bit of SPTP and RAC GDSC status register */ + do { + udelay(1); + kgsl_regread(device, status_reg, ®); + } while (((reg & BIT(20)) != (status_value << 20)) && retry--); + if ((reg & BIT(20)) != (status_value << 20)) + return -ETIMEDOUT; + return 0; +} + +/* + * a5xx_regulator_enable() - Enable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly enabled + * on a restart. Clocks must be on during this call. + */ +static int a5xx_regulator_enable(struct adreno_device *adreno_dev) +{ + unsigned int ret; + struct kgsl_device *device = &adreno_dev->dev; + if (!adreno_is_a530(adreno_dev)) + return 0; + + /* + * Turn on smaller power domain first to reduce voltage droop. + * Set the default register values; set SW_COLLAPSE to 0. + */ + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); + /* Insert a delay between RAC and SPTP GDSC to reduce voltage droop */ + udelay(3); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1); + if (ret) { + KGSL_PWR_ERR(device, "RBCCU GDSC enable failed\n"); + return ret; + } + + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778000); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_SP_PWR_CLK_STATUS, 1); + if (ret) { + KGSL_PWR_ERR(device, "SPTP GDSC enable failed\n"); + return ret; + } + + return 0; +} + +/* + * a5xx_regulator_disable() - Disable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly disabled + * on a power down to prevent current spikes. Clocks must be on + * during this call. + */ +static void a5xx_regulator_disable(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + + /* If feature is not supported or not enabled */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) { + /* Set the default register values; set SW_COLLAPSE to 1 */ + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778001); + /* + * Insert a delay between SPTP and RAC GDSC to reduce voltage + * droop. 
+ */ + udelay(3); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_SP_PWR_CLK_STATUS, 0)) + KGSL_PWR_WARN(device, "SPTP GDSC disable failed\n"); + + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778001); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 0)) + KGSL_PWR_WARN(device, "RBCCU GDSC disable failed\n"); + } else if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* GPMU firmware is supposed to turn off SPTP & RAC GDSCs. */ + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + KGSL_PWR_WARN(device, "SPTP GDSC is not disabled\n"); + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + KGSL_PWR_WARN(device, "RBCCU GDSC is not disabled\n"); + /* + * GPMU firmware is supposed to set GMEM to non-retention. + * Bit 14 is the memory core force on bit. + */ + kgsl_regread(device, A5XX_GPMU_RBCCU_CLOCK_CNTL, ®); + if (reg & BIT(14)) + KGSL_PWR_WARN(device, "GMEM is forced on\n"); + } + + if (adreno_is_a530(adreno_dev)) { + /* Reset VBIF before PC to avoid popping bogus FIFO entries */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, + 0x003C0000); + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, 0); + } +} + +/* + * a5xx_enable_pc() - Enable the GPMU based power collapse of the SPTP and RAC + * blocks + * @adreno_dev: The adreno device pointer + */ +static void a5xx_enable_pc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL, 0x0000007F); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_BINNING_CTRL, 0); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_HYST, 0x000A0080); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_STAGGER_DELAY, 0x00600040); + + trace_adreno_sp_tp((unsigned long) __builtin_return_address(0)); +}; + +/* + * The maximum payload of a type4 packet is the max size minus one for the + * opcode + */ +#define TYPE4_MAX_PAYLOAD (PM4_TYPE4_PKT_SIZE_MAX - 1) + +static int _gpmu_create_load_cmds(struct adreno_device *adreno_dev, + uint32_t *ucode, uint32_t size) +{ + uint32_t *start, *cmds; + uint32_t offset = 0; + uint32_t cmds_size = size; + + /* Add a dword for each PM4 packet */ + cmds_size += (size / TYPE4_MAX_PAYLOAD) + 1; + + /* Add 4 dwords for the protected mode */ + cmds_size += 4; + + if (adreno_dev->gpmu_cmds != NULL) + return 0; + + adreno_dev->gpmu_cmds = kmalloc(cmds_size << 2, GFP_KERNEL); + if (adreno_dev->gpmu_cmds == NULL) + return -ENOMEM; + + cmds = adreno_dev->gpmu_cmds; + start = cmds; + + /* Turn CP protection OFF */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + + /* + * Prebuild the cmd stream to send to the GPU to load + * the GPMU firmware + */ + while (size > 0) { + int tmp_size = size; + + if (size >= TYPE4_MAX_PAYLOAD) + tmp_size = TYPE4_MAX_PAYLOAD; + + *cmds++ = cp_type4_packet( + A5XX_GPMU_INST_RAM_BASE + offset, + tmp_size); + + memcpy(cmds, &ucode[offset], tmp_size << 2); + + cmds += tmp_size; + offset += tmp_size; + size -= tmp_size; + } + + /* Turn CP protection ON */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + adreno_dev->gpmu_cmds_size = (size_t) (cmds - start); + + return 0; +} + + +/* + * _load_gpmu_firmware() - Load the ucode into the GPMU RAM + * @adreno_dev: Pointer to adreno device + */ +static int _load_gpmu_firmware(struct adreno_device *adreno_dev) +{ + uint32_t 
*data; + const struct firmware *fw = NULL; + struct kgsl_device *device = &adreno_dev->dev; + const struct adreno_gpu_core *gpucore = adreno_dev->gpucore; + uint32_t *cmds, cmd_size; + int ret = -EINVAL; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) + return 0; + + /* gpmu fw already saved and verified so do nothing new */ + if (adreno_dev->gpmu_cmds_size != 0) + return 0; + + if (gpucore->gpmufw_name == NULL) + return 0; + + ret = request_firmware(&fw, gpucore->gpmufw_name, device->dev); + if (ret || fw == NULL) { + KGSL_CORE_ERR("request_firmware (%s) failed: %d\n", + gpucore->gpmufw_name, ret); + return ret; + } + + data = (uint32_t *)fw->data; + + if (data[0] >= (fw->size / sizeof(uint32_t)) || data[0] < 2) + goto err; + + if (data[1] != GPMU_FIRMWARE_ID) + goto err; + ret = _read_fw2_block_header(&data[2], + GPMU_FIRMWARE_ID, + adreno_dev->gpucore->gpmu_major, + adreno_dev->gpucore->gpmu_minor); + if (ret) + goto err; + + cmds = data + data[2] + 3; + cmd_size = data[0] - data[2] - 2; + + if (cmd_size > GPMU_INST_RAM_SIZE) { + KGSL_CORE_ERR( + "GPMU firmware block size is larger than RAM size\n"); + goto err; + } + + /* Everything is cool, so create some commands */ + ret = _gpmu_create_load_cmds(adreno_dev, cmds, cmd_size); +err: + if (fw) + release_firmware(fw); + + return ret; +} + +static int _gpmu_send_init_cmds(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + uint32_t *cmds; + uint32_t size = adreno_dev->gpmu_cmds_size; + + if (size == 0 || adreno_dev->gpmu_cmds == NULL) + return -EINVAL; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + /* Copy to the RB the predefined fw sequence cmds */ + memcpy(cmds, adreno_dev->gpmu_cmds, size << 2); + return adreno_ringbuffer_submit_spin(rb, NULL, 2000); +} + +/* + * a5xx_gpmu_start() - Initialize and start the GPMU + * @adreno_dev: Pointer to adreno device + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of reset. + */ +static int a5xx_gpmu_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int reg, retry = GPMU_FW_INIT_RETRY; + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) + return 0; + + ret = _gpmu_send_init_cmds(adreno_dev); + if (ret) { + KGSL_CORE_ERR("Failed to program the GPMU: %d\n", ret); + return ret; + } + + /* GPMU clock gating setup */ + kgsl_regwrite(device, A5XX_GPMU_WFI_CONFIG, 0x00004014); + + /* Kick off GPMU firmware */ + kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 0); + /* + * The hardware team's estimation of GPMU firmware initialization + * latency is about 3000 cycles, that's about 5 to 24 usec. + */ + do { + udelay(1); + kgsl_regread(device, A5XX_GPMU_GENERAL_0, ®); + } while ((reg != 0xBABEFACE) && retry--); + if (reg != 0xBABEFACE) { + KGSL_CORE_ERR("GPMU firmware initialization timed out\n"); + ret = -ETIMEDOUT; + } else { + set_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv); + /* + * We are in AWARE state and IRQ line from GPU to host is + * disabled. + * Read pending GPMU interrupts and clear GPMU_RBBM_INTR_INFO. + */ + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + /* + * Clear RBBM interrupt mask if any of GPMU interrupts + * are pending. 
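+ * The A5XX_RBBM_INT_CLEAR_CMD write below acks the pending
+ * A5XX_INT_GPMU_FIRMWARE interrupt so that it does not fire as soon
+ * as the GPU IRQ line is enabled again.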
+ */ + if (reg) + kgsl_regwrite(device, + A5XX_RBBM_INT_CLEAR_CMD, + 1 << A5XX_INT_GPMU_FIRMWARE); + } + return ret; +} + +struct kgsl_hwcg_reg { + unsigned int off; + unsigned int val; +}; + +static const struct kgsl_hwcg_reg a50x_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00555555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct kgsl_hwcg_reg a510_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 
0x00555555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct kgsl_hwcg_reg a530_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + 
{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00555555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct { + int (*devfunc)(struct adreno_device *adreno_dev); + const struct kgsl_hwcg_reg *regs; + unsigned int count; +} a5xx_hwcg_registers[] = { + { adreno_is_a530v3, a530_hwcg_regs, ARRAY_SIZE(a530_hwcg_regs) }, + { adreno_is_a530v2, a530_hwcg_regs, ARRAY_SIZE(a530_hwcg_regs) }, + { adreno_is_a510, a510_hwcg_regs, ARRAY_SIZE(a510_hwcg_regs) }, + { adreno_is_a505, a50x_hwcg_regs, ARRAY_SIZE(a50x_hwcg_regs) }, + { adreno_is_a506, a50x_hwcg_regs, ARRAY_SIZE(a50x_hwcg_regs) }, +}; + +static void a5xx_hwcg_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + const struct kgsl_hwcg_reg *regs; + int i, j; + + for (i = 0; i < ARRAY_SIZE(a5xx_hwcg_registers); i++) { + if (a5xx_hwcg_registers[i].devfunc(adreno_dev)) + break; + } + + if (i == ARRAY_SIZE(a5xx_hwcg_registers)) + return; + + regs = a5xx_hwcg_registers[i].regs; + + for (j = 0; j < a5xx_hwcg_registers[i].count; j++) + kgsl_regwrite(device, regs[j].off, regs[j].val); + + /* enable top level HWCG */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0xAAA8AA00); + kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x00000182); +} + +static int _read_fw2_block_header(uint32_t *header, uint32_t id, + uint32_t major, uint32_t minor) +{ + uint32_t header_size; + int i = 1; + + if (header == NULL) + return -ENOMEM; + + header_size = header[0]; + /* Headers have limited size and always occur as pairs of words */ + if (header_size > MAX_HEADER_SIZE || header_size % 2) + return -EINVAL; + /* Sequences must have an identifying id first thing in their header */ + if (id == GPMU_SEQUENCE_ID) { + if (header[i] != HEADER_SEQUENCE || + (header[i + 1] >= MAX_SEQUENCE_ID)) + return -EINVAL; + i += 2; + } + for (; i < header_size; i += 2) { + switch (header[i]) { + /* Major Version */ + case HEADER_MAJOR: + if ((major > header[i + 1]) && + header[i + 1]) { + KGSL_CORE_ERR( + "GPMU major version mis-match %d, %d\n", + major, header[i + 1]); + return -EINVAL; + } + break; + case HEADER_MINOR: + if (minor > header[i + 1]) + KGSL_CORE_ERR( + "GPMU minor version mis-match %d %d\n", + minor, header[i + 1]); + break; + case HEADER_DATE: + case HEADER_TIME: + break; + default: + KGSL_CORE_ERR("GPMU unknown header ID %d\n", + header[i]); + } + } + return 0; +} + +/* + * Read 
in the register sequence file and save pointers to the + * necessary sequences. + * + * GPU sequence file format (one dword per field unless noted): + * Block 1 length (length dword field not inclusive) + * Block 1 type = Sequence = 3 + * Block Header length (length dword field not inclusive) + * BH field ID = Sequence field ID + * BH field data = Sequence ID + * BH field ID + * BH field data + * ... + * Opcode 0 ID + * Opcode 0 data M words + * Opcode 1 ID + * Opcode 1 data N words + * ... + * Opcode X ID + * Opcode X data O words + * Block 2 length... + */ +static void _load_regfile(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + const struct firmware *fw; + uint32_t block_size = 0, block_total = 0, fw_size; + uint32_t *block; + int ret = -EINVAL; + + if (!adreno_dev->gpucore->regfw_name) + return; + + ret = request_firmware(&fw, adreno_dev->gpucore->regfw_name, + device->dev); + if (ret) { + KGSL_PWR_ERR(device, "request firmware failed %d, %s\n", + ret, adreno_dev->gpucore->regfw_name); + return; + } + + fw_size = fw->size / sizeof(uint32_t); + /* Min valid file of size 6, see file description */ + if (fw_size < 6) + goto err; + block = (uint32_t *)fw->data; + /* All offset numbers calculated from file description */ + while (block_total < fw_size) { + block_size = block[0]; + if (block_size >= fw_size || block_size < 2) + goto err; + if (block[1] != GPMU_SEQUENCE_ID) + goto err; + + /* For now ignore blocks other than the LM sequence */ + if (block[4] == LM_SEQUENCE_ID) { + ret = _read_fw2_block_header(&block[2], + GPMU_SEQUENCE_ID, + adreno_dev->gpucore->lm_major, + adreno_dev->gpucore->lm_minor); + if (ret) + goto err; + + adreno_dev->lm_fw = fw; + adreno_dev->lm_sequence = block + block[2] + 3; + adreno_dev->lm_size = block_size - block[2] - 2; + } + block_total += (block_size + 1); + block += (block_size + 1); + } + if (adreno_dev->lm_sequence) + return; + +err: + release_firmware(fw); + KGSL_PWR_ERR(device, + "Register file failed to load sz=%d bsz=%d header=%d\n", + fw_size, block_size, ret); + return; +} + +static int _execute_reg_sequence(struct adreno_device *adreno_dev, + uint32_t *opcode, uint32_t length) +{ + struct kgsl_device *device = &adreno_dev->dev; + uint32_t *cur = opcode; + uint64_t reg, val; + + /* todo double check the reg writes */ + while ((cur - opcode) < length) { + switch (cur[0]) { + /* Write a 32 bit value to a 64 bit reg */ + case 1: + reg = cur[2]; + reg = (reg << 32) | cur[1]; + kgsl_regwrite(device, reg, cur[3]); + cur += 4; + break; + /* Write a 64 bit value to a 64 bit reg */ + case 2: + reg = cur[2]; + reg = (reg << 32) | cur[1]; + val = cur[4]; + val = (val << 32) | cur[3]; + kgsl_regwrite(device, reg, val); + cur += 5; + break; + /* Delay for X usec */ + case 3: + udelay(cur[1]); + cur += 2; + break; + default: + return -EINVAL; + } } + return 0; +} + +static void _write_voltage_table(struct adreno_device *adreno_dev, + unsigned int addr, uint32_t *length) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + struct dev_pm_opp *opp; + int levels = pwr->num_pwrlevels - 1; + unsigned int mvolt = 0; + + kgsl_regwrite(device, addr, adreno_dev->gpucore->max_power); + kgsl_regwrite(device, addr + 1, levels); + + /* Write voltage in mV and frequency in MHz */ + for (i = 0; i < levels; i++) { + opp = dev_pm_opp_find_freq_exact(&device->pdev->dev, + pwr->pwrlevels[i].gpu_freq, true); + /* _opp_get returns uV, convert to mV */ + if (!IS_ERR(opp)) + mvolt = 
dev_pm_opp_get_voltage(opp) / 1000; + kgsl_regwrite(device, addr + 2 + i * 2, mvolt); + kgsl_regwrite(device, addr + 3 + i * 2, + pwr->pwrlevels[i].gpu_freq / 1000000); + } + *length = levels * 2 + 2; +} + +static uint32_t lm_limit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_dev->lm_limit) + return adreno_dev->lm_limit; + + if (of_property_read_u32(device->pdev->dev.of_node, "qcom,lm-limit", + &adreno_dev->lm_limit)) + adreno_dev->lm_limit = LM_DEFAULT_LIMIT; + + return adreno_dev->lm_limit; +} +/* + * a5xx_lm_init() - Initialize LM/DPM on the GPMU + * @adreno_dev: The adreno device pointer + */ +static void a5xx_lm_init(struct adreno_device *adreno_dev) +{ + uint32_t length; + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || + !test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + /* If something was wrong with the sequence file, return */ + if (adreno_dev->lm_sequence == NULL) + return; + + /* Write LM registers including DPM ucode, coefficients, and config */ + if (_execute_reg_sequence(adreno_dev, adreno_dev->lm_sequence, + adreno_dev->lm_size)) { + /* If the sequence is invalid, it's not getting better */ + adreno_dev->lm_sequence = NULL; + KGSL_PWR_WARN(device, + "Invalid LM sequence\n"); + return; + } + + kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_ID, + adreno_dev->gpucore->gpmu_tsens); + kgsl_regwrite(device, A5XX_GPMU_DELTA_TEMP_THRESHOLD, 0x1); + kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, 0x1); + + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE, + (0x80000000 | device->pwrctrl.active_pwrlevel)); + /* use the leakage to set this value at runtime */ + kgsl_regwrite(device, A5XX_GPMU_BASE_LEAKAGE, + adreno_dev->lm_leakage); + + /* Enable the power threshold and set it to 6000m */ + kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD, + 0x80000000 | lm_limit(adreno_dev)); + + kgsl_regwrite(device, A5XX_GPMU_BEC_ENABLE, 0x10001FFF); + kgsl_regwrite(device, A5XX_GDPM_CONFIG1, 0x00201FF1); + + /* Send an initial message to the GPMU with the LM voltage table */ + kgsl_regwrite(device, AGC_MSG_STATE, 0x1); + kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID); + _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD, &length); + length *= sizeof(uint32_t); + kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, length); + kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE); +} + +/* + * a5xx_lm_enable() - Enable the LM/DPM feature on the GPMU + * @adreno_dev: The adreno device pointer + */ +static void a5xx_lm_enable(struct adreno_device *adreno_dev) +{ + uint32_t val; + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || + !test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + /* If no sequence properly initialized, return */ + if (adreno_dev->lm_sequence == NULL) + return; + + kgsl_regwrite(device, A5XX_GDPM_INT_MASK, 0x00000000); + kgsl_regwrite(device, A5XX_GDPM_INT_EN, 0x0000000A); + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, 0x00000001); + kgsl_regwrite(device, A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK, + 0x00050000); + kgsl_regwrite(device, A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL, + 0x00030000); + if (adreno_is_a530v2(adreno_dev)) + val = 0x00060011; + /* v3 value */ + else + val = 0x00000011; + kgsl_regwrite(device, A5XX_GPMU_CLOCK_THROTTLE_CTRL, val); +} + +static int gpmu_set_level(struct kgsl_device *device, unsigned int val) +{ + unsigned int reg; + int retry = 20; + + 
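+ /*
+ * Bit 31 of the value written to A5XX_GPMU_GPMU_VOLTAGE acts as a
+ * busy flag set by the caller; the GPMU firmware clears it once the
+ * request has been taken, so poll for it to drop before declaring
+ * the level change complete.
+ */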
kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE, val); + + do { + kgsl_regread(device, A5XX_GPMU_GPMU_VOLTAGE, ®); + } while ((reg & 0x80000000) && retry--); + + return (reg & 0x80000000) ? -ETIMEDOUT : 0; +} + +/* + * a5xx_pwrlevel_change_settings() - Program the hardware during power level + * transitions + * @adreno_dev: The adreno device pointer + * @prelevel: The previous power level + * @postlevel: The new power level + * @post: True if called after the clock change has taken effect + */ +static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post) +{ + struct kgsl_device *device = &adreno_dev->dev; + static int pre; + int on = 0; + + /* Only call through if PPD or LM is supported and enabled */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_PPD) && + test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag)) + on = ADRENO_PPD; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LM) && + test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) + on = ADRENO_LM; + + if (!on) + return; + + /* if this is a real pre, or a post without a previous pre, set pre */ + if ((post == 0) || (pre == 0 && post == 1)) + pre = 1; + else if (post == 1) + pre = 0; + + if (pre) { + if (gpmu_set_level(device, (0x80000010 | postlevel))) + KGSL_CORE_ERR( + "GPMU pre powerlevel did not stabilize\n"); + } + + if (post) { + if (gpmu_set_level(device, (0x80000000 | postlevel))) + KGSL_CORE_ERR( + "GPMU post powerlevel did not stabilize\n"); + pre = 0; + } +} + +static void a5xx_enable_64bit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_regwrite(device, A5XX_CP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VSC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_GRAS_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_RB_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_PC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_HLSQ_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VFD_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VPC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_UCHE_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_SP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_TPL1_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); +} + +/* + * a5xx_gpmu_reset() - Re-enable GPMU based power features and restart GPMU + * @work: Pointer to the work struct for gpmu reset + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of reset. + */ +static void a5xx_gpmu_reset(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, gpmu_work); + struct kgsl_device *device = &adreno_dev->dev; + + if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv)) + return; + + /* + * If GPMU has already experienced a restart or is in the process of it + * after the watchdog timeout, then there is no need to reset GPMU + * again. 
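+ * The state check below also skips the reset when the device has
+ * already powered down, since the GPMU is reprogrammed on the next
+ * start anyway.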
+ */ + if (device->state != KGSL_STATE_NAP && + device->state != KGSL_STATE_AWARE && + device->state != KGSL_STATE_ACTIVE) + return; + + mutex_lock(&device->mutex); + + if (device->state == KGSL_STATE_NAP) + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + + if (a5xx_regulator_enable(adreno_dev)) + goto out; + + /* Soft reset of the GPMU block */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, BIT(16)); + + a5xx_lm_init(adreno_dev); + + a5xx_enable_pc(adreno_dev); + + a5xx_gpmu_start(adreno_dev); + + a5xx_lm_enable(adreno_dev); + +out: + mutex_unlock(&device->mutex); +} + +/* + * a5xx_start() - Device start + * @adreno_dev: Pointer to adreno device + * + * a5xx device start + */ +static void a5xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_iommu *iommu = device->mmu.priv; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int i; + struct adreno_ringbuffer *rb; + uint64_t def_ttbr0; + uint32_t contextidr; + + adreno_vbif_start(adreno_dev, a5xx_vbif_platforms, + ARRAY_SIZE(a5xx_vbif_platforms)); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* + * Enable the RBBM error reporting bits. This lets us get + * useful information on failure + */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL0, 0x00000001); + + /* + * Turn on hang detection for a530 v2 and beyond. This spews a + * lot of useful information into the RBBM registers on a hang. + */ + if (!adreno_is_a530v1(adreno_dev)) { + /* + * We have 4 RB units, and only RB0 activity signals are working + * correctly. Mask out RB1-3 activity signals from the HW hang + * detection logic as per recommendation of hardware team. + */ + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11, + 0xF0000000); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18, + 0xFFFFFFFF); + + set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + gpudev->irq->mask |= (1 << A5XX_INT_MISC_HANG_DETECT); + /* + * Set hang detection threshold to 1 million cycles + * (0xFFFF*16) + */ + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | 0xFFFF); + } + + + /* Turn on performance counters */ + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_CNTL, 0x01); + + /* + * This is to increase performance by restricting VFD's cache access, + * so that LRZ and other data get evicted less. 
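+ * (The A5XX_UCHE_CACHE_WAYS write below applies that restriction,
+ * presumably by limiting the number of ways VFD may allocate into.)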
+ */ + kgsl_regwrite(device, A5XX_UCHE_CACHE_WAYS, 0x02); + + /* + * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively + * disabling L2 bypass + */ + kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_LO, 0xffff0000); + kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_LO, 0xffff0000); + kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* Program the GMEM VA range for the UCHE path */ + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_LO, + ADRENO_UCHE_GMEM_BASE); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x0); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_LO, + ADRENO_UCHE_GMEM_BASE + + adreno_dev->gmem_size - 1); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x0); + + /* + * Below CP registers are 0x0 by default, program init + * values based on a5xx flavor. + */ + if (adreno_is_a505_or_a506(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + } else if (adreno_is_a510(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x20); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + } else { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x40); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + } + + /* + * vtxFifo and primFifo thresholds default values + * are different. + */ + if (adreno_is_a505_or_a506(adreno_dev)) + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x100 << 11 | 0x100 << 22)); + else if (adreno_is_a510(adreno_dev)) + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x200 << 11 | 0x200 << 22)); + else + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x400 << 11 | 0x300 << 22)); + + /* + * A5x USP LDST non valid pixel wrongly update read combine offset + * In A5xx we added optimization for read combine. There could be cases + * on a530 v1 there is no valid pixel but the active masks is not + * cleared and the offset can be wrongly updated if the invalid address + * can be combined. The wrongly latched value will make the returning + * data got shifted at wrong offset. 
workaround this issue by disabling + * LD combine, bit[25] of SP_DBG_ECO_CNTL (sp chicken bit[17]) need to + * be set to 1, default is 0(enable) + */ + if (adreno_is_a530v1(adreno_dev)) + kgsl_regrmw(device, A5XX_SP_DBG_ECO_CNTL, 0, (1 << 25)); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) { + /* + * Set TWOPASSUSEWFI in A5XX_PC_DBG_ECO_CNTL for + * microcodes after v77 + */ + if ((adreno_compare_pfp_version(adreno_dev, 0x5FF077) >= 0)) + kgsl_regrmw(device, A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); + } + + /* Set the USE_RETENTION_FLOPS chicken bit */ + kgsl_regwrite(device, A5XX_CP_CHICKEN_DBG, 0x02000000); + + /* Enable ISDB mode if requested */ + if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) { + if (!kgsl_active_count_get(device)) { + /* + * Disable ME/PFP split timeouts when the debugger is + * enabled because the CP doesn't know when a shader is + * in active debug + */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0x06FFFFFF); + + /* Force the SP0/SP1 clocks on to enable ISDB */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP0, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP1, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP2, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP3, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP0, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP1, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP2, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP3, 0x0); + + /* disable HWCG */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x0); + kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x0); + } else + KGSL_CORE_ERR( + "Active count failed while turning on ISDB."); + } else { + /* if not in ISDB mode enable ME/PFP split notification */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); + /* enable HWCG */ + a5xx_hwcg_init(adreno_dev); + } + + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL2, 0x0000003F); + + if (adreno_is_preemption_enabled(adreno_dev)) { + struct kgsl_pagetable *pt = device->mmu.defaultpagetable; + + def_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt); + contextidr = kgsl_mmu_pagetable_get_contextidr(pt); + + /* Initialize the context switch record here */ + kgsl_sharedmem_writel(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), + A5XX_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), def_ttbr0); + /* + * The CP doesn't actually use the asid field, so + * put a bad value into it until it is removed from + * the preemption record. 
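+ * (0xdecafbad written below is that placeholder value.)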
+ */ + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), + 0xdecafbad); + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), + contextidr); + adreno_writereg64(adreno_dev, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + iommu->smmu_info.gpuaddr); + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + kgsl_sharedmem_writeq(rb->device, &rb->pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + ttbr0), def_ttbr0); + } + } + + a5xx_protect_init(adreno_dev); +} + +static int _preemption_init( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = rb->preemption_desc.gpuaddr; + uint64_t gpuaddr_token = rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(0, preempted); + + /* Turn CP protection OFF */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + /* + * CP during context switch will save context switch info to + * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR + */ + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1); + *cmds++ = upper_32_bits(gpuaddr); + + /* Turn CP protection ON */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1); + *cmds++ = 0; + + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 1; + + /* Enable yield in RB only */ + *cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr_token); + *cmds++ = 1; + /* generate interrupt on preemption completion */ + *cmds++ = 1; + + return cmds - cmds_orig; +} + +/* Print some key registers if a spin-for-idle times out */ +static void spin_idle_debug(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int rptr, wptr; + unsigned int status, status3, intstatus; + unsigned int hwfault; + + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + + kgsl_regread(device, A5XX_RBBM_STATUS, &status); + kgsl_regread(device, A5XX_RBBM_STATUS3, &status3); + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, A5XX_CP_HW_FAULT, &hwfault); + + dev_err(device->dev, + " rb=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n", + rptr, wptr, status, status3, intstatus); + dev_err(device->dev, " hwfault=%8.8X\n", hwfault); +} + +static void a5xx_post_start(struct adreno_device *adreno_dev) +{ + unsigned int *cmds, *start; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + + cmds = adreno_ringbuffer_allocspace(rb, 42); + if (IS_ERR_OR_NULL(cmds)) + return; + + start = cmds; + + /* + * Send a pipeline stat event whenever the GPU gets powered up + * to cause misbehaving perf counters to start ticking + */ + if (adreno_is_a530(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1); + *cmds++ = 0xF; + } + + if (adreno_is_preemption_enabled(adreno_dev)) + cmds += _preemption_init(adreno_dev, rb, cmds, NULL); + + rb->wptr = rb->wptr - (42 - (cmds 
- start)); + + if (cmds == start) + return; + + if (adreno_ringbuffer_submit_spin(rb, NULL, 2000)) { + struct kgsl_device *device = &adreno_dev->dev; + + KGSL_DRV_ERR(device, "hw initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } +} + +/* + * a5xx_hw_init() - Initialize GPU HW using PM4 cmds + * @adreno_dev: Pointer to adreno device + * + * Submit PM4 commands for HW initialization, + */ +static int a5xx_hw_init(struct adreno_device *adreno_dev) +{ + int ret; + struct kgsl_device *device = &adreno_dev->dev; + + /* GPU comes up in secured mode, make it unsecured by default */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + + /* Set up LM before initializing the GPMU */ + a5xx_lm_init(adreno_dev); + + /* Enable SPTP based power collapse before enabling GPMU */ + a5xx_enable_pc(adreno_dev); + + /* Program the GPMU */ + ret = a5xx_gpmu_start(adreno_dev); + if (ret) + return ret; + + /* Enable limits management */ + a5xx_lm_enable(adreno_dev); + + a5xx_post_start(adreno_dev); + + return 0; +} + +static int a5xx_switch_to_unsecure_mode(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 2); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + cmds += cp_secure_mode(adreno_dev, cmds, 0); + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret != 0) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "Switch to unsecure failed to idle\n"); + spin_idle_debug(device); + kgsl_device_snapshot(device, NULL); + } + + return ret; +} + +/* + * a5xx_rb_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, + */ +static int a5xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 8); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + *cmds++ = cp_type7_packet(CP_ME_INIT, 7); + /* + * Mask -- look for all ordinals but drawcall + * range and reset ucode scratch memory. 
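+ * (0x0000000f below sets only bits 0-3; bit 4 stays clear, which is
+ * why the CP ignores the final two dwords of this packet, as noted
+ * further down.)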
+ */ + *cmds++ = 0x0000000f; + /* Multiple HW ctxs are unreliable on a530v1, use single hw context */ + if (adreno_is_a530v1(adreno_dev)) + *cmds++ = 0x00000000; + else + /* Use both contexts for 3D (bit0) 2D (bit1) */ + *cmds++ = 0x00000003; + /* Enable register protection */ + *cmds++ = 0x20000000; + /* Header dump address */ + *cmds++ = 0x00000000; + /* Header dump enable and dump size */ + *cmds++ = 0x00000000; + /* Below will be ignored by the CP unless bit4 in Mask is set */ + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret != 0) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "CP initialization failed to idle\n"); + spin_idle_debug(device); + kgsl_device_snapshot(device, NULL); + } + + /* GPU comes up in secured mode, make it unsecured by default */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + ret = a5xx_switch_to_unsecure_mode(adreno_dev, rb); + + return ret; +} + +static int _load_firmware(struct adreno_device *adreno_dev, const char *fwfile, + struct kgsl_memdesc *ucode, size_t *ucode_size, + unsigned int *ucode_version) +{ + struct kgsl_device *device = &adreno_dev->dev; + const struct firmware *fw = NULL; + int ret; + + ret = request_firmware(&fw, fwfile, device->dev); + + if (ret) { + KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + ret = kgsl_allocate_global(device, ucode, fw->size - 4, + KGSL_MEMFLAGS_GPUREADONLY, 0); + + if (ret) + goto done; + + memcpy(ucode->hostptr, &fw->data[4], fw->size - 4); + *ucode_size = (fw->size - 4) / sizeof(uint32_t); + *ucode_version = *(unsigned int *)&fw->data[4]; + +done: + release_firmware(fw); + + return ret; +} + +/* + * a5xx_microcode_read() - Read microcode + * @adreno_dev: Pointer to adreno device + */ +static int a5xx_microcode_read(struct adreno_device *adreno_dev) +{ + int ret; + + ret = _load_firmware(adreno_dev, + adreno_dev->gpucore->pm4fw_name, &adreno_dev->pm4, + &adreno_dev->pm4_fw_size, &adreno_dev->pm4_fw_version); + if (ret) + return ret; + + ret = _load_firmware(adreno_dev, + adreno_dev->gpucore->pfpfw_name, &adreno_dev->pfp, + &adreno_dev->pfp_fw_size, &adreno_dev->pfp_fw_version); + if (ret) + return ret; + + ret = _load_gpmu_firmware(adreno_dev); + if (ret) + return ret; + + _load_regfile(adreno_dev); + + return ret; +} + +/* + * a5xx_microcode_load() - Load microcode + * @adreno_dev: Pointer to adreno device + * @start_type: type of device start cold/warm + */ +static int a5xx_microcode_load(struct adreno_device *adreno_dev, + unsigned int start_type) +{ + void *ptr; + struct kgsl_device *device = &adreno_dev->dev; + uint64_t gpuaddr; + + gpuaddr = adreno_dev->pm4.gpuaddr; + kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_LO, + lower_32_bits(gpuaddr)); + kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_HI, + upper_32_bits(gpuaddr)); + + gpuaddr = adreno_dev->pfp.gpuaddr; + kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_LO, + lower_32_bits(gpuaddr)); + kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_HI, + upper_32_bits(gpuaddr)); + + /* + * Resume call to write the zap shader base address into the + * appropriate register + */ + if (zap_ucode_loaded) { + int ret; + struct scm_desc desc = {0}; + + desc.args[0] = 0; + desc.args[1] = 13; + desc.arginfo = SCM_ARGS(2); + + ret = scm_call2(SCM_SIP_FNID(SCM_SVC_BOOT, 0xA), &desc); + if (ret) { + pr_err("SCM resume call failed with error %d\n", ret); + return ret; + } + + } + + /* Load the zap shader firmware through PIL if its 
available */ + if (adreno_dev->gpucore->zap_name && !zap_ucode_loaded) { + ptr = subsystem_get(adreno_dev->gpucore->zap_name); + + /* Return error if the zap shader cannot be loaded */ + if (IS_ERR_OR_NULL(ptr)) + return (ptr == NULL) ? -ENODEV : PTR_ERR(ptr); + + zap_ucode_loaded = 1; + } + + return 0; +} + +static struct adreno_perfcount_register a5xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO, + A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO, + A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO, + A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO, + A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO, + A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO, + A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO, + A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO, + A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 }, +}; + +/* + * Note that PERFCTR_RBBM_0 is missing - it is used to emulate the PWR counters. + * See below. + */ +static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO, + A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO, + A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO, + A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO, + A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO, + A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO, + A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO, + A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO, + A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO, + A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO, + A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO, + A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO, + A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO, + A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO, + A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO, + 
A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO, + A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO, + A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO, + A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO, + A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO, + A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO, + A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO, + A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO, + A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO, + A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO, + A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO, + A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO, + A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO, + A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO, + A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO, + A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO, + A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO, + A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO, + A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO, + A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO, + A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO, + A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO, + A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO, + A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO, + A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 }, +}; + + +static struct adreno_perfcount_register 
a5xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO, + A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO, + A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO, + A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO, + A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO, + A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO, + A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO, + A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO, + A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO, + A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO, + A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO, + A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO, + A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO, + A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO, + A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO, + A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO, + A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO, + A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO, + A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO, + A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO, + A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO, + A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO, + A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO, + A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO, + A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO, + A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO, + 
A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO, + A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO, + A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO, + A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO, + A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO, + A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO, + A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO, + A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO, + A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO, + A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO, + A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO, + A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO, + A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO, + A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO, + A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO, + A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO, + A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO, + A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, A5XX_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO, + A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO, + A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO, + A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO, + A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO, + A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO, + A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO, + A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0, + 
A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1, + A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2, + A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3, + A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0, + A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1, + A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2, + A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO, + A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 }, +}; + +/* + * 5XX targets don't really have physical PERFCTR_PWR registers - we emulate + * them using similar performance counters from the RBBM block. The difference + * betweeen using this group and the RBBM group is that the RBBM counters are + * reloaded after a power collapse which is not how the PWR counters behaved on + * legacy hardware. In order to limit the disruption on the rest of the system + * we go out of our way to ensure backwards compatability. Since RBBM counters + * are in short supply, we don't emulate PWR:0 which nobody uses - mark it as + * broken. + */ +static struct adreno_perfcount_register a5xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_BROKEN, 0, 0, 0, 0, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_0_LO, + A5XX_RBBM_PERFCTR_RBBM_0_HI, -1, 0}, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO, + A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO, + A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO, + A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO, + A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO, + A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO, + A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO, + A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO, + A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO, + A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO, + A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO, + A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO, + A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO, + A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO, + A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO, + A5XX_UCHE_POWER_COUNTER_0_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO, + A5XX_UCHE_POWER_COUNTER_1_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO, + A5XX_UCHE_POWER_COUNTER_2_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO, + A5XX_UCHE_POWER_COUNTER_3_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO, + A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO, + A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO, + A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO, + A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO, + A5XX_GPMU_POWER_COUNTER_0_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO, + A5XX_GPMU_POWER_COUNTER_1_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO, + A5XX_GPMU_POWER_COUNTER_2_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO, + A5XX_GPMU_POWER_COUNTER_3_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO, + A5XX_GPMU_POWER_COUNTER_4_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO, + A5XX_GPMU_POWER_COUNTER_5_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO, + A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 }, +}; + +#define A5XX_PERFCOUNTER_GROUP(offset, name) \ + ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name) + +#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags) + +#define A5XX_POWER_COUNTER_GROUP(offset, name) \ + ADRENO_POWER_COUNTER_GROUP(a5xx, offset, name) + +static struct adreno_perfcount_group a5xx_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + A5XX_PERFCOUNTER_GROUP(CP, cp), + A5XX_PERFCOUNTER_GROUP(RBBM, rbbm), + A5XX_PERFCOUNTER_GROUP(PC, pc), + A5XX_PERFCOUNTER_GROUP(VFD, vfd), + A5XX_PERFCOUNTER_GROUP(HLSQ, hlsq), + A5XX_PERFCOUNTER_GROUP(VPC, vpc), + A5XX_PERFCOUNTER_GROUP(CCU, ccu), + A5XX_PERFCOUNTER_GROUP(CMP, cmp), + A5XX_PERFCOUNTER_GROUP(TSE, tse), + 
A5XX_PERFCOUNTER_GROUP(RAS, ras), + A5XX_PERFCOUNTER_GROUP(LRZ, lrz), + A5XX_PERFCOUNTER_GROUP(UCHE, uche), + A5XX_PERFCOUNTER_GROUP(TP, tp), + A5XX_PERFCOUNTER_GROUP(SP, sp), + A5XX_PERFCOUNTER_GROUP(RB, rb), + A5XX_PERFCOUNTER_GROUP(VSC, vsc), + A5XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A5XX_PERFCOUNTER_GROUP(VBIF, vbif), + A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A5XX_POWER_COUNTER_GROUP(SP, sp), + A5XX_POWER_COUNTER_GROUP(TP, tp), + A5XX_POWER_COUNTER_GROUP(RB, rb), + A5XX_POWER_COUNTER_GROUP(CCU, ccu), + A5XX_POWER_COUNTER_GROUP(UCHE, uche), + A5XX_POWER_COUNTER_GROUP(CP, cp), + A5XX_POWER_COUNTER_GROUP(GPMU, gpmu), + A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson), +}; + +static struct adreno_perfcounters a5xx_perfcounters = { + a5xx_perfcounter_groups, + ARRAY_SIZE(a5xx_perfcounter_groups), +}; + +static struct adreno_ft_perf_counters a5xx_ft_perf_counters[] = { + {KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP_ALU_ACTIVE_CYCLES}, + {KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP0_ICL1_MISSES}, + {KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP_FS_CFLOW_INSTRUCTIONS}, + {KGSL_PERFCOUNTER_GROUP_TSE, A5XX_TSE_INPUT_PRIM_NUM}, +}; + +/* Register offset defines for A5XX, in order of enum adreno_regs */ +static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A5XX_CP_WFI_PEND_CTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A5XX_CP_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A5XX_CP_IB1_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A5XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A5XX_CP_IB2_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A5XX_CP_ROQ_DBG_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A5XX_CP_ROQ_DBG_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A5XX_CP_MERCIU_DBG_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A5XX_CP_MERCIU_DBG_DATA_1), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, + A5XX_CP_MERCIU_DBG_DATA_2), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A5XX_CP_MEQ_DBG_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A5XX_CP_MEQ_DBG_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A5XX_RBBM_PERFCTR_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + 
A5XX_RBBM_PERFCTR_LOAD_CMD0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + A5XX_RBBM_PERFCTR_LOAD_CMD1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + A5XX_RBBM_PERFCTR_LOAD_CMD2), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + A5XX_RBBM_PERFCTR_LOAD_CMD3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A5XX_RBBM_INT_0_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD, + A5XX_RBBM_INT_CLEAR_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A5XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD, + A5XX_RBBM_BLOCK_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2, + A5XX_RBBM_BLOCK_SW_RESET_CMD2), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A5XX_UCHE_INVALIDATE0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + A5XX_RBBM_PERFCTR_LOAD_VALUE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + A5XX_RBBM_PERFCTR_LOAD_VALUE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONTROL, + A5XX_RBBM_SECVID_TRUST_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, + A5XX_RBBM_SECVID_TRUST_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_CONTROL, + A5XX_RBBM_SECVID_TSB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + A5XX_RBBM_ALWAYSON_COUNTER_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + A5XX_RBBM_ALWAYSON_COUNTER_HI), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0, + A5XX_VBIF_XIN_HALT_CTRL0), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1, + A5XX_VBIF_XIN_HALT_CTRL1), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_VERSION, + A5XX_VBIF_VERSION), +}; + +static const struct adreno_reg_offsets a5xx_reg_offsets = { + .offsets = a5xx_register_offsets, + .offset_0 = ADRENO_REG_REGISTER_MAX, +}; + +static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int status1, status2; + + kgsl_regread(device, A5XX_CP_INTERRUPT_STATUS, &status1); + + if (status1 & BIT(A5XX_CP_OPCODE_ERROR)) { + unsigned int val; + + kgsl_regwrite(device, A5XX_CP_PFP_STAT_ADDR, 0); + + /* + * A5XX_CP_PFP_STAT_DATA is indexed, so read it twice to get the + * value we want + */ + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer opcode error | possible opcode=0x%8.8X\n", + val); + } + if (status1 & BIT(A5XX_CP_RESERVED_BIT_ERROR)) + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer reserved bit error interrupt\n"); + if (status1 & BIT(A5XX_CP_HW_FAULT_ERROR)) { + kgsl_regread(device, A5XX_CP_HW_FAULT, &status2); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Ringbuffer HW fault | status=%x\n", + status2); + } + if (status1 & BIT(A5XX_CP_DMA_ERROR)) + KGSL_DRV_CRIT_RATELIMIT(device, "CP | DMA error\n"); + if (status1 & BIT(A5XX_CP_REGISTER_PROTECTION_ERROR)) { + kgsl_regread(device, A5XX_CP_PROTECT_STATUS, &status2); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Protected mode error| %s | addr=%x | status=%x\n", + status2 & (1 << 24) ? 
"WRITE" : "READ", + (status2 & 0xFFFFF) >> 2, status2); + } + if (status1 & BIT(A5XX_CP_AHB_ERROR)) { + kgsl_regread(device, A5XX_CP_AHB_FAULT, &status2); + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer AHB error interrupt | status=%x\n", + status2); + } +} + +static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + switch (bit) { + case A5XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A5XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CMD, (1 << 4)); + return; + } + case A5XX_INT_RBBM_TRANSFER_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: AHB transfer timeout\n"); + break; + case A5XX_INT_RBBM_ME_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_ME_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | ME master split timeout | status=%x\n", reg); + break; + case A5XX_INT_RBBM_PFP_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_PFP_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | PFP master split timeout | status=%x\n", reg); + break; + case A5XX_INT_RBBM_ETS_MS_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM: ME master split timeout\n"); + break; + case A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB ASYNC overflow\n"); + break; + case A5XX_INT_RBBM_ATB_BUS_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n"); + break; + case A5XX_INT_UCHE_OOB_ACCESS: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n"); + break; + case A5XX_INT_UCHE_TRAP_INTR: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Trap interrupt\n"); + break; + case A5XX_INT_GPMU_VOLTAGE_DROOP: + KGSL_DRV_CRIT_RATELIMIT(device, "GPMU: Voltage droop\n"); + break; + default: + KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt %d\n", bit); + } +} + +static void a5xx_gpmu_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + + if (reg & BIT(31)) { + if (test_and_clear_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* Stop GPMU */ + kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 1); + + kgsl_schedule_work(&adreno_dev->gpmu_work); + + KGSL_DRV_CRIT_RATELIMIT(device, + "GPMU: Watchdog bite\n"); + } + } else if (!(reg & BIT(1))) + KGSL_DRV_CRIT_RATELIMIT(device, + "GPMU: Unknown interrupt 0x%08X\n", + reg); +} + +/* +* a5x_gpc_err_int_callback() - Isr for GPC error interrupts +* @adreno_dev: Pointer to device +* @bit: Interrupt bit +*/ +void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* + * GPC error is typically the result of mistake SW programming. + * Force GPU fault for this interrupt so that we can debug it + * with help of register dump. 
+ */ + + KGSL_DRV_CRIT(device, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_set_gpu_fault(ADRENO_DEVICE(device), ADRENO_SOFT_FAULT); + adreno_dispatcher_schedule(device); +} + +#define A5XX_INT_MASK \ + ((1 << A5XX_INT_RBBM_AHB_ERROR) | \ + (1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW) | \ + (1 << A5XX_INT_RBBM_GPC_ERROR) | \ + (1 << A5XX_INT_CP_HW_ERROR) | \ + (1 << A5XX_INT_CP_IB1) | \ + (1 << A5XX_INT_CP_IB2) | \ + (1 << A5XX_INT_CP_RB) | \ + (1 << A5XX_INT_CP_CACHE_FLUSH_TS) | \ + (1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A5XX_INT_UCHE_OOB_ACCESS) | \ + (1 << A5XX_INT_UCHE_TRAP_INTR) | \ + (1 << A5XX_INT_CP_SW) | \ + (1 << A5XX_INT_GPMU_FIRMWARE) | \ + (1 << A5XX_INT_GPMU_VOLTAGE_DROOP)) + + +static struct adreno_irq_funcs a5xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 2 - RBBM_TRANSFER_TIMEOUT */ + /* 3 - RBBM_ME_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 4 - RBBM_PFP_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 5 - RBBM_ETS_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + ADRENO_IRQ_CALLBACK(a5x_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(adreno_dispatcher_preempt_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a5xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 12 - CP_CCU_RESOLVE_TS */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CCP_UNUSED_1 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - UNUSED_2 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + /* 23 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 25 - UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 28 - GPMU_VOLTAGE_DROOP */ + ADRENO_IRQ_CALLBACK(a5xx_gpmu_int_callback), /* 29 - GPMU_FIRMWARE */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +static struct adreno_irq a5xx_irq = { + .funcs = a5xx_irq_funcs, + .mask = A5XX_INT_MASK, +}; + +/* + * Default size for CP queues for A5xx targets. You must + * overwrite these value in platform_setup function for + * A5xx derivatives if size differs. 
+ */ +static struct adreno_snapshot_sizes a5xx_snap_sizes = { + .cp_pfp = 36, + .cp_me = 29, + .cp_meq = 64, + .cp_merciu = 64, + .roq = 512, +}; + +static struct adreno_snapshot_data a5xx_snapshot_data = { + .sect_sizes = &a5xx_snap_sizes, +}; + +static struct adreno_coresight_register a5xx_coresight_registers[] = { + { A5XX_RBBM_CFG_DBGBUS_SEL_A }, + { A5XX_RBBM_CFG_DBGBUS_SEL_B }, + { A5XX_RBBM_CFG_DBGBUS_SEL_C }, + { A5XX_RBBM_CFG_DBGBUS_SEL_D }, + { A5XX_RBBM_CFG_DBGBUS_CNTLT }, + { A5XX_RBBM_CFG_DBGBUS_CNTLM }, + { A5XX_RBBM_CFG_DBGBUS_OPL }, + { A5XX_RBBM_CFG_DBGBUS_OPE }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_3 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_0 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_3 }, + { A5XX_RBBM_CFG_DBGBUS_NIBBLEE }, + { A5XX_RBBM_CFG_DBGBUS_PTRC0 }, + { A5XX_RBBM_CFG_DBGBUS_PTRC1 }, + { A5XX_RBBM_CFG_DBGBUS_LOADREG }, + { A5XX_RBBM_CFG_DBGBUS_IDX }, + { A5XX_RBBM_CFG_DBGBUS_CLRC }, + { A5XX_RBBM_CFG_DBGBUS_LOADIVT }, + { A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC }, + { A5XX_RBBM_CFG_DBGBUS_OVER }, + { A5XX_RBBM_CFG_DBGBUS_COUNT0 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT1 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT2 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT3 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT4 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT5 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 }, + { A5XX_RBBM_CFG_DBGBUS_MISR0 }, + { A5XX_RBBM_CFG_DBGBUS_MISR1 }, + { A5XX_RBBM_AHB_DBG_CNTL }, + { A5XX_RBBM_READ_AHB_THROUGH_DBG }, + { A5XX_RBBM_DBG_LO_HI_GPIO }, + { A5XX_RBBM_EXT_TRACE_BUS_CNTL }, + { A5XX_RBBM_EXT_VBIF_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a5xx_coresight_registers[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]); +static 
ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic, + &a5xx_coresight_registers[33]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr, + &a5xx_coresight_registers[41]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0, + &a5xx_coresight_registers[42]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, + &a5xx_coresight_registers[43]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, + &a5xx_coresight_registers[44]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3, + &a5xx_coresight_registers[45]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4, + &a5xx_coresight_registers[46]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, + &a5xx_coresight_registers[50]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]); +static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]); + +static struct attribute *a5xx_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + 
&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_nibblee.attr.attr,
+ &coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
+ &coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
+ &coresight_attr_cfg_dbgbus_loadreg.attr.attr,
+ &coresight_attr_cfg_dbgbus_idx.attr.attr,
+ &coresight_attr_cfg_dbgbus_clrc.attr.attr,
+ &coresight_attr_cfg_dbgbus_loadivt.attr.attr,
+ &coresight_attr_cfg_dbgbus_event_logic.attr.attr,
+ &coresight_attr_cfg_dbgbus_over.attr.attr,
+ &coresight_attr_cfg_dbgbus_count0.attr.attr,
+ &coresight_attr_cfg_dbgbus_count1.attr.attr,
+ &coresight_attr_cfg_dbgbus_count2.attr.attr,
+ &coresight_attr_cfg_dbgbus_count3.attr.attr,
+ &coresight_attr_cfg_dbgbus_count4.attr.attr,
+ &coresight_attr_cfg_dbgbus_count5.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_addr.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf0.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf3.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf4.attr.attr,
+ &coresight_attr_cfg_dbgbus_misr0.attr.attr,
+ &coresight_attr_cfg_dbgbus_misr1.attr.attr,
+ &coresight_attr_ahb_dbg_cntl.attr.attr,
+ &coresight_attr_read_ahb_through_dbg.attr.attr,
+ &coresight_attr_dbg_lo_hi_gpio.attr.attr,
+ &coresight_attr_ext_trace_bus_cntl.attr.attr,
+ &coresight_attr_ext_vbif_dbg_cntl.attr.attr,
+ NULL,
+};
+
+static const struct attribute_group a5xx_coresight_group = {
+ .attrs = a5xx_coresight_attrs,
+};
+
+static const struct attribute_group *a5xx_coresight_groups[] = {
+ &a5xx_coresight_group,
+ NULL,
+};
+
+static struct adreno_coresight a5xx_coresight = {
+ .registers = a5xx_coresight_registers,
+ .count = ARRAY_SIZE(a5xx_coresight_registers),
+ .groups = a5xx_coresight_groups,
+};
+
+/**
+ * a5xx_preempt_trig_state() - Schedule preemption in TRIGGERED
+ * state
+ * @adreno_dev: Device which is in TRIGGERED state
+ */
+static void a5xx_preempt_trig_state(
+ struct adreno_device *adreno_dev)
+{
+ struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+ struct kgsl_device *device = &(adreno_dev->dev);
+ unsigned int preempt_busy;
+ uint64_t rbbase;
+
+ /*
+ * triggered preemption, check for busy bits, if not set go to complete
+ * bit 0: When high indicates CP is not done with preemption.
+ * bit 4: When high indicates that the CP is actively switching between
+ * application contexts.
+ * Check both the bits to make sure CP is done with preemption.
+ */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &preempt_busy); + if (!(preempt_busy & 0x11)) { + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, &rbbase); + /* Did preemption occur, if so then change states and return */ + if (rbbase != adreno_dev->cur_rb->buffer_desc.gpuaddr) { + if (rbbase == + adreno_dev->next_rb->buffer_desc.gpuaddr) { + KGSL_DRV_INFO(device, + "Preemption completed without interrupt\n"); + trace_adreno_hw_preempt_trig_to_comp( + adreno_dev->cur_rb, + adreno_dev->next_rb); + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_COMPLETE); + } else { + /* + * Something wrong with preemption. + * Set fault and reschedule dispatcher to take + * care of fault. + */ + adreno_set_gpu_fault(adreno_dev, + ADRENO_PREEMPT_FAULT); + } + adreno_dispatcher_schedule(device); + return; + } + } + + /* + * Preemption is still happening. + * Hardware not yet idle means that preemption interrupt + * may still occur, nothing to do here until interrupt signals + * completion of preemption, just return here + */ + if (!adreno_hw_isidle(adreno_dev)) + return; + + /* + * We just changed states, reschedule dispatcher to change + * preemption states + */ + if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED != + atomic_read(&dispatcher->preemption_state)) { + adreno_dispatcher_schedule(device); + return; + } + + + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + + /* reschedule dispatcher to take care of the fault */ + adreno_dispatcher_schedule(device); +} + +/** + * a5xx_preempt_clear_state() - Schedule preemption in CLEAR + * state. Preemption can be issued in this state. + * @adreno_dev: Device which is in CLEAR state + */ +static void a5xx_preempt_clear_state( + struct adreno_device *adreno_dev) + +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_ringbuffer *highest_busy_rb; + int switch_low_to_high; + int ret; + + /* Device not awake means there is nothing to do */ + if (!kgsl_state_is_awake(device)) + return; + + /* keep updating the current rptr when preemption is clear */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &(adreno_dev->cur_rb->rptr)); + + highest_busy_rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev); + if (!highest_busy_rb) + return; + + switch_low_to_high = adreno_compare_prio_level( + highest_busy_rb->id, adreno_dev->cur_rb->id); + + /* already current then return */ + if (!switch_low_to_high) + return; + + if (switch_low_to_high < 0) { + + if (!adreno_hw_isidle(adreno_dev)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * if switching to lower priority make sure that the rptr and + * wptr are equal, when the lower rb is not starved + */ + if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) + return; + /* + * switch to default context because when we switch back + * to higher context then its not known which pt will + * be current, so by making it default here the next + * commands submitted will set the right pt + */ + ret = adreno_drawctxt_switch(adreno_dev, + adreno_dev->cur_rb, + NULL, 0); + /* + * lower priority RB has to wait until space opens up in + * higher RB + */ + if (ret) + return; + } + + /* rptr could be updated in drawctxt switch above, update it here */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &(adreno_dev->cur_rb->rptr)); + + /* turn on IOMMU as the preemption may trigger pt switch */ + kgsl_mmu_enable_clk(&device->mmu); + + /* + * setup memory to do the switch to 
highest priority RB
+ * which is not empty or may be starving away (poor thing)
+ */
+ a5xx_preemption_start(adreno_dev, highest_busy_rb);
+
+ atomic_set(&dispatcher->preemption_state,
+ ADRENO_DISPATCHER_PREEMPT_TRIGGERED);
+
+ adreno_dev->next_rb = highest_busy_rb;
+ mod_timer(&dispatcher->preempt_timer, jiffies +
+ msecs_to_jiffies(ADRENO_DISPATCH_PREEMPT_TIMEOUT));
+
+ trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb,
+ adreno_dev->next_rb);
+ /* issue PREEMPT trigger */
+ adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
+
+ adreno_dispatcher_schedule(device);
+}
+
+/**
+ * a5xx_preempt_complete_state() - Schedule preemption in
+ * COMPLETE state
+ * @adreno_dev: Device which is in COMPLETE state
+ */
+static void a5xx_preempt_complete_state(
+ struct adreno_device *adreno_dev)
+
+{
+ struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+ struct kgsl_device *device = &(adreno_dev->dev);
+ struct adreno_dispatcher_cmdqueue *dispatch_q;
+ uint64_t rbbase;
+ unsigned int wptr;
+ unsigned int val;
+ static unsigned long wait_for_preemption_complete;
+
+ del_timer_sync(&dispatcher->preempt_timer);
+
+ adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &val);
+
+ if (val) {
+ /*
+ * Wait for 50ms for preemption state to be updated by CP
+ * before triggering hang
+ */
+ if (wait_for_preemption_complete == 0)
+ wait_for_preemption_complete = jiffies +
+ msecs_to_jiffies(50);
+ if (time_after(jiffies, wait_for_preemption_complete)) {
+ wait_for_preemption_complete = 0;
+ KGSL_DRV_ERR(device,
+ "Invalid state after preemption CP_PREEMPT:%08x STOP:%1x BUSY:%1x\n",
+ val, (val & 0x1), (val & 0x10)>>4);
+ adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+ }
+ adreno_dispatcher_schedule(device);
+ return;
+ }
+
+ wait_for_preemption_complete = 0;
+ adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
+ ADRENO_REG_CP_RB_BASE_HI, &rbbase);
+ if (rbbase != adreno_dev->next_rb->buffer_desc.gpuaddr) {
+ KGSL_DRV_ERR(device,
+ "RBBASE incorrect after preemption, expected %016llx got %016llx\n",
+ rbbase,
+ adreno_dev->next_rb->buffer_desc.gpuaddr);
+ adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+ adreno_dispatcher_schedule(device);
+ return;
+ }
+
+ a5xx_preemption_save(adreno_dev, adreno_dev->cur_rb);
+
+ dispatch_q = &(adreno_dev->cur_rb->dispatch_q);
+ /* new RB is the current RB */
+ trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb,
+ adreno_dev->cur_rb);
+ adreno_dev->prev_rb = adreno_dev->cur_rb;
+ adreno_dev->cur_rb = adreno_dev->next_rb;
+ adreno_dev->cur_rb->preempted_midway = 0;
+ adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
+ adreno_dev->next_rb = NULL;
+
+ if (adreno_disp_preempt_fair_sched) {
+ /* starved rb is now scheduled so unhalt dispatcher */
+ if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED ==
+ adreno_dev->cur_rb->starve_timer_state)
+ adreno_put_gpu_halt(adreno_dev);
+ adreno_dev->cur_rb->starve_timer_state =
+ ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED;
+ adreno_dev->cur_rb->sched_timer = jiffies;
+ /*
+ * If the outgoing RB has commands then set the
+ * busy time for it
+ */
+ if (adreno_dev->prev_rb->rptr != adreno_dev->prev_rb->wptr) {
+ adreno_dev->prev_rb->starve_timer_state =
+ ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
+ adreno_dev->prev_rb->sched_timer = jiffies;
+ } else {
+ adreno_dev->prev_rb->starve_timer_state =
+ ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
+ }
+ }
+ adreno_ringbuffer_mmu_disable_clk_on_ts(device, adreno_dev->cur_rb,
+ adreno_dev->cur_rb->timestamp);
+
+ atomic_set(&dispatcher->preemption_state,
+
ADRENO_DISPATCHER_PREEMPT_CLEAR); + + /* submit wptr if required for new rb */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + if (adreno_dev->cur_rb->wptr != wptr) { + kgsl_pwrscale_busy(device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, + adreno_dev->cur_rb->wptr); + } + + adreno_preempt_process_dispatch_queue(adreno_dev, dispatch_q); +} + +static void a5xx_preemption_schedule( + struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_ringbuffer *rb; + int i = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + /* + * This barrier is needed for most updated preemption_state + * to be read. + */ + smp_mb(); + + if (KGSL_STATE_ACTIVE == device->state) + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + rb->rptr = adreno_get_rptr(rb); + + switch (atomic_read(&dispatcher->preemption_state)) { + case ADRENO_DISPATCHER_PREEMPT_CLEAR: + a5xx_preempt_clear_state(adreno_dev); + break; + case ADRENO_DISPATCHER_PREEMPT_TRIGGERED: + a5xx_preempt_trig_state(adreno_dev); + /* + * if we transitioned to next state then fall-through + * processing to next state + */ + if (!adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_COMPLETE)) + break; + case ADRENO_DISPATCHER_PREEMPT_COMPLETE: + a5xx_preempt_complete_state(adreno_dev); + break; + default: + BUG(); + } + + mutex_unlock(&device->mutex); +} + +struct adreno_gpudev adreno_a5xx_gpudev = { + .reg_offsets = &a5xx_reg_offsets, + .ft_perf_counters = a5xx_ft_perf_counters, + .ft_perf_counters_count = ARRAY_SIZE(a5xx_ft_perf_counters), + .coresight = &a5xx_coresight, + .start = a5xx_start, + .snapshot = a5xx_snapshot, + .irq = &a5xx_irq, + .snapshot_data = &a5xx_snapshot_data, + .irq_trace = trace_kgsl_a5xx_irq_status, + .num_prio_levels = ADRENO_PRIORITY_MAX_RB_LEVELS, + .platform_setup = a5xx_platform_setup, + .init = a5xx_init, + .rb_init = a5xx_rb_init, + .hw_init = a5xx_hw_init, + .microcode_read = a5xx_microcode_read, + .microcode_load = a5xx_microcode_load, + .perfcounters = &a5xx_perfcounters, + .vbif_xin_halt_ctrl0_mask = A5XX_VBIF_XIN_HALT_CTRL0_MASK, + .is_sptp_idle = a5xx_is_sptp_idle, + .regulator_enable = a5xx_regulator_enable, + .regulator_disable = a5xx_regulator_disable, + .pwrlevel_change_settings = a5xx_pwrlevel_change_settings, + .preemption_pre_ibsubmit = a5xx_preemption_pre_ibsubmit, + .preemption_post_ibsubmit = + a5xx_preemption_post_ibsubmit, + .preemption_token = a5xx_preemption_token, + .preemption_init = a5xx_preemption_init, + .preemption_schedule = a5xx_preemption_schedule, + .enable_64bit = a5xx_enable_64bit, +}; diff --git a/drivers/gpu/msm/adreno_a5xx.h b/drivers/gpu/msm/adreno_a5xx.h new file mode 100644 index 000000000000..e10678216b69 --- /dev/null +++ b/drivers/gpu/msm/adreno_a5xx.h @@ -0,0 +1,77 @@ +/* Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _ADRENO_A5XX_H_ +#define _ADRENO_A5XX_H_ + +#define A5XX_CP_CTXRECORD_MAGIC_REF 0x27C4BAFCUL +/* Size of each CP preemption record */ +#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x100000 +/* Size of the preemption counter block (in bytes) */ +#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4) + +/** + * struct a5xx_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * A5XX_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets. + * Written by CP when switching out. Not used on switch-in. + * we must initialize to zero. + * @cntl: (12) RB_CNTL, saved and restored by CP. + * @rptr: (16) RB_RPTR, saved and restored by CP. + * @wptr: (20) RB_WPTR, saved and restored by CP. + * @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored. + * rbase: (32) RB_BASE_LO|HI saved and restored. + * counter: (40) Pointer to preemption counter + */ +struct a5xx_cp_preemption_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +#define A5XX_CP_SMMU_INFO_MAGIC_REF 0x3618CDA3UL + +/** + * struct a5xx_cp_smmu_info - CP preemption SMMU info. + * @magic: (00) The value at this offset must be equal to + * A5XX_CP_SMMU_INFO_MAGIC_REF. + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the + * incoming context. + * @context_idr: (16) Context Identification Register value. + */ +struct a5xx_cp_smmu_info { + uint32_t magic; + uint32_t _pad4; + uint64_t ttbr0; + uint32_t asid; + uint32_t context_idr; +}; + +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +unsigned int a5xx_num_registers(void); + +void a5xx_crashdump_init(struct adreno_device *adreno_dev); + +#endif diff --git a/drivers/gpu/msm/adreno_a5xx_snapshot.c b/drivers/gpu/msm/adreno_a5xx_snapshot.c new file mode 100644 index 000000000000..95f9198a330a --- /dev/null +++ b/drivers/gpu/msm/adreno_a5xx_snapshot.c @@ -0,0 +1,926 @@ +/* Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/io.h> +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_snapshot.h" +#include "adreno_snapshot.h" +#include "a5xx_reg.h" +#include "adreno_a5xx.h" + +enum a5xx_rbbm_debbus_id { + A5XX_RBBM_DBGBUS_CP = 0x1, + A5XX_RBBM_DBGBUS_RBBM = 0x2, + A5XX_RBBM_DBGBUS_VBIF = 0x3, + A5XX_RBBM_DBGBUS_HLSQ = 0x4, + A5XX_RBBM_DBGBUS_UCHE = 0x5, + A5XX_RBBM_DBGBUS_DPM = 0x6, + A5XX_RBBM_DBGBUS_TESS = 0x7, + A5XX_RBBM_DBGBUS_PC = 0x8, + A5XX_RBBM_DBGBUS_VFDP = 0x9, + A5XX_RBBM_DBGBUS_VPC = 0xa, + A5XX_RBBM_DBGBUS_TSE = 0xb, + A5XX_RBBM_DBGBUS_RAS = 0xc, + A5XX_RBBM_DBGBUS_VSC = 0xd, + A5XX_RBBM_DBGBUS_COM = 0xe, + A5XX_RBBM_DBGBUS_DCOM = 0xf, + A5XX_RBBM_DBGBUS_LRZ = 0x10, + A5XX_RBBM_DBGBUS_A2D_DSP = 0x11, + A5XX_RBBM_DBGBUS_CCUFCHE = 0x12, + A5XX_RBBM_DBGBUS_GPMU = 0x13, + A5XX_RBBM_DBGBUS_RBP = 0x14, + A5XX_RBBM_DBGBUS_HM = 0x15, + A5XX_RBBM_DBGBUS_RBBM_CFG = 0x16, + A5XX_RBBM_DBGBUS_VBIF_CX = 0x17, + A5XX_RBBM_DBGBUS_GPC = 0x1d, + A5XX_RBBM_DBGBUS_LARC = 0x1e, + A5XX_RBBM_DBGBUS_HLSQ_SPTP = 0x1f, + A5XX_RBBM_DBGBUS_RB_0 = 0x20, + A5XX_RBBM_DBGBUS_RB_1 = 0x21, + A5XX_RBBM_DBGBUS_RB_2 = 0x22, + A5XX_RBBM_DBGBUS_RB_3 = 0x23, + A5XX_RBBM_DBGBUS_CCU_0 = 0x28, + A5XX_RBBM_DBGBUS_CCU_1 = 0x29, + A5XX_RBBM_DBGBUS_CCU_2 = 0x2a, + A5XX_RBBM_DBGBUS_CCU_3 = 0x2b, + A5XX_RBBM_DBGBUS_A2D_RAS_0 = 0x30, + A5XX_RBBM_DBGBUS_A2D_RAS_1 = 0x31, + A5XX_RBBM_DBGBUS_A2D_RAS_2 = 0x32, + A5XX_RBBM_DBGBUS_A2D_RAS_3 = 0x33, + A5XX_RBBM_DBGBUS_VFD_0 = 0x38, + A5XX_RBBM_DBGBUS_VFD_1 = 0x39, + A5XX_RBBM_DBGBUS_VFD_2 = 0x3a, + A5XX_RBBM_DBGBUS_VFD_3 = 0x3b, + A5XX_RBBM_DBGBUS_SP_0 = 0x40, + A5XX_RBBM_DBGBUS_SP_1 = 0x41, + A5XX_RBBM_DBGBUS_SP_2 = 0x42, + A5XX_RBBM_DBGBUS_SP_3 = 0x43, + A5XX_RBBM_DBGBUS_TPL1_0 = 0x48, + A5XX_RBBM_DBGBUS_TPL1_1 = 0x49, + A5XX_RBBM_DBGBUS_TPL1_2 = 0x4a, + A5XX_RBBM_DBGBUS_TPL1_3 = 0x4b +}; + +static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = { + { A5XX_RBBM_DBGBUS_CP, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ, 0x100, }, + { A5XX_RBBM_DBGBUS_UCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_DPM, 0x100, }, + { A5XX_RBBM_DBGBUS_TESS, 0x100, }, + { A5XX_RBBM_DBGBUS_PC, 0x100, }, + { A5XX_RBBM_DBGBUS_VFDP, 0x100, }, + { A5XX_RBBM_DBGBUS_VPC, 0x100, }, + { A5XX_RBBM_DBGBUS_TSE, 0x100, }, + { A5XX_RBBM_DBGBUS_RAS, 0x100, }, + { A5XX_RBBM_DBGBUS_VSC, 0x100, }, + { A5XX_RBBM_DBGBUS_COM, 0x100, }, + { A5XX_RBBM_DBGBUS_DCOM, 0x100, }, + { A5XX_RBBM_DBGBUS_LRZ, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, }, + { A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_GPMU, 0x100, }, + { A5XX_RBBM_DBGBUS_RBP, 0x100, }, + { A5XX_RBBM_DBGBUS_HM, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, }, + { A5XX_RBBM_DBGBUS_GPC, 0x100, }, + { A5XX_RBBM_DBGBUS_LARC, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_0, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_1, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_2, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_3, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_0, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_1, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_2, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_3, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_0, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_1, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_2, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_3, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_0, 0x100, }, + { 
A5XX_RBBM_DBGBUS_SP_1, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_2, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_3, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_0, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_1, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_2, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_3, 0x100, }, +}; + +#define A5XX_NUM_AXI_ARB_BLOCKS 2 +#define A5XX_NUM_XIN_BLOCKS 5 + +/* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */ +static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size = adreno_dev->pm4_fw_size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; + header->size = size; + + memcpy(data, adreno_dev->pm4.hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_snapshot_cp_pfp() - Dump the PFP data on snapshot */ +static size_t a5xx_snapshot_cp_pfp(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int size = adreno_dev->pfp_fw_size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; + header->size = size; + + memcpy(data, adreno_dev->pfp.hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_rbbm_debug_bus_read() - Read data from trace bus */ +static void a5xx_rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = (block_id << A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) | + (index << A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_D, reg); + + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1, val); + +} + +/* a5xx_snapshot_vbif_debugbus() - Dump the VBIF debug data */ +static size_t a5xx_snapshot_vbif_debugbus(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i, j; + /* + * Total number of VBIF data words considering 3 sections: + * 2 arbiter blocks of 16 words + * 5 AXI XIN blocks of 4 dwords each + * 5 core clock side XIN blocks of 5 dwords each + */ + unsigned int dwords = (16 * A5XX_NUM_AXI_ARB_BLOCKS) + + (4 * A5XX_NUM_XIN_BLOCKS) + (5 * A5XX_NUM_XIN_BLOCKS); + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + unsigned int reg_clk; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + header->id = block->block_id; + header->count = dwords; + + kgsl_regread(device, A5XX_VBIF_CLKON, ®_clk); + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk | + (A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK << + A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT)); + 
kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 0); + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS_OUT_CTRL, + (A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK << + A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT)); + for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, + (1 << (i + 16))); + for (j = 0; j < 16; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks AXI side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 4; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks core clock side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 5; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL1, + ((j & A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + /* restore the clock of VBIF */ + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk); + return size; +} + +/* a5xx_snapshot_debugbus_block() - Capture debug data for a gpu block */ +static size_t a5xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + dwords = block->dwords; + + /* For a5xx each debug bus data unit is 2 DWRODS */ + size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords * 2; + + for (i = 0; i < dwords; i++) + a5xx_rbbm_debug_bus_read(device, block->block_id, i, + &data[i*2]); + + return size; +} + +/* a5xx_snapshot_debugbus() - Capture debug bus data */ +static void a5xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_CNTLM, + 0xf << A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT); + + for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) { + if (A5XX_RBBM_DBGBUS_VBIF == a5xx_debugbus_blocks[i].block_id) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_vbif_debugbus, + (void *) &a5xx_debugbus_blocks[i]); + else + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_debugbus_block, + (void *) &a5xx_debugbus_blocks[i]); + } +} + +static const unsigned int a5xx_vbif_ver_20040000_registers[] = { + /* VBIF version 0x20040000*/ + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061, + 0x3064, 0x3068, 0x306C, 0x306D, 0x3080, 0x3088, 0x308C, 0x308C, + 0x3090, 0x3094, 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, + 0x30C8, 0x30C8, 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, + 0x3100, 0x3100, 0x3108, 0x3108, 0x3110, 0x3110, 
0x3118, 0x3118, + 0x3120, 0x3120, 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, + 0x340C, 0x340C, 0x3410, 0x3410, 0x3800, 0x3801, +}; + +static const struct adreno_vbif_snapshot_registers +a5xx_vbif_snapshot_registers[] = { + { 0x20040000, a5xx_vbif_ver_20040000_registers, + ARRAY_SIZE(a5xx_vbif_ver_20040000_registers)/2}, + { 0x20040001, a5xx_vbif_ver_20040000_registers, + ARRAY_SIZE(a5xx_vbif_ver_20040000_registers)/2}, +}; + +/* + * Set of registers to dump for A5XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a5xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, + 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, + 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, + 0x04E0, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807, + /* CP */ + 0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0, + 0x0B00, 0x0B12, 0x0B15, 0x0B28, 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, + /* VSC */ + 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, + /* GRAS */ + 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, + 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, + /* RB */ + 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, + 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, + 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, + /* PC */ + 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, + 0x24C0, 0x24C0, + /* VFD */ + 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, + /* VPC */ + 0x0E60, 0x0E7C, + /* UCHE */ + 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2, + + /* RB CTX 0 */ + 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, + 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, + 0xE240, 0xE268, + /* GRAS CTX 0 */ + 0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, + 0xE100, 0xE105, + /* PC CTX 0 */ + 0xE380, 0xE38F, 0xE3B0, 0xE3B0, + /* VFD CTX 0 */ + 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, + /* VPC CTX 0 */ + 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, + + /* RB CTX 1 */ + 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, + 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, + 0xEA40, 0xEA68, + /* GRAS CTX 1 */ + 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, + 0xE900, 0xE905, + /* PC CTX 1 */ + 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, + /* VFD CTX 1 */ + 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0, + /* VPC CTX 1 */ + 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, + /* GPMU */ + 0xA800, 0xA8FF, 0xAC60, 0xAC60, + /* DPM */ + 0xB000, 0xB97F, 0xB9A0, 0xB9BF, +}; + +struct a5xx_hlsq_sp_tp_regs { + unsigned int statetype; + unsigned int ahbaddr; + unsigned int size; +}; + +static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { + /* HLSQ CTX 0 2D */ + { 0x31, 0x2080, 0x1 }, + /* HLSQ CTX 1 2D */ + { 0x33, 0x2480, 0x1 }, + /* HLSQ CTX 0 3D */ + { 0x32, 0xE780, 0x7f }, + /* HLSQ CTX 1 3D */ + { 0x34, 0xEF80, 0x7f }, + + /* SP non context */ + { 0x3f, 0x0EC0, 0x40 }, + /* SP CTX 0 2D */ + { 0x3d, 0x2040, 0x1 }, + /* SP CTX 1 2D */ + { 0x3b, 0x2440, 0x1 }, + /* SP CTX 0 3D */ + { 0x3e, 0xE580, 0x180 }, + /* SP CTX 1 3D */ + { 0x3c, 0xED80, 0x180 }, + + /* TP non context */ + { 0x3a, 0x0F00, 0x40 }, + /* TP CTX 0 2D */ + { 0x38, 0x2000, 0x10 }, + /* TP CTX 1 2D */ + { 0x36, 0x2400, 0x10 }, + /* TP CTX 0 3D */ + { 0x39, 0xE700, 0x128 }, + /* TP CTX 1 
3D */ + { 0x37, 0xEF00, 0x128 }, +}; + +/* HLSQ non context registers - can't be read on A530v1 */ +static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_non_ctx_registers = { + 0x35, 0xE00, 0x1C +}; + +#define A5XX_NUM_SHADER_BANKS 4 +#define A5XX_SHADER_STATETYPE_SHIFT 8 + +enum a5xx_shader_obj { + A5XX_TP_W_MEMOBJ = 1, + A5XX_TP_W_SAMPLER = 2, + A5XX_TP_W_MIPMAP_BASE = 3, + A5XX_TP_W_MEMOBJ_TAG = 4, + A5XX_TP_W_SAMPLER_TAG = 5, + A5XX_TP_S_3D_MEMOBJ = 6, + A5XX_TP_S_3D_SAMPLER = 0x7, + A5XX_TP_S_3D_MEMOBJ_TAG = 0x8, + A5XX_TP_S_3D_SAMPLER_TAG = 0x9, + A5XX_TP_S_CS_MEMOBJ = 0xA, + A5XX_TP_S_CS_SAMPLER = 0xB, + A5XX_TP_S_CS_MEMOBJ_TAG = 0xC, + A5XX_TP_S_CS_SAMPLER_TAG = 0xD, + A5XX_SP_W_INSTR = 0xE, + A5XX_SP_W_CONST = 0xF, + A5XX_SP_W_UAV_SIZE = 0x10, + A5XX_SP_W_CB_SIZE = 0x11, + A5XX_SP_W_UAV_BASE = 0x12, + A5XX_SP_W_CB_BASE = 0x13, + A5XX_SP_W_INST_TAG = 0x14, + A5XX_SP_W_STATE = 0x15, + A5XX_SP_S_3D_INSTR = 0x16, + A5XX_SP_S_3D_CONST = 0x17, + A5XX_SP_S_3D_CB_BASE = 0x18, + A5XX_SP_S_3D_CB_SIZE = 0x19, + A5XX_SP_S_3D_UAV_BASE = 0x1A, + A5XX_SP_S_3D_UAV_SIZE = 0x1B, + A5XX_SP_S_CS_INSTR = 0x1C, + A5XX_SP_S_CS_CONST = 0x1D, + A5XX_SP_S_CS_CB_BASE = 0x1E, + A5XX_SP_S_CS_CB_SIZE = 0x1F, + A5XX_SP_S_CS_UAV_BASE = 0x20, + A5XX_SP_S_CS_UAV_SIZE = 0x21, + A5XX_SP_S_3D_INSTR_DIRTY = 0x22, + A5XX_SP_S_3D_CONST_DIRTY = 0x23, + A5XX_SP_S_3D_CB_BASE_DIRTY = 0x24, + A5XX_SP_S_3D_CB_SIZE_DIRTY = 0x25, + A5XX_SP_S_3D_UAV_BASE_DIRTY = 0x26, + A5XX_SP_S_3D_UAV_SIZE_DIRTY = 0x27, + A5XX_SP_S_CS_INSTR_DIRTY = 0x28, + A5XX_SP_S_CS_CONST_DIRTY = 0x29, + A5XX_SP_S_CS_CB_BASE_DIRTY = 0x2A, + A5XX_SP_S_CS_CB_SIZE_DIRTY = 0x2B, + A5XX_SP_S_CS_UAV_BASE_DIRTY = 0x2C, + A5XX_SP_S_CS_UAV_SIZE_DIRTY = 0x2D, + A5XX_HLSQ_ICB = 0x2E, + A5XX_HLSQ_ICB_DIRTY = 0x2F, + A5XX_HLSQ_ICB_CB_BASE_DIRTY = 0x30, + A5XX_SP_POWER_RESTORE_RAM = 0x40, + A5XX_SP_POWER_RESTORE_RAM_TAG = 0x41, + A5XX_TP_POWER_RESTORE_RAM = 0x42, + A5XX_TP_POWER_RESTORE_RAM_TAG = 0x43, + +}; + +struct a5xx_shader_block { + unsigned int statetype; + unsigned int sz; +}; + +struct a5xx_shader_block_info { + const struct a5xx_shader_block *shader_block; + unsigned int shader_num; +}; + +static const struct a5xx_shader_block a5xx_shader_blocks[] = { + {A5XX_TP_W_MEMOBJ, 0x200}, + {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, + {A5XX_TP_W_SAMPLER_TAG, 0x40}, + {A5XX_TP_S_3D_SAMPLER, 0x80}, + {A5XX_TP_S_3D_SAMPLER_TAG, 0x20}, + {A5XX_TP_S_CS_SAMPLER, 0x40}, + {A5XX_TP_S_CS_SAMPLER_TAG, 0x10}, + {A5XX_SP_W_CONST, 0x800}, + {A5XX_SP_W_CB_SIZE, 0x30}, + {A5XX_SP_W_CB_BASE, 0xF0}, + {A5XX_SP_W_STATE, 0x1}, + {A5XX_SP_S_3D_CONST, 0x800}, + {A5XX_SP_S_3D_CB_SIZE, 0x28}, + {A5XX_SP_S_3D_UAV_SIZE, 0x80}, + {A5XX_SP_S_CS_CONST, 0x400}, + {A5XX_SP_S_CS_CB_SIZE, 0x8}, + {A5XX_SP_S_CS_UAV_SIZE, 0x80}, + {A5XX_SP_S_3D_CONST_DIRTY, 0x12}, + {A5XX_SP_S_3D_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_3D_UAV_SIZE_DIRTY, 0x2}, + {A5XX_SP_S_CS_CONST_DIRTY, 0xA}, + {A5XX_SP_S_CS_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_SIZE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB_DIRTY, 0xB}, + {A5XX_SP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_W_SAMPLER, 0x80}, + {A5XX_TP_W_MEMOBJ_TAG, 0x40}, + {A5XX_TP_S_3D_MEMOBJ, 0x200}, + {A5XX_TP_S_3D_MEMOBJ_TAG, 0x20}, + {A5XX_TP_S_CS_MEMOBJ, 0x100}, + {A5XX_TP_S_CS_MEMOBJ_TAG, 0x10}, + {A5XX_SP_W_INSTR, 0x800}, + {A5XX_SP_W_UAV_SIZE, 0x80}, + {A5XX_SP_W_UAV_BASE, 0x80}, + {A5XX_SP_W_INST_TAG, 0x40}, + {A5XX_SP_S_3D_INSTR, 0x800}, + {A5XX_SP_S_3D_CB_BASE, 0xC8}, + {A5XX_SP_S_3D_UAV_BASE, 0x80}, + {A5XX_SP_S_CS_INSTR, 0x400}, + {A5XX_SP_S_CS_CB_BASE, 
0x28}, + {A5XX_SP_S_CS_UAV_BASE, 0x80}, + {A5XX_SP_S_3D_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_3D_CB_BASE_DIRTY, 0x5}, + {A5XX_SP_S_3D_UAV_BASE_DIRTY, 0x2}, + {A5XX_SP_S_CS_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_CS_CB_BASE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_BASE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB, 0x200}, + {A5XX_HLSQ_ICB_CB_BASE_DIRTY, 0x4}, + {A5XX_SP_POWER_RESTORE_RAM, 0x140}, + {A5XX_TP_POWER_RESTORE_RAM, 0x40}, +}; + +static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader *header = + (struct kgsl_snapshot_shader *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int i; + struct a5xx_shader_block_info *shader_block_info = + (struct a5xx_shader_block_info *)priv; + unsigned int statetype = shader_block_info->shader_block->statetype; + unsigned int size = shader_block_info->shader_block->sz; + unsigned int shader_num = shader_block_info->shader_num; + + + if (remain < SHADER_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, + ((statetype << A5XX_SHADER_STATETYPE_SHIFT) | shader_num)); + + header->type = statetype; + header->index = shader_num; + header->size = size; + + for (i = 0; i < size; i++) + kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + i, + data++); + + return SHADER_SECTION_SZ(size); +} + +static void a5xx_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int i, j; + struct a5xx_shader_block_info blk; + + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { + blk.shader_block = &a5xx_shader_blocks[i]; + blk.shader_num = j; + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SHADER, + snapshot, a5xx_snapshot_shader_memory, &blk); + } + } +} + +static int get_hlsq_registers(struct kgsl_device *device, + const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) +{ + int j; + unsigned int val; + + kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, + (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT)); + + for (j = 0; j < regs->size; j++) { + kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + j, &val); + *data++ = regs->ahbaddr + j; + *data++ = val; + } + + return (regs->size * 2); +} + +static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, i; + + /* Figure out how many registers we are going to dump */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + count += a5xx_hlsq_sp_tp_registers[i].size; + + /* the HLSQ non context registers cannot be dumped on A530v1 */ + if (!adreno_is_a530v1(adreno_dev)) + count += a5xx_hlsq_non_ctx_registers.size; + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + data += get_hlsq_registers(device, + &a5xx_hlsq_sp_tp_registers[i], data); + + if (!adreno_is_a530v1(adreno_dev)) + data += get_hlsq_registers(device, + &a5xx_hlsq_non_ctx_registers, data); + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +static size_t a5xx_legacy_snapshot_registers(struct kgsl_device 
*device, + u8 *buf, size_t remain) +{ + struct kgsl_snapshot_registers regs = { + .regs = a5xx_registers, + .count = ARRAY_SIZE(a5xx_registers) / 2, + }; + + return kgsl_snapshot_dump_registers(device, buf, remain, ®s); +} + +static struct kgsl_memdesc capturescript; +static struct kgsl_memdesc registers; + +#define REG_PAIR_COUNT(_a, _i) \ + (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) + +static inline unsigned int count_registers(void) +{ + unsigned int i, count = 0; + + for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) + count += REG_PAIR_COUNT(a5xx_registers, i); + + return count; +} + +static unsigned int copy_registers(unsigned int *dst) +{ + unsigned int *src = (unsigned int *) registers.hostptr; + unsigned int i, count = 0; + + for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { + unsigned int j; + unsigned int start = a5xx_registers[2 * i]; + unsigned int end = a5xx_registers[(2 * i) + 1]; + + for (j = start; j <= end; j++, count++) { + *dst++ = j; + *dst++ = *src++; + } + } + + return count; +} + +static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned long wait_time; + unsigned int reg = 0; + unsigned int val; + + /* Jump to legacy if the crash dump script was not initialized */ + if (capturescript.gpuaddr == 0 || registers.gpuaddr == 0) + return a5xx_legacy_snapshot_registers(device, buf, remain); + + /* + * If we got here because we are stalled on fault the crash dumper has + * won't work + */ + kgsl_regread(device, A5XX_RBBM_STATUS3, &val); + if (val & BIT(24)) + return a5xx_legacy_snapshot_registers(device, buf, remain); + + if (remain < (count_registers() * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO, + lower_32_bits(capturescript.gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_HI, + upper_32_bits(capturescript.gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_DUMP_CNTL, 1); + + wait_time = jiffies + msecs_to_jiffies(CP_CRASH_DUMPER_TIMEOUT); + while (!time_after(jiffies, wait_time)) { + kgsl_regread(device, A5XX_CP_CRASH_DUMP_CNTL, ®); + if (reg & 0x4) + break; + cpu_relax(); + } + + if (!(reg & 0x4)) { + KGSL_CORE_ERR("Crash dump timed out: 0x%X\n", reg); + return a5xx_legacy_snapshot_registers(device, buf, remain); + } + + header->count = copy_registers(data); + + /* Return the size of the section */ + return (header->count * 8) + sizeof(*header); +} + +/* + * a5xx_snapshot() - A5XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the A5XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; + unsigned int clock_ctl, reg; + + /* Disable Clock gating temporarily for the debug bus to work */ + kgsl_regread(device, A5XX_RBBM_CLOCK_CNTL, &clock_ctl); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, a5xx_snapshot_registers, NULL); + + adreno_snapshot_vbif_registers(device, snapshot, + a5xx_vbif_snapshot_registers, + 
ARRAY_SIZE(a5xx_vbif_snapshot_registers)); + + /* Dump SP TP HLSQ registers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); + + /* CP_PFP indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, + 0, snap_data->sect_sizes->cp_pfp); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_STAT_ADDR, A5XX_CP_ME_STAT_DATA, + 0, snap_data->sect_sizes->cp_me); + + /* CP_DRAW_STATE */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA, + 0, 128); + + /* + * CP needs to be halted on a530v1 before reading CP_PFP_UCODE_DBG_DATA + * and CP_PM4_UCODE_DBG_DATA registers + */ + if (adreno_is_a530v1(adreno_dev)) { + adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); + } + + /* ME_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_UCODE_DBG_ADDR, A5XX_CP_ME_UCODE_DBG_DATA, + 0, 0x53F); + + /* PFP_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_UCODE_DBG_ADDR, A5XX_CP_PFP_UCODE_DBG_DATA, + 0, 0x53F); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_meq, + &snap_data->sect_sizes->cp_meq); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_roq, + &snap_data->sect_sizes->roq); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_merciu, + &snap_data->sect_sizes->cp_merciu); + + /* CP PFP and PM4 */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pfp, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pm4, NULL); + + /* Shader memory */ + a5xx_snapshot_shader(device, snapshot); + + /* Debug bus */ + a5xx_snapshot_debugbus(device, snapshot); +} + +void a5xx_crashdump_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int i, count; + uint64_t *ptr; + uint64_t gpuaddr; + + if (capturescript.gpuaddr != 0 && registers.gpuaddr != 0) + return; + + /* + * For the capture script two blocks of memory are needed: A block of + * GPU readonly memory for the special capture script and a destination + * block for the register values. The size of the capture script needs + * is 128 bits (4 dwords) per register pair and 4 dwords at the end. + * The destination block needs to be big enough to hold all the + * registers that we will capture. 
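The comment above describes the capture-script layout laid out by a5xx_crashdump_init(): two 64-bit words per register pair (a destination GPU address followed by a control word that packs the register offset above bit 44 and the register count in the low bits), with two zero qwords terminating the script. A minimal decoding sketch of one such entry, purely illustrative; the struct and function names below are not part of the driver:

struct a5xx_capture_script_entry {
        uint64_t dest_gpuaddr;  /* where the CP crash dumper writes the values */
        uint64_t control;       /* (register offset << 44) | register count */
};

static inline void a5xx_decode_script_entry(
                const struct a5xx_capture_script_entry *ent,
                unsigned int *offset, unsigned int *count)
{
        /* Mirrors the packing used when the script is built */
        *offset = (unsigned int)(ent->control >> 44);
        *count = (unsigned int)(ent->control & ((1ULL << 44) - 1));
}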
+ */ + + if (kgsl_allocate_global(device, &capturescript, + ((ARRAY_SIZE(a5xx_registers) / 2) * 16) + 16, + KGSL_MEMFLAGS_GPUREADONLY, 0)) + return; + + /* Count the total number of registers to capture */ + count = count_registers(); + + if (kgsl_allocate_global(device, ®isters, + count * sizeof(unsigned int), 0, 0)) + return; + + /* Build the crash script */ + + ptr = (uint64_t *) capturescript.hostptr; + gpuaddr = registers.gpuaddr; + + for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { + unsigned int regs = REG_PAIR_COUNT(a5xx_registers, i); + *ptr++ = gpuaddr; + *ptr++ = (((uint64_t) a5xx_registers[2 * i]) << 44) | regs; + + gpuaddr += regs * sizeof(unsigned int); + } + + *ptr++ = 0; + *ptr++ = 0; +} diff --git a/drivers/gpu/msm/adreno_compat.c b/drivers/gpu/msm/adreno_compat.c new file mode 100644 index 000000000000..4d78de5b9ec3 --- /dev/null +++ b/drivers/gpu/msm/adreno_compat.c @@ -0,0 +1,206 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/uaccess.h> +#include <linux/ioctl.h> + +#include "kgsl.h" +#include "kgsl_compat.h" + +#include "adreno.h" +#include "adreno_compat.h" + +int adreno_getproperty_compat(struct kgsl_device *device, + unsigned int type, + void __user *value, + size_t sizebytes) +{ + int status = -EINVAL; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + switch (type) { + case KGSL_PROP_DEVICE_INFO: + { + struct kgsl_devinfo_compat devinfo; + + if (sizebytes != sizeof(devinfo)) { + status = -EINVAL; + break; + } + + memset(&devinfo, 0, sizeof(devinfo)); + devinfo.device_id = device->id + 1; + devinfo.chip_id = adreno_dev->chipid; + devinfo.mmu_enabled = kgsl_mmu_enabled(); + devinfo.gmem_gpubaseaddr = adreno_dev->gmem_base; + devinfo.gmem_sizebytes = adreno_dev->gmem_size; + + if (copy_to_user(value, &devinfo, sizeof(devinfo)) != + 0) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_DEVICE_SHADOW: + { + struct kgsl_shadowprop_compat shadowprop; + + if (sizebytes != sizeof(shadowprop)) { + status = -EINVAL; + break; + } + memset(&shadowprop, 0, sizeof(shadowprop)); + if (device->memstore.hostptr) { + /* + * NOTE: with mmu enabled, gpuaddr doesn't mean + * anything to mmap(). + * NOTE: shadowprop.gpuaddr is uint32 + * (because legacy) and the memstore gpuaddr is + * 64 bit. Cast the memstore gpuaddr to uint32. 
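The note above amounts to keeping only the low 32 bits of the 64-bit memstore address for the legacy structure. An equivalent spelling with the standard kernel helper, shown only for illustration (the driver code below uses a plain cast):

static inline unsigned int kgsl_memstore_legacy_gpuaddr(
                struct kgsl_device *device)
{
        /* Same effect as the (unsigned int) cast applied below */
        return lower_32_bits(device->memstore.gpuaddr);
}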
+ */ + shadowprop.gpuaddr = + (unsigned int) device->memstore.gpuaddr; + shadowprop.size = + (unsigned int) device->memstore.size; + /* + * GSL needs this to be set, even if it + * appears to be meaningless + */ + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; + } + if (copy_to_user(value, &shadowprop, + sizeof(shadowprop))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + default: + /* + * Call the adreno_getproperty to check if the property type + * was KGSL_PROP_MMU_ENABLE or KGSL_PROP_INTERRUPT_WAITS + */ + status = device->ftbl->getproperty(device, type, value, + sizebytes); + } + + return status; +} + +int adreno_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes) +{ + int status = -EINVAL; + struct kgsl_device *device = dev_priv->device; + + switch (type) { + case KGSL_PROP_PWR_CONSTRAINT: { + struct kgsl_device_constraint_compat constraint32; + struct kgsl_device_constraint constraint; + struct kgsl_context *context; + + if (sizebytes != sizeof(constraint32)) + break; + + if (copy_from_user(&constraint32, value, + sizeof(constraint32))) { + status = -EFAULT; + break; + } + + /* Populate the real constraint type from the compat */ + constraint.type = constraint32.type; + constraint.context_id = constraint32.context_id; + constraint.data = compat_ptr(constraint32.data); + constraint.size = (size_t)constraint32.size; + + context = kgsl_context_get_owner(dev_priv, + constraint.context_id); + if (context == NULL) + break; + status = adreno_set_constraint(device, context, + &constraint); + kgsl_context_put(context); + } + break; + default: + /* + * Call adreno_setproperty in case the property type was + * KGSL_PROP_PWRCTRL + */ + status = device->ftbl->setproperty(dev_priv, type, value, + sizebytes); + } + + return status; +} + +static long adreno_ioctl_perfcounter_query_compat( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_query_compat *query32 = data; + struct kgsl_perfcounter_query query; + long result; + + query.groupid = query32->groupid; + query.countables = to_user_ptr(query32->countables); + query.count = query32->count; + query.max_counters = query32->max_counters; + + result = adreno_perfcounter_query_group(adreno_dev, + query.groupid, query.countables, + query.count, &query.max_counters); + query32->max_counters = query.max_counters; + + return result; +} + +static long adreno_ioctl_perfcounter_read_compat( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_read_compat *read32 = data; + struct kgsl_perfcounter_read read; + + read.reads = (struct kgsl_perfcounter_read_group __user *) + (uintptr_t)read32->reads; + read.count = read32->count; + + return adreno_perfcounter_read_group(adreno_dev, read.reads, + read.count); +} + +static struct kgsl_ioctl adreno_compat_ioctl_funcs[] = { + { IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get }, + { IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put }, + { IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT, + adreno_ioctl_perfcounter_query_compat }, + { IOCTL_KGSL_PERFCOUNTER_READ_COMPAT, + adreno_ioctl_perfcounter_read_compat }, +}; + +long adreno_compat_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + return adreno_ioctl_helper(dev_priv, 
cmd, arg, + adreno_compat_ioctl_funcs, + ARRAY_SIZE(adreno_compat_ioctl_funcs)); +} diff --git a/drivers/gpu/msm/adreno_compat.h b/drivers/gpu/msm/adreno_compat.h new file mode 100644 index 000000000000..4fba17bc8b13 --- /dev/null +++ b/drivers/gpu/msm/adreno_compat.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __ADRENO_COMPAT_H +#define __ADRENO_COMPAT_H + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> +#include "kgsl.h" +#include "kgsl_device.h" + +int adreno_getproperty_compat(struct kgsl_device *device, + unsigned int type, + void __user *value, + size_t sizebytes); + +int adreno_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes); + +long adreno_compat_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + +#else + +static inline int adreno_getproperty_compat(struct kgsl_device *device, + unsigned int type, + void __user *value, size_t sizebytes) +{ + BUG(); +} + +static inline int adreno_setproperty_compat(struct kgsl_device_private + *dev_priv, unsigned int type, + void __user *value, unsigned int sizebytes) +{ + BUG(); +} + +static inline long adreno_compat_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + BUG(); +} + +#endif /* CONFIG_COMPAT */ +#endif /* __ADRENO_COMPAT_H */ diff --git a/drivers/gpu/msm/adreno_coresight.c b/drivers/gpu/msm/adreno_coresight.c new file mode 100644 index 000000000000..326f3ed0ed4d --- /dev/null +++ b/drivers/gpu/msm/adreno_coresight.c @@ -0,0 +1,331 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
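The compat ioctl table above pairs each command code with its handler and is handed to adreno_ioctl_helper() together with its length. That helper lives elsewhere in the driver; the sketch below only illustrates the table-walk dispatch pattern implied by the call. The member names .cmd and .func, the -ENOIOCTLCMD fallback, and the fact that the real helper also copies the ioctl argument from and to user space (omitted here) are assumptions, not taken from this commit:

static long example_ioctl_table_dispatch(struct kgsl_device_private *dev_priv,
                unsigned int cmd, void *data,
                const struct kgsl_ioctl *funcs, unsigned int count)
{
        unsigned int i;

        /* Walk the table and call the first handler matching the command */
        for (i = 0; i < count; i++) {
                if (funcs[i].cmd == cmd)
                        return funcs[i].func(dev_priv, cmd, data);
        }

        return -ENOIOCTLCMD;
}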
+ */ + +#include <linux/platform_device.h> +#include <linux/of_coresight.h> +#include <linux/coresight.h> + +#include "adreno.h" + +#define TO_ADRENO_CORESIGHT_ATTR(_attr) \ + container_of(_attr, struct adreno_coresight_attr, attr) + +ssize_t adreno_coresight_show_register(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev->parent); + struct adreno_device *adreno_dev; + struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr); + + if (device == NULL) + return -EINVAL; + + adreno_dev = ADRENO_DEVICE(device); + + if (cattr->reg == NULL) + return -EINVAL; + + /* + * Return the current value of the register if coresight is enabled, + * otherwise report 0 + */ + + mutex_lock(&device->mutex); + if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) { + + /* + * If the device isn't power collapsed read the actual value + * from the hardware - otherwise return the cached value + */ + + if (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_NAP) { + if (!kgsl_active_count_get(device)) { + kgsl_regread(device, cattr->reg->offset, + &cattr->reg->value); + kgsl_active_count_put(device); + } + } + + val = cattr->reg->value; + } + mutex_unlock(&device->mutex); + + return snprintf(buf, PAGE_SIZE, "0x%X", val); +} + +ssize_t adreno_coresight_store_register(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct kgsl_device *device = dev_get_drvdata(dev->parent); + struct adreno_device *adreno_dev; + struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr); + unsigned long val; + int ret; + + if (device == NULL) + return -EINVAL; + + adreno_dev = ADRENO_DEVICE(device); + + if (cattr->reg == NULL) + return -EINVAL; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + /* Ignore writes while coresight is off */ + if (!test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) + goto out; + + cattr->reg->value = val; + + /* Program the hardware if it is not power collapsed */ + if (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_NAP) { + if (!kgsl_active_count_get(device)) { + kgsl_regwrite(device, cattr->reg->offset, + cattr->reg->value); + kgsl_active_count_put(device); + } + } + +out: + mutex_unlock(&device->mutex); + return size; +} + +/** + * adreno_coresight_disable() - Generic function to disable coresight debugging + * @csdev: Pointer to coresight's device struct + * + * This is a generic function to disable coresight debug bus on adreno + * devices. This should be used in all cases of disabling + * coresight debug bus for adreno devices. This function in turn calls + * the adreno device specific function through the gpudev hook. + * This function is registered as the coresight disable function + * with coresight driver. It should only be called through coresight driver + * as that would ensure that the necessary setup required to be done on + * coresight driver's part is also done. 
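The show and store handlers above reach their register through container_of() on the device_attribute and then use reg->offset and reg->value (and, in the enable path further down, reg->initial). For reference, a sketch of the shapes those accesses imply; the real definitions live in the driver headers and may differ in layout:

struct adreno_coresight_register {
        unsigned int offset;    /* KGSL register offset for the debug bus */
        unsigned int initial;   /* default programmed when tracing is enabled */
        unsigned int value;     /* cached value, rewritten after power collapse */
};

struct adreno_coresight_attr {
        struct device_attribute attr;   /* sysfs hook served by show/store above */
        struct adreno_coresight_register *reg;
};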
+ */ +static void adreno_coresight_disable(struct coresight_device *csdev) +{ + struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent); + struct adreno_device *adreno_dev; + struct adreno_gpudev *gpudev; + struct adreno_coresight *coresight; + int i; + + if (device == NULL) + return; + + adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + coresight = gpudev->coresight; + + if (coresight == NULL) + return; + + mutex_lock(&device->mutex); + + if (!kgsl_active_count_get(device)) { + for (i = 0; i < coresight->count; i++) + kgsl_regwrite(device, coresight->registers[i].offset, + 0); + + kgsl_active_count_put(device); + } + + clear_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv); + + mutex_unlock(&device->mutex); +} + +/** + * _adreno_coresight_get_and_clear(): Save the current value of coresight + * registers and clear the registers subsequently. Clearing registers + * has the effect of disabling coresight. + * @adreno_dev: Pointer to adreno device struct + */ +static int _adreno_coresight_get_and_clear(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_coresight *coresight = gpudev->coresight; + int i; + + if (coresight == NULL) + return -ENODEV; + + kgsl_pre_hwaccess(device); + /* + * Save the current value of each coresight register + * and then clear each register + */ + for (i = 0; i < coresight->count; i++) { + kgsl_regread(device, coresight->registers[i].offset, + &coresight->registers[i].value); + kgsl_regwrite(device, coresight->registers[i].offset, + 0); + } + + return 0; +} + +static int _adreno_coresight_set(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_coresight *coresight = gpudev->coresight; + int i; + + if (coresight == NULL) + return -ENODEV; + + BUG_ON(!kgsl_state_is_awake(device)); + for (i = 0; i < coresight->count; i++) + kgsl_regwrite(device, coresight->registers[i].offset, + coresight->registers[i].value); + + return 0; +} +/** + * adreno_coresight_enable() - Generic function to enable coresight debugging + * @csdev: Pointer to coresight's device struct + * + * This is a generic function to enable coresight debug bus on adreno + * devices. This should be used in all cases of enabling + * coresight debug bus for adreno devices. This function is registered as the + * coresight enable function with coresight driver. It should only be called + * through coresight driver as that would ensure that the necessary setup + * required to be done on coresight driver's part is also done. 
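Alongside the enable and disable callbacks, the file defines adreno_coresight_stop() and adreno_coresight_start() further down to bracket GPU power collapse: stop caches the live register values and clears them, start writes the cached values back. A minimal usage sketch; the call sites are assumed here, in the driver they sit in the power management path, which is not part of this file:

static void example_coresight_power_transition(
                struct adreno_device *adreno_dev, bool powering_down)
{
        if (powering_down)
                adreno_coresight_stop(adreno_dev);      /* save and clear */
        else
                adreno_coresight_start(adreno_dev);     /* restore cached values */
}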
+ */ +static int adreno_coresight_enable(struct coresight_device *csdev) +{ + struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent); + struct adreno_device *adreno_dev; + struct adreno_gpudev *gpudev; + struct adreno_coresight *coresight; + int ret = 0; + + if (device == NULL) + return -ENODEV; + + adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + coresight = gpudev->coresight; + + if (coresight == NULL) + return -ENODEV; + + mutex_lock(&device->mutex); + if (!test_and_set_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) { + int i; + + /* Reset all the debug registers to their default values */ + + for (i = 0; i < coresight->count; i++) + coresight->registers[i].value = + coresight->registers[i].initial; + + ret = kgsl_active_count_get(device); + if (!ret) { + ret = _adreno_coresight_set(adreno_dev); + kgsl_active_count_put(device); + } + } + + mutex_unlock(&device->mutex); + + return ret; +} + +/** + * adreno_coresight_start() - Reprogram coresight registers after power collapse + * @adreno_dev: Pointer to the adreno device structure + * + * Cache the current coresight register values so they can be restored after + * power collapse + */ +void adreno_coresight_stop(struct adreno_device *adreno_dev) +{ + if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) + _adreno_coresight_get_and_clear(adreno_dev); +} + +/** + * adreno_coresight_start() - Reprogram coresight registers after power collapse + * @adreno_dev: Pointer to the adreno device structure + * + * Reprogram the cached values to the coresight registers on power up + */ +void adreno_coresight_start(struct adreno_device *adreno_dev) +{ + if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) + _adreno_coresight_set(adreno_dev); +} + +static const struct coresight_ops_source adreno_coresight_source_ops = { + .enable = adreno_coresight_enable, + .disable = adreno_coresight_disable, +}; + +static const struct coresight_ops adreno_coresight_ops = { + .source_ops = &adreno_coresight_source_ops, +}; + +void adreno_coresight_remove(struct adreno_device *adreno_dev) +{ + coresight_unregister(adreno_dev->csdev); + adreno_dev->csdev = NULL; +} + +int adreno_coresight_init(struct adreno_device *adreno_dev) +{ + int ret = 0; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + struct coresight_desc desc; + + if (gpudev->coresight == NULL) + return -ENODEV; + + if (adreno_dev->csdev != NULL) + return 0; + + memset(&desc, 0, sizeof(desc)); + + desc.pdata = of_get_coresight_platform_data(&device->pdev->dev, + device->pdev->dev.of_node); + if (desc.pdata == NULL) + return -ENODEV; + + desc.type = CORESIGHT_DEV_TYPE_SOURCE; + desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_BUS; + desc.ops = &adreno_coresight_ops; + desc.dev = &device->pdev->dev; + desc.owner = THIS_MODULE; + desc.groups = gpudev->coresight->groups; + + adreno_dev->csdev = coresight_register(&desc); + + if (IS_ERR(adreno_dev->csdev)) + ret = PTR_ERR(adreno_dev->csdev); + + return ret; +} diff --git a/drivers/gpu/msm/adreno_cp_parser.c b/drivers/gpu/msm/adreno_cp_parser.c new file mode 100644 index 000000000000..4faf3a8319cb --- /dev/null +++ b/drivers/gpu/msm/adreno_cp_parser.c @@ -0,0 +1,1048 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" + +#include "adreno.h" +#include "adreno_pm4types.h" +#include "a3xx_reg.h" +#include "adreno_cp_parser.h" + +#define MAX_IB_OBJS 1000 +#define NUM_SET_DRAW_GROUPS 32 + +struct set_draw_state { + uint64_t cmd_stream_addr; + uint64_t cmd_stream_dwords; +}; + +/* List of variables used when parsing an IB */ +struct ib_parser_variables { + /* List of registers containing addresses and their sizes */ + unsigned int cp_addr_regs[ADRENO_CP_ADDR_MAX]; + /* 32 groups of command streams in set draw state packets */ + struct set_draw_state set_draw_groups[NUM_SET_DRAW_GROUPS]; +}; + +/* + * Used for locating shader objects. This array holds the unit size of shader + * objects based on type and block of shader. The type can be 0 or 1 hence there + * are 2 columns and block can be 0-7 hence 7 rows. + */ +static int load_state_unit_sizes[7][2] = { + { 2, 4 }, + { 0, 1 }, + { 2, 4 }, + { 0, 1 }, + { 8, 2 }, + { 8, 2 }, + { 8, 2 }, +}; + +static int adreno_ib_find_objs(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + int obj_type, + struct adreno_ib_object_list *ib_obj_list, + int ib_level); + +static int ib_parse_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars); + +static int ib_parse_type7_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list); + +/* + * adreno_ib_merge_range() - Increases the address range tracked by an ib + * object + * @ib_obj: The ib object + * @gpuaddr: The start address which is to be merged + * @size: Size of the merging address + */ +static void adreno_ib_merge_range(struct adreno_ib_object *ib_obj, + uint64_t gpuaddr, uint64_t size) +{ + uint64_t addr_end1 = ib_obj->gpuaddr + ib_obj->size; + uint64_t addr_end2 = gpuaddr + size; + if (gpuaddr < ib_obj->gpuaddr) + ib_obj->gpuaddr = gpuaddr; + if (addr_end2 > addr_end1) + ib_obj->size = addr_end2 - ib_obj->gpuaddr; + else + ib_obj->size = addr_end1 - ib_obj->gpuaddr; +} + +/* + * adreno_ib_check_overlap() - Checks if an address range overlap + * @gpuaddr: The start address range to check for overlap + * @size: Size of the address range + * @type: The type of address range + * @ib_obj_list: The list of address ranges to check for overlap + * + * Checks if an address range overlaps with a list of address ranges + * Returns the entry from list which overlaps else NULL + */ +static struct adreno_ib_object *adreno_ib_check_overlap(uint64_t gpuaddr, + uint64_t size, int type, + struct adreno_ib_object_list *ib_obj_list) +{ + struct adreno_ib_object *ib_obj; + int i; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + ib_obj = &(ib_obj_list->obj_list[i]); + if ((type == ib_obj->snapshot_obj_type) && + kgsl_addr_range_overlap(ib_obj->gpuaddr, ib_obj->size, + gpuaddr, size)) + /* regions overlap */ + return 
ib_obj; + } + return NULL; +} + +/* + * adreno_ib_add() - Add a gpuaddress range to list + * @process: Process in which the gpuaddress is mapped + * @type: The type of address range + * @ib_obj_list: List of the address ranges in which the given range is to be + * added + * + * Add a gpuaddress range as an ib object to a given list after checking if it + * overlaps with another entry on the list. If it conflicts then change the + * existing entry to incorporate this range + * + * Returns 0 on success else error code + */ +static int adreno_ib_add(struct kgsl_process_private *process, + uint64_t gpuaddr, int type, + struct adreno_ib_object_list *ib_obj_list) +{ + uint64_t size; + struct adreno_ib_object *ib_obj; + struct kgsl_mem_entry *entry; + + if (MAX_IB_OBJS <= ib_obj_list->num_objs) + return -E2BIG; + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (!entry) + /* + * Do not fail if gpuaddr not found, we can continue + * to search for other objects even if few objects are + * not found + */ + return 0; + + size = entry->memdesc.size; + gpuaddr = entry->memdesc.gpuaddr; + + ib_obj = adreno_ib_check_overlap(gpuaddr, size, type, ib_obj_list); + if (ib_obj) { + adreno_ib_merge_range(ib_obj, gpuaddr, size); + kgsl_mem_entry_put(entry); + } else { + adreno_ib_init_ib_obj(gpuaddr, size, type, entry, + &(ib_obj_list->obj_list[ib_obj_list->num_objs])); + ib_obj_list->num_objs++; + } + return 0; +} + +/* + * ib_save_mip_addresses() - Find mip addresses + * @pkt: Pointer to the packet in IB + * @process: The process in which IB is mapped + * @ib_obj_list: List in which any objects found are added + * + * Returns 0 on success else error code + */ +static int ib_save_mip_addresses(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list) +{ + int ret = 0; + int num_levels = (pkt[1] >> 22) & 0x03FF; + int i; + unsigned int *hostptr; + struct kgsl_mem_entry *ent; + unsigned int block, type; + int unitsize = 0; + + block = (pkt[1] >> 19) & 0x07; + type = pkt[2] & 0x03; + + if (type == 0) + unitsize = load_state_unit_sizes[block][0]; + else + unitsize = load_state_unit_sizes[block][1]; + + if (3 == block && 1 == type) { + uint64_t gpuaddr = pkt[2] & 0xFFFFFFFC; + uint64_t size = (num_levels * unitsize) << 2; + + ent = kgsl_sharedmem_find(process, gpuaddr); + if (ent == NULL) + return 0; + + if (!kgsl_gpuaddr_in_memdesc(&ent->memdesc, + gpuaddr, size)) { + kgsl_mem_entry_put(ent); + return 0; + } + + hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc, gpuaddr); + if (hostptr != NULL) { + for (i = 0; i < num_levels; i++) { + ret = adreno_ib_add(process, hostptr[i], + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + } + } + + kgsl_memdesc_unmap(&ent->memdesc); + kgsl_mem_entry_put(ent); + } + return ret; +} + +/* + * ib_parse_load_state() - Parse load state packet + * @pkt: Pointer to the packet in IB + * @process: The pagetable in which the IB is mapped + * @ib_obj_list: List in which any objects found are added + * @ib_parse_vars: VAriable list that store temporary addressses + * + * Parse load state packet found in an IB and add any memory object found to + * a list + * Returns 0 on success else error code + */ +static int ib_parse_load_state(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int ret = 0; + int i; + + /* + * The object here is to find indirect shaders i.e - shaders loaded from + * GPU memory instead of directly in the 
command. These should be added + * to the list of memory objects to dump. So look at the load state + * if the block is indirect (source = 4). If so then add the memory + * address to the list. The size of the object differs depending on the + * type per the load_state_unit_sizes array above. + */ + + if (type3_pkt_size(pkt[0]) < 2) + return 0; + + /* + * Anything from 3rd ordinal onwards of packet can be a memory object, + * no need to be fancy about parsing it, just save it if it looks + * like memory + */ + for (i = 0; i <= (type3_pkt_size(pkt[0]) - 2); i++) { + ret |= adreno_ib_add(process, pkt[2 + i] & 0xFFFFFFFC, + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + } + /* get the mip addresses */ + if (!ret) + ret = ib_save_mip_addresses(pkt, process, ib_obj_list); + return ret; +} + +/* + * This opcode sets the base addresses for the visibilty stream buffer and the + * visiblity stream size buffer. + */ + +static int ib_parse_set_bin_data(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int ret = 0; + + if (type3_pkt_size(pkt[0]) < 2) + return 0; + + /* Visiblity stream buffer */ + ret = adreno_ib_add(process, pkt[1], + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + if (ret) + return ret; + + /* visiblity stream size buffer (fixed size 8 dwords) */ + ret = adreno_ib_add(process, pkt[2], + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + + return ret; +} + +/* + * This opcode writes to GPU memory - if the buffer is written to, there is a + * good chance that it would be valuable to capture in the snapshot, so mark all + * buffers that are written to as frozen + */ + +static int ib_parse_mem_write(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + if (type3_pkt_size(pkt[0]) < 1) + return 0; + + /* + * The address is where the data in the rest of this packet is written + * to, but since that might be an offset into the larger buffer we need + * to get the whole thing. Pass a size of 0 tocapture the entire buffer. + */ + + return adreno_ib_add(process, pkt[1] & 0xFFFFFFFC, + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); +} + +/* + * ib_add_type0_entries() - Add memory objects to list + * @device: The device on which the IB will execute + * @process: The process in which IB is mapped + * @ib_obj_list: The list of gpu objects + * @ib_parse_vars: addresses ranges found in type0 packets + * + * Add memory objects to given list that are found in type0 packets + * Returns 0 on success else 0 + */ +static int ib_add_type0_entries(struct kgsl_device *device, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = 0; + int i; + int vfd_end; + unsigned int mask; + /* First up the visiblity stream buffer */ + if (adreno_is_a4xx(adreno_dev)) + mask = 0xFFFFFFFC; + else + mask = 0xFFFFFFFF; + for (i = ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0; + i < ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7; i++) { + if (ib_parse_vars->cp_addr_regs[i]) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[i] & mask, + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[i] = 0; + ib_parse_vars->cp_addr_regs[i + 1] = 0; + i++; + } + } + + vfd_end = adreno_is_a4xx(adreno_dev) ? 
+ ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31 : + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15; + for (i = ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0; + i <= vfd_end; i++) { + if (ib_parse_vars->cp_addr_regs[i]) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[i], + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[i] = 0; + } + } + + if (ib_parse_vars->cp_addr_regs[ADRENO_CP_ADDR_VSC_SIZE_ADDRESS]) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] & mask, + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = 0; + } + mask = 0xFFFFFFE0; + for (i = ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR; + i <= ADRENO_CP_ADDR_SP_FS_OBJ_START_REG; i++) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[i] & mask, + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[i] = 0; + } + return ret; +} +/* + * The DRAW_INDX opcode sends a draw initator which starts a draw operation in + * the GPU, so this is the point where all the registers and buffers become + * "valid". The DRAW_INDX may also have an index buffer pointer that should be + * frozen with the others + */ + +static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int ret = 0; + int i; + int opcode = cp_type3_opcode(pkt[0]); + + switch (opcode) { + case CP_DRAW_INDX: + if (type3_pkt_size(pkt[0]) > 3) { + ret = adreno_ib_add(process, + pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_INDX_OFFSET: + if (type3_pkt_size(pkt[0]) == 6) { + ret = adreno_ib_add(process, + pkt[5], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_INDIRECT: + if (type3_pkt_size(pkt[0]) == 2) { + ret = adreno_ib_add(process, + pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_INDX_INDIRECT: + if (type3_pkt_size(pkt[0]) == 4) { + ret = adreno_ib_add(process, + pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + ret = adreno_ib_add(process, + pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_AUTO: + if (type3_pkt_size(pkt[0]) == 6) { + ret = adreno_ib_add(process, + pkt[3], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + ret = adreno_ib_add(process, + pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + } + + if (ret) + return ret; + /* + * All of the type0 writes are valid at a draw initiator, so freeze + * the various buffers that we are tracking + */ + ret = ib_add_type0_entries(device, process, ib_obj_list, + ib_parse_vars); + if (ret) + return ret; + /* Process set draw state command streams if any */ + for (i = 0; i < NUM_SET_DRAW_GROUPS; i++) { + if (!ib_parse_vars->set_draw_groups[i].cmd_stream_dwords) + continue; + ret = adreno_ib_find_objs(device, process, + ib_parse_vars->set_draw_groups[i].cmd_stream_addr, + ib_parse_vars->set_draw_groups[i].cmd_stream_dwords, + SNAPSHOT_GPU_OBJECT_DRAW, + ib_obj_list, 2); + if (ret) + break; + } + return ret; +} + +/* + * Parse all the type7 opcode packets that may contain important information, + * such as additional GPU buffers to grab or a draw initator + */ + +static int ib_parse_type7(struct kgsl_device *device, unsigned int *ptr, + struct kgsl_process_private *process, + struct 
adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int opcode = cp_type7_opcode(*ptr); + + switch (opcode) { + case CP_SET_DRAW_STATE: + return ib_parse_type7_set_draw_state(device, ptr, process, + ib_obj_list); + } + + return 0; +} + +/* + * Parse all the type3 opcode packets that may contain important information, + * such as additional GPU buffers to grab or a draw initator + */ + +static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int opcode = cp_type3_opcode(*ptr); + + switch (opcode) { + case CP_LOAD_STATE: + return ib_parse_load_state(ptr, process, ib_obj_list, + ib_parse_vars); + case CP_SET_BIN_DATA: + return ib_parse_set_bin_data(ptr, process, ib_obj_list, + ib_parse_vars); + case CP_MEM_WRITE: + return ib_parse_mem_write(ptr, process, ib_obj_list, + ib_parse_vars); + case CP_DRAW_INDX: + case CP_DRAW_INDX_OFFSET: + case CP_DRAW_INDIRECT: + case CP_DRAW_INDX_INDIRECT: + return ib_parse_draw_indx(device, ptr, process, ib_obj_list, + ib_parse_vars); + case CP_SET_DRAW_STATE: + return ib_parse_set_draw_state(device, ptr, process, + ib_obj_list, ib_parse_vars); + } + + return 0; +} + +/* + * Parse type0 packets found in the stream. Some of the registers that are + * written are clues for GPU buffers that we need to freeze. Register writes + * are considred valid when a draw initator is called, so just cache the values + * here and freeze them when a CP_DRAW_INDX is seen. This protects against + * needlessly caching buffers that won't be used during a draw call + */ + +static int ib_parse_type0(struct kgsl_device *device, unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int size = type0_pkt_size(*ptr); + int offset = type0_pkt_offset(*ptr); + int i; + int reg_index; + int ret = 0; + + for (i = 0; i < size; i++, offset++) { + /* Visiblity stream buffer */ + if (offset >= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0) && + offset <= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7)) { + reg_index = adreno_cp_parser_regindex( + adreno_dev, offset, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7); + if (reg_index >= 0) + ib_parse_vars->cp_addr_regs[reg_index] = + ptr[i + 1]; + continue; + } else if ((offset >= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0)) && + (offset <= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15))) { + reg_index = adreno_cp_parser_regindex(adreno_dev, + offset, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15); + if (reg_index >= 0) + ib_parse_vars->cp_addr_regs[reg_index] = + ptr[i + 1]; + continue; + } else if ((offset >= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16)) && + (offset <= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31))) { + reg_index = adreno_cp_parser_regindex(adreno_dev, + offset, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31); + if (reg_index >= 0) + ib_parse_vars->cp_addr_regs[reg_index] = + ptr[i + 1]; + continue; + } else { + if (offset == + adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS)) + 
ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_VS_OBJ_START_REG)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_VS_OBJ_START_REG] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_FS_OBJ_START_REG)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_FS_OBJ_START_REG] = + ptr[i + 1]; + else if ((offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_UCHE_INVALIDATE0)) || + (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_UCHE_INVALIDATE1))) { + ret = adreno_ib_add(process, + ptr[i + 1] & 0xFFFFFFC0, + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + } + } + } + return ret; +} + +static int ib_parse_type7_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list) +{ + int size = type7_pkt_size(*ptr); + int i; + int grp_id; + int ret = 0; + int flags; + uint64_t cmd_stream_dwords; + uint64_t cmd_stream_addr; + + /* + * size is the size of the packet that does not include the DWORD + * for the packet header, we only want to loop here through the + * packet parameters from ptr[1] till ptr[size] where ptr[0] is the + * packet header. In each loop we look at 3 DWORDS hence increment + * loop counter by 3 always + */ + for (i = 1; i <= size; i += 3) { + grp_id = (ptr[i] & 0x1F000000) >> 24; + /* take action based on flags */ + flags = (ptr[i] & 0x000F0000) >> 16; + + /* + * dirty flag or no flags both mean we need to load it for + * next draw. No flags is used when the group is activated + * or initialized for the first time in the IB + */ + if (flags & 0x1 || !flags) { + cmd_stream_dwords = ptr[i] & 0x0000FFFF; + cmd_stream_addr = ptr[i + 2]; + cmd_stream_addr = cmd_stream_addr << 32 | ptr[i + 1]; + if (cmd_stream_dwords) + ret = adreno_ib_find_objs(device, process, + cmd_stream_addr, cmd_stream_dwords, + SNAPSHOT_GPU_OBJECT_DRAW, ib_obj_list, + 2); + if (ret) + break; + continue; + } + /* load immediate */ + if (flags & 0x8) { + uint64_t gpuaddr = ptr[i + 2]; + gpuaddr = gpuaddr << 32 | ptr[i + 1]; + ret = adreno_ib_find_objs(device, process, + gpuaddr, (ptr[i] & 0x0000FFFF), + SNAPSHOT_GPU_OBJECT_IB, + ib_obj_list, 2); + if (ret) + break; + } + } + return ret; +} + +static int ib_parse_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int size = type0_pkt_size(*ptr); + int i; + int grp_id; + int ret = 0; + int flags; + + /* + * size is the size of the packet that does not include the DWORD + * for the packet header, we only want to loop here through the + * packet parameters from ptr[1] till ptr[size] where ptr[0] is the + * packet header. 
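For reference, the bit layout extracted by the type7 loop above, restated as a small decoder. This is only a restatement of the masks and shifts already used in ib_parse_type7_set_draw_state(); the type and function names are illustrative:

struct set_draw_state_group_desc {
        unsigned int count;     /* dw0 & 0x0000FFFF: dwords in the stream */
        unsigned int flags;     /* (dw0 >> 16) & 0xF: dirty/disable/load-immediate */
        unsigned int grp_id;    /* (dw0 >> 24) & 0x1F: draw state group id */
        uint64_t gpuaddr;       /* dw2:dw1 form the 64-bit stream address */
};

static void decode_set_draw_state_group(const unsigned int *dw,
                struct set_draw_state_group_desc *desc)
{
        desc->count = dw[0] & 0x0000FFFF;
        desc->flags = (dw[0] >> 16) & 0xF;
        desc->grp_id = (dw[0] >> 24) & 0x1F;
        desc->gpuaddr = ((uint64_t)dw[2] << 32) | dw[1];
}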
In each loop we look at 2 DWORDS hence increment + * loop counter by 2 always + */ + for (i = 1; i <= size; i += 2) { + grp_id = (ptr[i] & 0x1F000000) >> 24; + /* take action based on flags */ + flags = (ptr[i] & 0x000F0000) >> 16; + /* Disable all groups */ + if (flags & 0x4) { + int j; + for (j = 0; j < NUM_SET_DRAW_GROUPS; j++) + ib_parse_vars->set_draw_groups[j]. + cmd_stream_dwords = 0; + continue; + } + /* disable flag */ + if (flags & 0x2) { + ib_parse_vars->set_draw_groups[grp_id]. + cmd_stream_dwords = 0; + continue; + } + /* + * dirty flag or no flags both mean we need to load it for + * next draw. No flags is used when the group is activated + * or initialized for the first time in the IB + */ + if (flags & 0x1 || !flags) { + ib_parse_vars->set_draw_groups[grp_id]. + cmd_stream_dwords = ptr[i] & 0x0000FFFF; + ib_parse_vars->set_draw_groups[grp_id]. + cmd_stream_addr = ptr[i + 1]; + continue; + } + /* load immediate */ + if (flags & 0x8) { + ret = adreno_ib_find_objs(device, process, + ptr[i + 1], (ptr[i] & 0x0000FFFF), + SNAPSHOT_GPU_OBJECT_IB, + ib_obj_list, 2); + if (ret) + break; + } + } + return ret; +} + +/* + * adreno_cp_parse_ib2() - Wrapper function around IB2 parsing + * @device: Device pointer + * @process: Process in which the IB is allocated + * @gpuaddr: IB2 gpuaddr + * @dwords: IB2 size in dwords + * @ib_obj_list: List of objects found in IB + * @ib_level: The level from which function is called, either from IB1 or IB2 + * + * Function does some checks to ensure that IB2 parsing is called from IB1 + * and then calls the function to find objects in IB2. + */ +static int adreno_cp_parse_ib2(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + struct adreno_ib_object_list *ib_obj_list, + int ib_level) +{ + struct adreno_ib_object *ib_obj; + int i; + /* + * We can only expect an IB2 in IB1, if we are + * already processing an IB2 then return error + */ + if (2 == ib_level) + return -EINVAL; + /* + * only try to find sub objects iff this IB has + * not been processed already + */ + for (i = 0; i < ib_obj_list->num_objs; i++) + ib_obj = &(ib_obj_list->obj_list[i]); + if ((SNAPSHOT_GPU_OBJECT_IB == ib_obj->snapshot_obj_type) && + (gpuaddr >= ib_obj->gpuaddr) && + (gpuaddr + dwords * sizeof(unsigned int) <= + ib_obj->gpuaddr + ib_obj->size)) + return 0; + + return adreno_ib_find_objs(device, process, gpuaddr, dwords, + SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 2); +} + +/* + * adreno_ib_find_objs() - Find all IB objects in a given IB + * @device: The device pointer on which the IB executes + * @process: The process in which the IB and all contained objects are mapped. + * @gpuaddr: The gpu address of the IB + * @dwords: Size of ib in dwords + * @obj_type: The object type can be either an IB or a draw state sequence + * @ib_obj_list: The list in which the IB and the objects in it are added. + * @ib_level: Indicates if IB1 or IB2 is being processed + * + * Finds all IB objects in a given IB and puts then in a list. 
Can be called + * recursively for the IB2's in the IB1's + * Returns 0 on success else error code + */ +static int adreno_ib_find_objs(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + int obj_type, + struct adreno_ib_object_list *ib_obj_list, + int ib_level) +{ + int ret = 0; + uint64_t rem = dwords; + int i; + struct ib_parser_variables ib_parse_vars; + unsigned int *src; + struct adreno_ib_object *ib_obj; + struct kgsl_mem_entry *entry; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* check that this IB is not already on list */ + for (i = 0; i < ib_obj_list->num_objs; i++) { + ib_obj = &(ib_obj_list->obj_list[i]); + if ((obj_type == ib_obj->snapshot_obj_type) && + (ib_obj->gpuaddr <= gpuaddr) && + ((ib_obj->gpuaddr + ib_obj->size) >= + (gpuaddr + (dwords << 2)))) + return 0; + } + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (!entry) + return -EINVAL; + + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, (dwords << 2))) { + kgsl_mem_entry_put(entry); + return -EINVAL; + } + + src = kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr); + if (!src) { + kgsl_mem_entry_put(entry); + return -EINVAL; + } + + memset(&ib_parse_vars, 0, sizeof(struct ib_parser_variables)); + + ret = adreno_ib_add(process, gpuaddr, obj_type, ib_obj_list); + if (ret) + goto done; + + for (i = 0; rem > 0; rem--, i++) { + int pktsize; + + if (pkt_is_type0(src[i])) + pktsize = type0_pkt_size(src[i]); + + else if (pkt_is_type3(src[i])) + pktsize = type3_pkt_size(src[i]); + + else if (pkt_is_type4(src[i])) + pktsize = type4_pkt_size(src[i]); + + else if (pkt_is_type7(src[i])) + pktsize = type7_pkt_size(src[i]); + + /* + * If the packet isn't a type 1, type 3, type 4 or type 7 then + * don't bother parsing it - it is likely corrupted + */ + else + break; + + if (((pkt_is_type0(src[i]) || pkt_is_type3(src[i])) && !pktsize) + || ((pktsize + 1) > rem)) + break; + + if (pkt_is_type3(src[i])) { + if (adreno_cmd_is_ib(adreno_dev, src[i])) { + uint64_t gpuaddrib2 = src[i + 1]; + uint64_t size = src[i + 2]; + + ret = adreno_cp_parse_ib2(device, process, + gpuaddrib2, size, + ib_obj_list, ib_level); + if (ret) + goto done; + } else { + ret = ib_parse_type3(device, &src[i], process, + ib_obj_list, + &ib_parse_vars); + /* + * If the parse function failed (probably + * because of a bad decode) then bail out and + * just capture the binary IB data + */ + + if (ret) + goto done; + } + } + + else if (pkt_is_type7(src[i])) { + if (adreno_cmd_is_ib(adreno_dev, src[i])) { + uint64_t size = src[i + 3]; + uint64_t gpuaddrib2 = src[i + 2]; + gpuaddrib2 = gpuaddrib2 << 32 | src[i + 1]; + + ret = adreno_cp_parse_ib2(device, process, + gpuaddrib2, size, + ib_obj_list, ib_level); + if (ret) + goto done; + } else { + ret = ib_parse_type7(device, &src[i], process, + ib_obj_list, + &ib_parse_vars); + /* + * If the parse function failed (probably + * because of a bad decode) then bail out and + * just capture the binary IB data + */ + + if (ret) + goto done; + } + } + + else if (pkt_is_type0(src[i])) { + ret = ib_parse_type0(device, &src[i], process, + ib_obj_list, &ib_parse_vars); + if (ret) + goto done; + } + + i += pktsize; + rem -= pktsize; + } + +done: + /* + * For set draw objects there may not be a draw_indx packet at its end + * to signal that we need to save the found objects in it, so just save + * it here. 
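The two public entry points defined just below, adreno_ib_create_object_list() and adreno_ib_destroy_obj_list(), wrap the parser above. A minimal usage sketch with error handling trimmed; the pr_debug() consumer is only an example, and note that a partially filled list can be returned even when parsing fails:

static void example_dump_ib_objects(struct kgsl_device *device,
                struct kgsl_process_private *process,
                uint64_t ib_gpuaddr, uint64_t ib_dwords)
{
        struct adreno_ib_object_list *list = NULL;
        int i, ret;

        ret = adreno_ib_create_object_list(device, process, ib_gpuaddr,
                        ib_dwords, &list);

        /* Even on error, any objects found before the failure are returned */
        if (list == NULL)
                return;

        for (i = 0; i < list->num_objs; i++)
                pr_debug("ib obj %d: 0x%llx size %llu type %d (ret %d)\n", i,
                        (unsigned long long)list->obj_list[i].gpuaddr,
                        (unsigned long long)list->obj_list[i].size,
                        list->obj_list[i].snapshot_obj_type, ret);

        adreno_ib_destroy_obj_list(list);
}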
+ */ + if (!ret && SNAPSHOT_GPU_OBJECT_DRAW == obj_type) + ret = ib_add_type0_entries(device, process, ib_obj_list, + &ib_parse_vars); + + kgsl_memdesc_unmap(&entry->memdesc); + kgsl_mem_entry_put(entry); + return ret; +} + + +/* + * adreno_ib_create_object_list() - Find all the memory objects in IB + * @device: The device pointer on which the IB executes + * @process: The process in which the IB and all contained objects are mapped + * @gpuaddr: The gpu address of the IB + * @dwords: Size of ib in dwords + * @ib_obj_list: The list in which the IB and the objects in it are added. + * + * Find all the memory objects that an IB needs for execution and place + * them in a list including the IB. + * Returns the ib object list. On success 0 is returned, on failure error + * code is returned along with number of objects that was saved before + * error occurred. If no objects found then the list pointer is set to + * NULL. + */ +int adreno_ib_create_object_list(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + struct adreno_ib_object_list **out_ib_obj_list) +{ + int ret = 0; + struct adreno_ib_object_list *ib_obj_list; + + if (!out_ib_obj_list) + return -EINVAL; + + *out_ib_obj_list = NULL; + + ib_obj_list = kzalloc(sizeof(*ib_obj_list), GFP_KERNEL); + if (!ib_obj_list) + return -ENOMEM; + + ib_obj_list->obj_list = vmalloc(MAX_IB_OBJS * + sizeof(struct adreno_ib_object)); + + if (!ib_obj_list->obj_list) { + kfree(ib_obj_list); + return -ENOMEM; + } + + ret = adreno_ib_find_objs(device, process, gpuaddr, dwords, + SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 1); + + /* Even if there was an error return the remaining objects found */ + if (ib_obj_list->num_objs) + *out_ib_obj_list = ib_obj_list; + + return ret; +} + +/* + * adreno_ib_destroy_obj_list() - Destroy an ib object list + * @ib_obj_list: List to destroy + * + * Free up all resources used by an ib_obj_list + */ +void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list) +{ + int i; + + if (!ib_obj_list) + return; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + if (ib_obj_list->obj_list[i].entry) + kgsl_mem_entry_put(ib_obj_list->obj_list[i].entry); + } + vfree(ib_obj_list->obj_list); + kfree(ib_obj_list); +} diff --git a/drivers/gpu/msm/adreno_cp_parser.h b/drivers/gpu/msm/adreno_cp_parser.h new file mode 100644 index 000000000000..0248de2d600a --- /dev/null +++ b/drivers/gpu/msm/adreno_cp_parser.h @@ -0,0 +1,186 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __ADRENO_IB_PARSER__ +#define __ADRENO_IB_PARSER__ + +#include "adreno.h" + +extern const unsigned int a3xx_cp_addr_regs[]; +extern const unsigned int a4xx_cp_addr_regs[]; + +/* + * struct adreno_ib_object - Structure containing information about an + * address range found in an IB + * @gpuaddr: The starting gpuaddress of the range + * @size: Size of the range + * @snapshot_obj_type - Type of range used in snapshot + * @entry: The memory entry in which this range is found + */ +struct adreno_ib_object { + uint64_t gpuaddr; + uint64_t size; + int snapshot_obj_type; + struct kgsl_mem_entry *entry; +}; + +/* + * struct adreno_ib_object_list - List of address ranges found in IB + * @obj_list: The address range list + * @num_objs: Number of objects in list + */ +struct adreno_ib_object_list { + struct adreno_ib_object *obj_list; + int num_objs; +}; + +/* + * adreno registers used during IB parsing, there contain addresses + * and sizes of the addresses that present in an IB + */ +enum adreno_cp_addr_regs { + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0 = 0, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31, + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + ADRENO_CP_UCHE_INVALIDATE0, + ADRENO_CP_UCHE_INVALIDATE1, + ADRENO_CP_ADDR_MAX, +}; + +/* + * adreno_ib_init_ib_obj() - Create an ib object structure and initialize it + * with gpuaddress and size + * @gpuaddr: gpuaddr with which to initialize the object with + * @size: Size in bytes with which the object is initialized + * @ib_type: The IB type used by snapshot + * + * Returns the object pointer on success else error code 
in the pointer + */ +static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr, + uint64_t size, int obj_type, + struct kgsl_mem_entry *entry, + struct adreno_ib_object *ib_obj) +{ + ib_obj->gpuaddr = gpuaddr; + ib_obj->size = size; + ib_obj->snapshot_obj_type = obj_type; + ib_obj->entry = entry; +} + +/* + * adreno_cp_parser_getreg() - Returns the value of register offset + * @adreno_dev: The adreno device being operated upon + * @reg_enum: Enum index of the register whose offset is returned + */ +static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev, + enum adreno_cp_addr_regs reg_enum) +{ + if (reg_enum == ADRENO_CP_ADDR_MAX) + return -EEXIST; + + if (adreno_is_a3xx(adreno_dev)) + return a3xx_cp_addr_regs[reg_enum]; + else if (adreno_is_a4xx(adreno_dev)) + return a4xx_cp_addr_regs[reg_enum]; + else + return -EEXIST; +} + +/* + * adreno_cp_parser_regindex() - Returns enum index for a given register offset + * @adreno_dev: The adreno device being operated upon + * @offset: Register offset + * @start: The start index to search from + * @end: The last index to search + * + * Checks the list of registers defined for the device and returns the index + * whose offset value matches offset parameter. + */ +static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev, + unsigned int offset, + enum adreno_cp_addr_regs start, + enum adreno_cp_addr_regs end) +{ + int i; + const unsigned int *regs; + if (adreno_is_a4xx(adreno_dev)) + regs = a4xx_cp_addr_regs; + else if (adreno_is_a3xx(adreno_dev)) + regs = a3xx_cp_addr_regs; + else + return -EEXIST; + + for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++) + if (regs[i] == offset) + return i; + return -EEXIST; +} + +int adreno_ib_create_object_list( + struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + struct adreno_ib_object_list **out_ib_obj_list); + +void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list); + +#endif diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c new file mode 100644 index 000000000000..2290d29fd28c --- /dev/null +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -0,0 +1,380 @@ +/* Copyright (c) 2002,2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/io.h> + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_cffdump.h" +#include "kgsl_sync.h" + +static int _isdb_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Once ISDB goes enabled it stays enabled */ + if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) + return 0; + + mutex_lock(&device->mutex); + + /* + * Bring down the GPU so we can bring it back up with the correct power + * and clock settings + */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + set_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + + return 0; +} + +static int _isdb_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + *val = (u64) test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n"); + +static int _lm_limit_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + return 0; + + /* assure value is between 3A and 10A */ + if (val > 10000) + val = 10000; + else if (val < 3000) + val = 3000; + + adreno_dev->lm_limit = val; + + if (test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) { + mutex_lock(&device->mutex); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + mutex_unlock(&device->mutex); + } + + return 0; +} + +static int _lm_limit_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + *val = 0; + + *val = (u64) adreno_dev->lm_limit; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_lm_limit_fops, _lm_limit_get, _lm_limit_set, "%llu\n"); + +static int _lm_threshold_count_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + *val = 0; + else + *val = (u64) adreno_dev->lm_threshold_cross; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_lm_threshold_fops, _lm_threshold_count_get, + NULL, "%llu\n"); + +static int _active_count_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + unsigned int i = atomic_read(&device->active_cnt); + + *val = (u64) i; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_active_count_fops, _active_count_get, NULL, "%llu\n"); + +typedef void (*reg_read_init_t)(struct kgsl_device *device); +typedef void (*reg_read_fill_t)(struct kgsl_device *device, int i, + unsigned int *vals, int linec); + + +static void sync_event_print(struct seq_file *s, + struct kgsl_cmdbatch_sync_event *sync_event) +{ + switch (sync_event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + seq_printf(s, "sync: ctx: %d ts: %d", + sync_event->context->id, sync_event->timestamp); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + seq_printf(s, "sync: [%p] %s", sync_event->handle, + (sync_event->handle && sync_event->handle->fence) + ? 
sync_event->handle->fence->name : "NULL"); + break; + default: + seq_printf(s, "sync: type: %d", sync_event->type); + break; + } +} + +struct flag_entry { + unsigned long mask; + const char *str; +}; + +static const struct flag_entry cmdbatch_flags[] = {KGSL_CMDBATCH_FLAGS}; + +static const struct flag_entry cmdbatch_priv[] = { + { CMDBATCH_FLAG_SKIP, "skip"}, + { CMDBATCH_FLAG_FORCE_PREAMBLE, "force_preamble"}, + { CMDBATCH_FLAG_WFI, "wait_for_idle" }, +}; + +static const struct flag_entry context_flags[] = {KGSL_CONTEXT_FLAGS}; + +/* + * Note that the ADRENO_CONTEXT_* flags start at + * KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC so it is ok to cross the streams here. + */ +static const struct flag_entry context_priv[] = { + { KGSL_CONTEXT_PRIV_DETACHED, "detached"}, + { KGSL_CONTEXT_PRIV_INVALID, "invalid"}, + { KGSL_CONTEXT_PRIV_PAGEFAULT, "pagefault"}, + { ADRENO_CONTEXT_FAULT, "fault"}, + { ADRENO_CONTEXT_GPU_HANG, "gpu_hang"}, + { ADRENO_CONTEXT_GPU_HANG_FT, "gpu_hang_ft"}, + { ADRENO_CONTEXT_SKIP_EOF, "skip_end_of_frame" }, + { ADRENO_CONTEXT_FORCE_PREAMBLE, "force_preamble"}, +}; + +static void print_flags(struct seq_file *s, const struct flag_entry *table, + size_t table_size, unsigned long flags) +{ + int i; + int first = 1; + + for (i = 0; i < table_size; i++) { + if (flags & table[i].mask) { + seq_printf(s, "%c%s", first ? '\0' : '|', table[i].str); + flags &= ~(table[i].mask); + first = 0; + } + } + if (flags) { + seq_printf(s, "%c0x%lx", first ? '\0' : '|', flags); + first = 0; + } + if (first) + seq_puts(s, "None"); +} + +static void cmdbatch_print(struct seq_file *s, struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_cmdbatch_sync_event *event; + unsigned int i; + + /* print fences first, since they block this cmdbatch */ + + for (i = 0; i < cmdbatch->numsyncs; i++) { + event = &cmdbatch->synclist[i]; + + if (!kgsl_cmdbatch_event_pending(cmdbatch, i)) + continue; + + /* + * Timestamp is 0 for KGSL_CONTEXT_SYNC, but print it anyways + * so that it is clear if the fence was a separate submit + * or part of an IB submit. 
+ */ + seq_printf(s, "\t%d ", cmdbatch->timestamp); + sync_event_print(s, event); + seq_puts(s, "\n"); + } + + /* if this flag is set, there won't be an IB */ + if (cmdbatch->flags & KGSL_CONTEXT_SYNC) + return; + + seq_printf(s, "\t%d: ib: expires: %lu", + cmdbatch->timestamp, cmdbatch->expires); + + seq_puts(s, " flags: "); + print_flags(s, cmdbatch_flags, ARRAY_SIZE(cmdbatch_flags), + cmdbatch->flags); + + seq_puts(s, " priv: "); + print_flags(s, cmdbatch_priv, ARRAY_SIZE(cmdbatch_priv), + cmdbatch->priv); + + seq_puts(s, "\n"); +} + +static const char *ctx_type_str(unsigned int type) +{ + int i; + struct flag_entry table[] = {KGSL_CONTEXT_TYPES}; + + for (i = 0; i < ARRAY_SIZE(table); i++) + if (type == table[i].mask) + return table[i].str; + return "UNKNOWN"; +} + +static int ctx_print(struct seq_file *s, void *unused) +{ + struct adreno_context *drawctxt = s->private; + unsigned int i; + struct kgsl_event *event; + unsigned int queued = 0, consumed = 0, retired = 0; + + seq_printf(s, "id: %d type: %s priority: %d process: %s (%d) tid: %d\n", + drawctxt->base.id, + ctx_type_str(drawctxt->type), + drawctxt->base.priority, + drawctxt->base.proc_priv->comm, + drawctxt->base.proc_priv->pid, + drawctxt->base.tid); + + seq_puts(s, "flags: "); + print_flags(s, context_flags, ARRAY_SIZE(context_flags), + drawctxt->base.flags & ~(KGSL_CONTEXT_PRIORITY_MASK + | KGSL_CONTEXT_TYPE_MASK)); + seq_puts(s, " priv: "); + print_flags(s, context_priv, ARRAY_SIZE(context_priv), + drawctxt->base.priv); + seq_puts(s, "\n"); + + seq_puts(s, "timestamps: "); + kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base, + KGSL_TIMESTAMP_QUEUED, &queued); + kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base, + KGSL_TIMESTAMP_CONSUMED, &consumed); + kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base, + KGSL_TIMESTAMP_RETIRED, &retired); + seq_printf(s, "queued: %u consumed: %u retired: %u global:%u\n", + queued, consumed, retired, + drawctxt->internal_timestamp); + + seq_puts(s, "cmdqueue:\n"); + + spin_lock(&drawctxt->lock); + for (i = drawctxt->cmdqueue_head; + i != drawctxt->cmdqueue_tail; + i = CMDQUEUE_NEXT(i, ADRENO_CONTEXT_CMDQUEUE_SIZE)) + cmdbatch_print(s, drawctxt->cmdqueue[i]); + spin_unlock(&drawctxt->lock); + + seq_puts(s, "events:\n"); + spin_lock(&drawctxt->base.events.lock); + list_for_each_entry(event, &drawctxt->base.events.events, node) + seq_printf(s, "\t%d: %pF created: %u\n", event->timestamp, + event->func, event->created); + spin_unlock(&drawctxt->base.events.lock); + + return 0; +} + +static int ctx_open(struct inode *inode, struct file *file) +{ + int ret; + unsigned int id = (unsigned int)(unsigned long)inode->i_private; + struct kgsl_context *context; + + context = kgsl_context_get(kgsl_get_device(KGSL_DEVICE_3D0), id); + if (context == NULL) + return -ENODEV; + + ret = single_open(file, ctx_print, context); + if (ret) + kgsl_context_put(context); + return ret; +} + +static int ctx_release(struct inode *inode, struct file *file) +{ + struct kgsl_context *context; + + context = ((struct seq_file *)file->private_data)->private; + + kgsl_context_put(context); + + return single_release(inode, file); +} + +static const struct file_operations ctx_fops = { + .open = ctx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ctx_release, +}; + + +void +adreno_context_debugfs_init(struct adreno_device *adreno_dev, + struct adreno_context *ctx) +{ + unsigned char name[16]; + + snprintf(name, sizeof(name), "%d", ctx->base.id); + + ctx->debug_root = debugfs_create_file(name, 
0444, + adreno_dev->ctx_d_debugfs, + (void *)(unsigned long)ctx->base.id, &ctx_fops); +} + +void adreno_debugfs_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (!device->d_debugfs || IS_ERR(device->d_debugfs)) + return; + + kgsl_cffdump_debugfs_create(device); + + debugfs_create_file("active_cnt", 0444, device->d_debugfs, device, + &_active_count_fops); + adreno_dev->ctx_d_debugfs = debugfs_create_dir("ctx", + device->d_debugfs); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LM)) { + debugfs_create_file("lm_limit", 0644, device->d_debugfs, device, + &_lm_limit_fops); + debugfs_create_file("lm_threshold_count", 0444, + device->d_debugfs, device, &_lm_threshold_fops); + } + + if (adreno_is_a5xx(adreno_dev)) + debugfs_create_file("isdb", 0644, device->d_debugfs, + device, &_isdb_fops); +} diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c new file mode 100644 index 000000000000..6daf6977829e --- /dev/null +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -0,0 +1,2595 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/jiffies.h> +#include <linux/err.h> + +#include "kgsl.h" +#include "kgsl_cffdump.h" +#include "kgsl_sharedmem.h" +#include "adreno.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_sharedmem.h" + +#define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s)) + +/* Time in ms after which the dispatcher tries to schedule an unscheduled RB */ +static unsigned int _dispatch_starvation_time = 2000; + +/* Amount of time in ms that a starved RB is permitted to execute for */ +static unsigned int _dispatch_time_slice = 25; + +/* + * If set then dispatcher tries to schedule lower priority RB's after if they + * have commands in their pipe and have been inactive for + * _dispatch_starvation_time. Also, once an RB is schduled it will be allowed + * to run for _dispatch_time_slice unless it's commands complete before + * _dispatch_time_slice + */ +unsigned int adreno_disp_preempt_fair_sched; + +/* Number of commands that can be queued in a context before it sleeps */ +static unsigned int _context_cmdqueue_size = 50; + +/* Number of milliseconds to wait for the context queue to clear */ +static unsigned int _context_queue_wait = 10000; + +/* Number of command batches sent at a time from a single context */ +static unsigned int _context_cmdbatch_burst = 5; + +/* + * GFT throttle parameters. If GFT recovered more than + * X times in Y ms invalidate the context and do not attempt recovery. 
+ * X -> _fault_throttle_burst + * Y -> _fault_throttle_time + */ +static unsigned int _fault_throttle_time = 3000; +static unsigned int _fault_throttle_burst = 3; + +/* + * Maximum ringbuffer inflight for the single submitting context case - this + * should be sufficiently high to keep the GPU loaded + */ +static unsigned int _dispatcher_q_inflight_hi = 15; + +/* + * Minimum inflight for the multiple context case - this should sufficiently low + * to allow for lower latency context switching + */ +static unsigned int _dispatcher_q_inflight_lo = 4; + +/* Command batch timeout (in milliseconds) */ +unsigned int adreno_cmdbatch_timeout = 2000; + +/* Interval for reading and comparing fault detection registers */ +static unsigned int _fault_timer_interval = 200; + +static int dispatcher_do_fault(struct kgsl_device *device); + +/** + * _track_context - Add a context ID to the list of recently seen contexts + * for the command queue + * @cmdqueue: cmdqueue to add the context to + * @id: ID of the context to add + * + * This function is called when a new item is added to a context - this tracks + * the number of active contexts seen in the last 100ms for the command queue + */ +static void _track_context(struct adreno_dispatcher_cmdqueue *cmdqueue, + unsigned int id) +{ + struct adreno_context_list *list = cmdqueue->active_contexts; + int oldest = -1, empty = -1; + unsigned long age = 0; + int i, count = 0; + bool updated = false; + + for (i = 0; i < ACTIVE_CONTEXT_LIST_MAX; i++) { + + /* If the new ID matches the slot update the expire time */ + if (list[i].id == id) { + list[i].jiffies = jiffies + msecs_to_jiffies(100); + updated = true; + count++; + continue; + } + + /* Remember and skip empty slots */ + if ((list[i].id == 0) || + time_after(jiffies, list[i].jiffies)) { + empty = i; + continue; + } + + count++; + + /* Remember the oldest active entry */ + if (oldest == -1 || time_before(list[i].jiffies, age)) { + age = list[i].jiffies; + oldest = i; + } + } + + if (updated == false) { + int pos = (empty != -1) ? empty : oldest; + + list[pos].jiffies = jiffies + msecs_to_jiffies(100); + list[pos].id = id; + count++; + } + + cmdqueue->active_context_count = count; +} + +/* + * If only one context has queued in the last 100 milliseconds increase + * inflight to a high number to load up the GPU. If multiple contexts + * have queued drop the inflight for better context switch latency. + * If no contexts have queued what are you even doing here? + */ + +static inline int +_cmdqueue_inflight(struct adreno_dispatcher_cmdqueue *cmdqueue) +{ + return (cmdqueue->active_context_count > 1) + ? _dispatcher_q_inflight_lo : _dispatcher_q_inflight_hi; +} + +/** + * fault_detect_read() - Read the set of fault detect registers + * @device: Pointer to the KGSL device struct + * + * Read the set of fault detect registers and store them in the local array. + * This is for the initial values that are compared later with + * fault_detect_read_compare. Also store the initial timestamp of each rb + * to compare the timestamps with. 
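+ * fault_detect_read_compare() later re-reads the same registers and the
+ * current ringbuffer's retired timestamp and reports whether anything has
+ * changed since this baseline was taken.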
+ */ +static void fault_detect_read(struct kgsl_device *device) +{ + int i; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + for (i = 0; i < adreno_dev->num_ringbuffers; i++) { + struct adreno_ringbuffer *rb = &(adreno_dev->ringbuffers[i]); + adreno_rb_readtimestamp(device, rb, + KGSL_TIMESTAMP_RETIRED, &(rb->fault_detect_ts)); + } + + for (i = 0; i < adreno_ft_regs_num; i++) { + if (adreno_ft_regs[i] != 0) + kgsl_regread(device, adreno_ft_regs[i], + &adreno_ft_regs_val[i]); + } +} + +/* + * Check to see if the device is idle + */ +static inline bool _isidle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpu_core *gpucore = adreno_dev->gpucore; + unsigned int reg_rbbm_status; + + if (!kgsl_state_is_awake(device)) + goto ret; + + /* only check rbbm status to determine if GPU is idle */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, ®_rbbm_status); + + if (reg_rbbm_status & gpucore->busy_mask) + return false; + +ret: + /* Clear the existing register values */ + memset(adreno_ft_regs_val, 0, + adreno_ft_regs_num * sizeof(unsigned int)); + + return true; +} + +/** + * fault_detect_read_compare() - Read the fault detect registers and compare + * them to the current value + * @device: Pointer to the KGSL device struct + * + * Read the set of fault detect registers and compare them to the current set + * of registers. Return 1 if any of the register values changed. Also, compare + * if the current RB's timstamp has changed or not. + */ +static int fault_detect_read_compare(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + int i, ret = 0; + unsigned int ts; + + /* Check to see if the device is idle - if so report no hang */ + if (_isidle(device) == true) + ret = 1; + + for (i = 0; i < adreno_ft_regs_num; i++) { + unsigned int val; + + if (adreno_ft_regs[i] == 0) + continue; + kgsl_regread(device, adreno_ft_regs[i], &val); + if (val != adreno_ft_regs_val[i]) + ret = 1; + adreno_ft_regs_val[i] = val; + } + + if (!adreno_rb_readtimestamp(device, adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &ts)) { + if (ts != rb->fault_detect_ts) + ret = 1; + + rb->fault_detect_ts = ts; + } + + return ret; +} + +static void start_fault_timer(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + if (adreno_soft_fault_detect(adreno_dev)) + mod_timer(&dispatcher->fault_timer, + jiffies + msecs_to_jiffies(_fault_timer_interval)); +} + +/** + * _retire_marker() - Retire a marker command batch without sending it to the + * hardware + * @cmdbatch: Pointer to the cmdbatch to retire + * + * In some cases marker commands can be retired by the software without going to + * the GPU. In those cases, update the memstore from the CPU, kick off the + * event engine to handle expired events and destroy the command batch. 
+ */ +static void _retire_marker(struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_context *context = cmdbatch->context; + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + struct kgsl_device *device = context->device; + + /* + * Write the start and end timestamp to the memstore to keep the + * accounting sane + */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + cmdbatch->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + cmdbatch->timestamp); + + + /* Retire pending GPU events for the object */ + kgsl_process_event_group(device, &context->events); + + trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb); + kgsl_cmdbatch_destroy(cmdbatch); +} + +static int _check_context_queue(struct adreno_context *drawctxt) +{ + int ret; + + spin_lock(&drawctxt->lock); + + /* + * Wake up if there is room in the context or if the whole thing got + * invalidated while we were asleep + */ + + if (kgsl_context_invalid(&drawctxt->base)) + ret = 1; + else + ret = drawctxt->queued < _context_cmdqueue_size ? 1 : 0; + + spin_unlock(&drawctxt->lock); + + return ret; +} + +/* + * return true if this is a marker command and the dependent timestamp has + * retired + */ +static bool _marker_expired(struct kgsl_cmdbatch *cmdbatch) +{ + return (cmdbatch->flags & KGSL_CMDBATCH_MARKER) && + kgsl_check_timestamp(cmdbatch->device, cmdbatch->context, + cmdbatch->marker_timestamp); +} + +static inline void _pop_cmdbatch(struct adreno_context *drawctxt) +{ + drawctxt->cmdqueue_head = CMDQUEUE_NEXT(drawctxt->cmdqueue_head, + ADRENO_CONTEXT_CMDQUEUE_SIZE); + drawctxt->queued--; +} +/** + * Removes all expired marker and sync cmdbatches from + * the context queue when marker command and dependent + * timestamp are retired. This function is recursive. + * returns cmdbatch if context has command, NULL otherwise. + */ +static struct kgsl_cmdbatch *_expire_markers(struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch; + + if (drawctxt->cmdqueue_head == drawctxt->cmdqueue_tail) + return NULL; + + cmdbatch = drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + if (cmdbatch == NULL) + return NULL; + + /* Check to see if this is a marker we can skip over */ + if ((cmdbatch->flags & KGSL_CMDBATCH_MARKER) && + _marker_expired(cmdbatch)) { + _pop_cmdbatch(drawctxt); + _retire_marker(cmdbatch); + return _expire_markers(drawctxt); + } + + if (cmdbatch->flags & KGSL_CMDBATCH_SYNC) { + if (!kgsl_cmdbatch_events_pending(cmdbatch)) { + _pop_cmdbatch(drawctxt); + kgsl_cmdbatch_destroy(cmdbatch); + return _expire_markers(drawctxt); + } + } + + return cmdbatch; +} + +static void expire_markers(struct adreno_context *drawctxt) +{ + spin_lock(&drawctxt->lock); + _expire_markers(drawctxt); + spin_unlock(&drawctxt->lock); +} + +static struct kgsl_cmdbatch *_get_cmdbatch(struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch; + bool pending = false; + + cmdbatch = _expire_markers(drawctxt); + + if (cmdbatch == NULL) + return NULL; + + /* + * If the marker isn't expired but the SKIP bit is set + * then there are real commands following this one in + * the queue. This means that we need to dispatch the + * command so that we can keep the timestamp accounting + * correct. 
If skip isn't set then we block this queue + * until the dependent timestamp expires + */ + if ((cmdbatch->flags & KGSL_CMDBATCH_MARKER) && + (!test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv))) + pending = true; + + if (kgsl_cmdbatch_events_pending(cmdbatch)) + pending = true; + + /* + * If changes are pending and the canary timer hasn't been + * started yet, start it + */ + if (pending) { + /* + * If syncpoints are pending start the canary timer if + * it hasn't already been started + */ + if (!cmdbatch->timeout_jiffies) { + cmdbatch->timeout_jiffies = jiffies + 5 * HZ; + mod_timer(&cmdbatch->timer, cmdbatch->timeout_jiffies); + } + + return ERR_PTR(-EAGAIN); + } + + _pop_cmdbatch(drawctxt); + return cmdbatch; +} + +/** + * adreno_dispatcher_get_cmdbatch() - Get a new command from a context queue + * @drawctxt: Pointer to the adreno draw context + * + * Dequeue a new command batch from the context list + */ +static struct kgsl_cmdbatch *adreno_dispatcher_get_cmdbatch( + struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch; + + spin_lock(&drawctxt->lock); + cmdbatch = _get_cmdbatch(drawctxt); + spin_unlock(&drawctxt->lock); + + /* + * Delete the timer and wait for timer handler to finish executing + * on another core before queueing the buffer. We must do this + * without holding any spin lock that the timer handler might be using + */ + if (!IS_ERR_OR_NULL(cmdbatch)) + del_timer_sync(&cmdbatch->timer); + + return cmdbatch; +} + +/** + * adreno_dispatcher_requeue_cmdbatch() - Put a command back on the context + * queue + * @drawctxt: Pointer to the adreno draw context + * @cmdbatch: Pointer to the KGSL cmdbatch to requeue + * + * Failure to submit a command to the ringbuffer isn't the fault of the command + * being submitted so if a failure happens, push it back on the head of the the + * context queue to be reconsidered again unless the context got detached. + */ +static inline int adreno_dispatcher_requeue_cmdbatch( + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch) +{ + unsigned int prev; + spin_lock(&drawctxt->lock); + + if (kgsl_context_detached(&drawctxt->base) || + kgsl_context_invalid(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + /* get rid of this cmdbatch since the context is bad */ + kgsl_cmdbatch_destroy(cmdbatch); + return -ENOENT; + } + + prev = drawctxt->cmdqueue_head == 0 ? + (ADRENO_CONTEXT_CMDQUEUE_SIZE - 1) : + (drawctxt->cmdqueue_head - 1); + + /* + * The maximum queue size always needs to be one less then the size of + * the ringbuffer queue so there is "room" to put the cmdbatch back in + */ + + BUG_ON(prev == drawctxt->cmdqueue_tail); + + drawctxt->cmdqueue[prev] = cmdbatch; + drawctxt->queued++; + + /* Reset the command queue head to reflect the newly requeued change */ + drawctxt->cmdqueue_head = prev; + spin_unlock(&drawctxt->lock); + return 0; +} + +/** + * dispatcher_queue_context() - Queue a context in the dispatcher pending list + * @dispatcher: Pointer to the adreno dispatcher struct + * @drawctxt: Pointer to the adreno draw context + * + * Add a context to the dispatcher pending list. 
+ */ +static void dispatcher_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* Refuse to queue a detached context */ + if (kgsl_context_detached(&drawctxt->base)) + return; + + spin_lock(&dispatcher->plist_lock); + + if (plist_node_empty(&drawctxt->pending)) { + /* Get a reference to the context while it sits on the list */ + if (_kgsl_context_get(&drawctxt->base)) { + trace_dispatch_queue_context(drawctxt); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + } + + spin_unlock(&dispatcher->plist_lock); +} + +/** + * sendcmd() - Send a command batch to the GPU hardware + * @dispatcher: Pointer to the adreno dispatcher struct + * @cmdbatch: Pointer to the KGSL cmdbatch being sent + * + * Send a KGSL command batch to the GPU hardware + */ +static int sendcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + struct adreno_dispatcher_cmdqueue *dispatch_q = + ADRENO_CMDBATCH_DISPATCH_CMDQUEUE(cmdbatch); + struct adreno_submit_time time; + uint64_t secs = 0; + unsigned long nsecs = 0; + int ret; + + mutex_lock(&device->mutex); + if (adreno_gpu_halt(adreno_dev) != 0) { + mutex_unlock(&device->mutex); + return -EBUSY; + } + + dispatcher->inflight++; + dispatch_q->inflight++; + + if (dispatcher->inflight == 1 && + !test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) { + /* Time to make the donuts. Turn on the GPU */ + ret = kgsl_active_count_get(device); + if (ret) { + dispatcher->inflight--; + dispatch_q->inflight--; + mutex_unlock(&device->mutex); + return ret; + } + + set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); + } + + if (test_bit(ADRENO_DEVICE_CMDBATCH_PROFILE, &adreno_dev->priv)) { + set_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv); + cmdbatch->profile_index = adreno_dev->cmdbatch_profile_index; + adreno_dev->cmdbatch_profile_index = + (adreno_dev->cmdbatch_profile_index + 1) % + ADRENO_CMDBATCH_PROFILE_COUNT; + } + + ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdbatch, &time); + + /* + * On the first command, if the submission was successful, then read the + * fault registers. If it failed then turn off the GPU. Sad face. 
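+ * The values read here are the baseline that fault_detect_read_compare()
+ * checks against later.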
+ */ + + if (dispatcher->inflight == 1) { + if (ret == 0) { + fault_detect_read(device); + + if (!test_and_set_bit(ADRENO_DISPATCHER_ACTIVE, + &dispatcher->priv)) + reinit_completion(&dispatcher->idle_gate); + } else { + kgsl_active_count_put(device); + clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); + } + } + + mutex_unlock(&device->mutex); + + if (ret) { + dispatcher->inflight--; + dispatch_q->inflight--; + + /* + * -ENOENT means that the context was detached before the + * command was submitted - don't log a message in that case + */ + + if (ret != -ENOENT) + KGSL_DRV_ERR(device, + "Unable to submit command to the ringbuffer %d\n", + ret); + return ret; + } + + secs = time.ktime; + nsecs = do_div(secs, 1000000000); + + trace_adreno_cmdbatch_submitted(cmdbatch, (int) dispatcher->inflight, + time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb); + + cmdbatch->submit_ticks = time.ticks; + + dispatch_q->cmd_q[dispatch_q->tail] = cmdbatch; + dispatch_q->tail = (dispatch_q->tail + 1) % + ADRENO_DISPATCH_CMDQUEUE_SIZE; + + /* + * If this is the first command in the pipe then the GPU will + * immediately start executing it so we can start the expiry timeout on + * the command batch here. Subsequent command batches will have their + * timer started when the previous command batch is retired. + * Set the timer if the cmdbatch was submitted to current + * active RB else this timer will need to be set when the + * RB becomes active, also if dispatcher is not is CLEAR + * state then the cmdbatch it is currently executing is + * unclear so do not set timer in that case either. + */ + if (1 == dispatch_q->inflight && + (&(adreno_dev->cur_rb->dispatch_q)) == dispatch_q && + adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) { + cmdbatch->expires = jiffies + + msecs_to_jiffies(adreno_cmdbatch_timeout); + mod_timer(&dispatcher->timer, cmdbatch->expires); + } + + /* Start the fault detection timer on the first submission */ + if (dispatcher->inflight == 1) + start_fault_timer(adreno_dev); + + /* + * we just submitted something, readjust ringbuffer + * execution level + */ + gpudev->preemption_schedule(adreno_dev); + return 0; +} + +/** + * dispatcher_context_sendcmds() - Send commands from a context to the GPU + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno context to dispatch commands from + * + * Dequeue and send a burst of commands from the specified context to the GPU + * Returns postive if the context needs to be put back on the pending queue + * 0 if the context is empty or detached and negative on error + */ +static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_dispatcher_cmdqueue *dispatch_q = + &(drawctxt->rb->dispatch_q); + int count = 0; + int ret = 0; + int inflight = _cmdqueue_inflight(dispatch_q); + unsigned int timestamp; + + if (dispatch_q->inflight >= inflight) { + expire_markers(drawctxt); + return -EBUSY; + } + + /* + * Each context can send a specific number of command batches per cycle + */ + while ((count < _context_cmdbatch_burst) && + (dispatch_q->inflight < inflight)) { + struct kgsl_cmdbatch *cmdbatch; + + if (adreno_gpu_fault(adreno_dev) != 0) + break; + + cmdbatch = adreno_dispatcher_get_cmdbatch(drawctxt); + + /* + * adreno_context_get_cmdbatch returns -EAGAIN if the current + * cmdbatch has pending sync points so no more to do here. 
+ * When the sync points are satisfied then the context will get + * reqeueued + */ + + if (IS_ERR_OR_NULL(cmdbatch)) { + if (IS_ERR(cmdbatch)) + ret = PTR_ERR(cmdbatch); + break; + } + + /* + * If this is a synchronization submission then there are no + * commands to submit. Discard it and get the next item from + * the queue. Decrement count so this packet doesn't count + * against the burst for the context + */ + + if (cmdbatch->flags & KGSL_CMDBATCH_SYNC) { + kgsl_cmdbatch_destroy(cmdbatch); + continue; + } + + timestamp = cmdbatch->timestamp; + + ret = sendcmd(adreno_dev, cmdbatch); + + /* + * On error from sendcmd() try to requeue the command batch + * unless we got back -ENOENT which means that the context has + * been detached and there will be no more deliveries from here + */ + if (ret != 0) { + /* Destroy the cmdbatch on -ENOENT */ + if (ret == -ENOENT) + kgsl_cmdbatch_destroy(cmdbatch); + else { + /* + * If the requeue returns an error, return that + * instead of whatever sendcmd() sent us + */ + int r = adreno_dispatcher_requeue_cmdbatch( + drawctxt, cmdbatch); + if (r) + ret = r; + } + + break; + } + + drawctxt->submitted_timestamp = timestamp; + + count++; + } + + /* + * Wake up any snoozing threads if we have consumed any real commands + * or marker commands and we have room in the context queue. + */ + + if (_check_context_queue(drawctxt)) + wake_up_all(&drawctxt->wq); + + if (!ret) + ret = count; + + /* Return error or the number of commands queued */ + return ret; +} + +/** + * _adreno_dispatcher_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Issue as many commands as possible (up to inflight) from the pending contexts + * This function assumes the dispatcher mutex has been locked. + */ +static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct adreno_context *drawctxt, *next; + struct plist_head requeue, busy_list; + int ret; + + /* Leave early if the dispatcher isn't in a happy state */ + if (adreno_gpu_fault(adreno_dev) != 0) + return; + + plist_head_init(&requeue); + plist_head_init(&busy_list); + + /* Try to fill the ringbuffers as much as possible */ + while (1) { + + /* Stop doing things if the dispatcher is paused or faulted */ + if (adreno_gpu_fault(adreno_dev) != 0) + break; + + if (0 != adreno_gpu_halt(adreno_dev)) + break; + + spin_lock(&dispatcher->plist_lock); + + if (plist_head_empty(&dispatcher->pending)) { + spin_unlock(&dispatcher->plist_lock); + break; + } + + /* Get the next entry on the list */ + drawctxt = plist_first_entry(&dispatcher->pending, + struct adreno_context, pending); + + plist_del(&drawctxt->pending, &dispatcher->pending); + + spin_unlock(&dispatcher->plist_lock); + + if (kgsl_context_detached(&drawctxt->base) || + kgsl_context_invalid(&drawctxt->base)) { + kgsl_context_put(&drawctxt->base); + continue; + } + + ret = dispatcher_context_sendcmds(adreno_dev, drawctxt); + + /* Don't bother requeuing on -ENOENT - context is detached */ + if (ret != 0 && ret != -ENOENT) { + spin_lock(&dispatcher->plist_lock); + + /* + * Check to seen if the context had been requeued while + * we were processing it (probably by another thread + * pushing commands). If it has then shift it to the + * requeue list if it was not able to submit commands + * due to the dispatch_q being full. Also, do a put to + * make sure the reference counting stays accurate. 
+ * If the node is empty then we will put it on the + * requeue list and not touch the refcount since we + * already hold it from the first time it went on the + * list. + */ + + if (!plist_node_empty(&drawctxt->pending)) { + plist_del(&drawctxt->pending, + &dispatcher->pending); + kgsl_context_put(&drawctxt->base); + } + + if (ret == -EBUSY) + /* Inflight queue is full */ + plist_add(&drawctxt->pending, &busy_list); + else + plist_add(&drawctxt->pending, &requeue); + + spin_unlock(&dispatcher->plist_lock); + } else { + /* + * If the context doesn't need be requeued put back the + * refcount + */ + + kgsl_context_put(&drawctxt->base); + } + } + + spin_lock(&dispatcher->plist_lock); + + /* Put the contexts that couldn't submit back on the pending list */ + plist_for_each_entry_safe(drawctxt, next, &busy_list, pending) { + plist_del(&drawctxt->pending, &busy_list); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + + /* Now put the contexts that need to be requeued back on the list */ + plist_for_each_entry_safe(drawctxt, next, &requeue, pending) { + plist_del(&drawctxt->pending, &requeue); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + + spin_unlock(&dispatcher->plist_lock); +} + +/** + * adreno_dispatcher_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Lock the dispatcher and call _adreno_dispatcher_issueibcmds + */ +static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* If the dispatcher is busy then schedule the work for later */ + if (!mutex_trylock(&dispatcher->mutex)) { + adreno_dispatcher_schedule(&adreno_dev->dev); + return; + } + + _adreno_dispatcher_issuecmds(adreno_dev); + mutex_unlock(&dispatcher->mutex); +} + +/** + * get_timestamp() - Return the next timestamp for the context + * @drawctxt - Pointer to an adreno draw context struct + * @cmdbatch - Pointer to a command batch + * @timestamp - Pointer to a timestamp value possibly passed from the user + * + * Assign a timestamp based on the settings of the draw context and the command + * batch. + */ +static int get_timestamp(struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch, unsigned int *timestamp) +{ + /* Synchronization commands don't get a timestamp */ + if (cmdbatch->flags & KGSL_CMDBATCH_SYNC) { + *timestamp = 0; + return 0; + } + + if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) { + /* + * User specified timestamps need to be greater than the last + * issued timestamp in the context + */ + if (timestamp_cmp(drawctxt->timestamp, *timestamp) >= 0) + return -ERANGE; + + drawctxt->timestamp = *timestamp; + } else + drawctxt->timestamp++; + + *timestamp = drawctxt->timestamp; + return 0; +} + +/** + * adreno_dispatcher_preempt_timer() - Timer that triggers when preemption has + * not completed + * @data: Pointer to adreno device that did not preempt in timely manner + */ +static void adreno_dispatcher_preempt_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + KGSL_DRV_ERR(device, + "Preemption timed out. 
cur_rb rptr/wptr %x/%x id %d, next_rb rptr/wptr %x/%x id %d, disp_state: %d\n", + adreno_dev->cur_rb->rptr, adreno_dev->cur_rb->wptr, + adreno_dev->cur_rb->id, adreno_dev->next_rb->rptr, + adreno_dev->next_rb->wptr, adreno_dev->next_rb->id, + atomic_read(&dispatcher->preemption_state)); + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_get_highest_busy_rb() - Returns the highest priority RB + * which is busy + * @adreno_dev: Device whose RB is returned + */ +struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb( + struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb, *highest_busy_rb = NULL; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->rptr != rb->wptr && !highest_busy_rb) { + highest_busy_rb = rb; + goto done; + } + + if (!adreno_disp_preempt_fair_sched) + continue; + + switch (rb->starve_timer_state) { + case ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT: + if (rb->rptr != rb->wptr && + adreno_dev->cur_rb != rb) { + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT; + rb->sched_timer = jiffies; + } + break; + case ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT: + if (time_after(jiffies, rb->sched_timer + + msecs_to_jiffies(_dispatch_starvation_time))) { + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED; + /* halt dispatcher to remove starvation */ + adreno_get_gpu_halt(adreno_dev); + } + break; + case ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED: + BUG_ON(adreno_dev->cur_rb != rb); + /* + * If the RB has not been running for the minimum + * time slice then allow it to run + */ + if ((rb->rptr != rb->wptr) && time_before(jiffies, + adreno_dev->cur_rb->sched_timer + + msecs_to_jiffies(_dispatch_time_slice))) + highest_busy_rb = rb; + else + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + break; + case ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED: + default: + break; + } + } +done: + return highest_busy_rb; +} + +/** + * adreno_dispactcher_queue_cmd() - Queue a new command in the context + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno draw context + * @cmdbatch: Pointer to the command batch being submitted + * @timestamp: Pointer to the requested timestamp + * + * Queue a command in the context - if there isn't any room in the queue, then + * block until there is + */ +int adreno_dispatcher_queue_cmd(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) +{ + struct adreno_dispatcher_cmdqueue *dispatch_q = + ADRENO_CMDBATCH_DISPATCH_CMDQUEUE(cmdbatch); + int ret; + + spin_lock(&drawctxt->lock); + + if (kgsl_context_detached(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + return -ENOENT; + } + + /* + * Force the preamble for this submission only - this is usually + * requested by the dispatcher as part of fault recovery + */ + + if (test_and_clear_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, + &drawctxt->base.priv)) + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv); + + /* + * Force the premable if set from userspace in the context or cmdbatch + * flags + */ + + if ((drawctxt->base.flags & KGSL_CONTEXT_CTX_SWITCH) || + (cmdbatch->flags & KGSL_CMDBATCH_CTX_SWITCH)) + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv); + + /* Skip this cmdbatch commands if IFH_NOP is enabled */ + if (drawctxt->base.flags & KGSL_CONTEXT_IFH_NOP) + set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv); + + /* + * If we are waiting for the end 
of frame and it hasn't appeared yet, + * then mark the command batch as skipped. It will still progress + * through the pipeline but it won't actually send any commands + */ + + if (test_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv)) { + set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv); + + /* + * If this command batch represents the EOF then clear the way + * for the dispatcher to continue submitting + */ + + if (cmdbatch->flags & KGSL_CMDBATCH_END_OF_FRAME) { + clear_bit(ADRENO_CONTEXT_SKIP_EOF, + &drawctxt->base.priv); + + /* + * Force the preamble on the next command to ensure that + * the state is correct + */ + set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, + &drawctxt->base.priv); + } + } + + /* Wait for room in the context queue */ + + while (drawctxt->queued >= _context_cmdqueue_size) { + trace_adreno_drawctxt_sleep(drawctxt); + spin_unlock(&drawctxt->lock); + + ret = wait_event_interruptible_timeout(drawctxt->wq, + _check_context_queue(drawctxt), + msecs_to_jiffies(_context_queue_wait)); + + spin_lock(&drawctxt->lock); + trace_adreno_drawctxt_wake(drawctxt); + + if (ret <= 0) { + spin_unlock(&drawctxt->lock); + return (ret == 0) ? -ETIMEDOUT : (int) ret; + } + } + /* + * Account for the possiblity that the context got invalidated + * while we were sleeping + */ + + if (kgsl_context_invalid(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + return -EDEADLK; + } + if (kgsl_context_detached(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + return -ENOENT; + } + + ret = get_timestamp(drawctxt, cmdbatch, timestamp); + if (ret) { + spin_unlock(&drawctxt->lock); + return ret; + } + + cmdbatch->timestamp = *timestamp; + + if (cmdbatch->flags & KGSL_CMDBATCH_MARKER) { + + /* + * See if we can fastpath this thing - if nothing is queued + * and nothing is inflight retire without bothering the GPU + */ + + if (!drawctxt->queued && kgsl_check_timestamp(cmdbatch->device, + cmdbatch->context, drawctxt->queued_timestamp)) { + trace_adreno_cmdbatch_queued(cmdbatch, + drawctxt->queued); + + _retire_marker(cmdbatch); + spin_unlock(&drawctxt->lock); + return 0; + } + + /* + * Remember the last queued timestamp - the marker will block + * until that timestamp is expired (unless another command + * comes along and forces the marker to execute) + */ + + cmdbatch->marker_timestamp = drawctxt->queued_timestamp; + } + + /* SYNC commands have timestamp 0 and will get optimized out anyway */ + if (!(cmdbatch->flags & KGSL_CONTEXT_SYNC)) + drawctxt->queued_timestamp = *timestamp; + + /* + * Set the fault tolerance policy for the command batch - assuming the + * context hasn't disabled FT use the current device policy + */ + + if (drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) + set_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy); + else + cmdbatch->fault_policy = adreno_dev->ft_policy; + + /* Put the command into the queue */ + drawctxt->cmdqueue[drawctxt->cmdqueue_tail] = cmdbatch; + drawctxt->cmdqueue_tail = (drawctxt->cmdqueue_tail + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + /* + * If this is a real command then we need to force any markers queued + * before it to dispatch to keep time linear - set the skip bit so + * the commands get NOPed. 
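+ * The loop below walks the queue from cmdqueue_head to cmdqueue_tail and
+ * sets CMDBATCH_FLAG_SKIP on each queued MARKER command batch.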
+ */ + + if (!(cmdbatch->flags & KGSL_CMDBATCH_MARKER)) { + unsigned int i = drawctxt->cmdqueue_head; + + while (i != drawctxt->cmdqueue_tail) { + if (drawctxt->cmdqueue[i]->flags & KGSL_CMDBATCH_MARKER) + set_bit(CMDBATCH_FLAG_SKIP, + &drawctxt->cmdqueue[i]->priv); + + i = CMDQUEUE_NEXT(i, ADRENO_CONTEXT_CMDQUEUE_SIZE); + } + } + + drawctxt->queued++; + trace_adreno_cmdbatch_queued(cmdbatch, drawctxt->queued); + + _track_context(dispatch_q, drawctxt->base.id); + + spin_unlock(&drawctxt->lock); + + /* Add the context to the dispatcher pending list */ + dispatcher_queue_context(adreno_dev, drawctxt); + + /* + * Only issue commands if inflight is less than burst -this prevents us + * from sitting around waiting for the mutex on a busy system - the work + * loop will schedule it for us. Inflight is mutex protected but the + * worse that can happen is that it will go to 0 after we check and if + * it goes to 0 it is because the work loop decremented it and the work + * queue will try to schedule new commands anyway. + */ + + if (dispatch_q->inflight < _context_cmdbatch_burst) + adreno_dispatcher_issuecmds(adreno_dev); + + return 0; +} + +static int _mark_context(int id, void *ptr, void *data) +{ + unsigned int guilty = *((unsigned int *) data); + struct kgsl_context *context = ptr; + + /* + * If the context is guilty mark it as such. Otherwise mark it as + * innocent if it had not already been marked as guilty. If id is + * passed as 0 then mark EVERYBODY guilty (recovery failed) + */ + + if (guilty == 0 || guilty == context->id) + context->reset_status = + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; + else if (context->reset_status != + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT) + context->reset_status = + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; + + return 0; +} + +/** + * mark_guilty_context() - Mark the given context as guilty (failed recovery) + * @device: Pointer to a KGSL device structure + * @id: Context ID of the guilty context (or 0 to mark all as guilty) + * + * Mark the given (or all) context(s) as guilty (failed recovery) + */ +static void mark_guilty_context(struct kgsl_device *device, unsigned int id) +{ + /* Mark the status for all the contexts in the device */ + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, _mark_context, &id); + read_unlock(&device->context_lock); +} + +/* + * If an IB inside of the command batch has a gpuaddr that matches the base + * passed in then zero the size which effectively skips it when it is submitted + * in the ringbuffer. + */ +static void cmdbatch_skip_ib(struct kgsl_cmdbatch *cmdbatch, uint64_t base) +{ + struct kgsl_memobj_node *ib; + + list_for_each_entry(ib, &cmdbatch->cmdlist, node) { + if (ib->gpuaddr == base) { + ib->priv |= MEMOBJ_SKIP; + if (base) + return; + } + } +} + +static void cmdbatch_skip_cmd(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmdbatch **replay, int count) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + int i; + + /* + * SKIPCMD policy: next IB issued for this context is tentative + * if it fails we assume that GFT failed and if it succeeds + * we mark GFT as a success. 
+ * + * Find next commandbatch for the faulting context + * If commandbatch is found + * a) store the current commandbatch fault_policy in context's next + * commandbatch fault_policy + * b) force preamble for next commandbatch + */ + for (i = 1; i < count; i++) { + if (replay[i]->context->id == cmdbatch->context->id) { + replay[i]->fault_policy = replay[0]->fault_policy; + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv); + set_bit(KGSL_FT_SKIPCMD, &replay[i]->fault_recovery); + break; + } + } + + /* + * If we did not find the next cmd then + * a) set a flag for next command issued in this context + * b) store the fault_policy, this fault_policy becomes the policy of + * next command issued in this context + */ + if ((i == count) && drawctxt) { + set_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv); + drawctxt->fault_policy = replay[0]->fault_policy; + } + + /* set the flags to skip this cmdbatch */ + set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv); + cmdbatch->fault_recovery = 0; +} + +static void cmdbatch_skip_frame(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmdbatch **replay, int count) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + int skip = 1; + int i; + + for (i = 0; i < count; i++) { + + /* + * Only operate on command batches that belong to the + * faulting context + */ + + if (replay[i]->context->id != cmdbatch->context->id) + continue; + + /* + * Skip all the command batches in this context until + * the EOF flag is seen. If the EOF flag is seen then + * force the preamble for the next command. + */ + + if (skip) { + set_bit(CMDBATCH_FLAG_SKIP, &replay[i]->priv); + + if (replay[i]->flags & KGSL_CMDBATCH_END_OF_FRAME) + skip = 0; + } else { + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv); + return; + } + } + + /* + * If the EOF flag hasn't been seen yet then set the flag in the + * drawctxt to keep looking for it + */ + + if (skip && drawctxt) + set_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv); + + /* + * If we did see the EOF flag then force the preamble on for the + * next command issued on this context + */ + + if (!skip && drawctxt) + set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->base.priv); +} + +static void remove_invalidated_cmdbatches(struct kgsl_device *device, + struct kgsl_cmdbatch **replay, int count) +{ + int i; + + for (i = 0; i < count; i++) { + struct kgsl_cmdbatch *cmd = replay[i]; + if (cmd == NULL) + continue; + + if (kgsl_context_detached(cmd->context) || + kgsl_context_invalid(cmd->context)) { + replay[i] = NULL; + + mutex_lock(&device->mutex); + kgsl_cancel_events_timestamp(device, + &cmd->context->events, cmd->timestamp); + mutex_unlock(&device->mutex); + + kgsl_cmdbatch_destroy(cmd); + } + } +} + +static char _pidname[TASK_COMM_LEN]; + +static inline const char *_kgsl_context_comm(struct kgsl_context *context) +{ + if (context && context->proc_priv) + strlcpy(_pidname, context->proc_priv->comm, sizeof(_pidname)); + else + snprintf(_pidname, TASK_COMM_LEN, "unknown"); + + return _pidname; +} + +#define pr_fault(_d, _c, fmt, args...) 
\ + dev_err((_d)->dev, "%s[%d]: " fmt, \ + _kgsl_context_comm((_c)->context), \ + (_c)->context->proc_priv->pid, ##args) + + +static void adreno_fault_header(struct kgsl_device *device, + struct adreno_ringbuffer *rb, struct kgsl_cmdbatch *cmdbatch) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int status, rptr, wptr, ib1sz, ib2sz; + uint64_t ib1base, ib2base; + + adreno_readreg(adreno_dev , ADRENO_REG_RBBM_STATUS, &status); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); + + if (cmdbatch != NULL) { + struct adreno_context *drawctxt = + ADRENO_CONTEXT(cmdbatch->context); + + trace_adreno_gpu_fault(cmdbatch->context->id, + cmdbatch->timestamp, + status, rptr, wptr, ib1base, ib1sz, + ib2base, ib2sz, drawctxt->rb->id); + + pr_fault(device, cmdbatch, + "gpu fault ctx %d ts %d status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + cmdbatch->context->id, cmdbatch->timestamp, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz); + + if (rb != NULL) + pr_fault(device, cmdbatch, + "gpu fault rb %d rb sw r/w %4.4x/%4.4x\n", + rb->id, rb->rptr, rb->wptr); + } else { + int id = (rb != NULL) ? rb->id : -1; + + dev_err(device->dev, + "RB[%d]: gpu fault status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + id, status, rptr, wptr, ib1base, ib1sz, ib2base, + ib2sz); + if (rb != NULL) + dev_err(device->dev, + "RB[%d] gpu fault rb sw r/w %4.4x/%4.4x\n", + rb->id, rb->rptr, rb->wptr); + } +} + +void adreno_fault_skipcmd_detached(struct kgsl_device *device, + struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch) +{ + if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) && + kgsl_context_detached(&drawctxt->base)) { + pr_context(device, cmdbatch->context, "gpu %s ctx %d\n", + "detached", cmdbatch->context->id); + clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv); + } +} + +/** + * process_cmdbatch_fault() - Process a cmdbatch for fault policies + * @device: Device on which the cmdbatch caused a fault + * @replay: List of cmdbatches that are to be replayed on the device. The + * faulting cmdbatch is the first command in the replay list and the remaining + * cmdbatches in the list are commands that were submitted to the same queue + * as the faulting one. + * @count: Number of cmdbatches in replay + * @base: The IB1 base at the time of fault + * @fault: The fault type + */ +static void process_cmdbatch_fault(struct kgsl_device *device, + struct kgsl_cmdbatch **replay, int count, + unsigned int base, + int fault) +{ + struct kgsl_cmdbatch *cmdbatch = replay[0]; + int i; + char *state = "failed"; + + /* + * If GFT recovered more than X times in Y ms invalidate the context + * and do not attempt recovery. + * Example: X==3 and Y==3000 ms, GPU hung at 500ms, 1700ms, 25000ms and + * 3000ms for the same context, we will not try FT and invalidate the + * context @3000ms because context triggered GFT more than 3 times in + * last 3 seconds. If a context caused recoverable GPU hangs + * where 1st and 4th gpu hang are more than 3 seconds apart we + * won't disable GFT and invalidate the context. 
+ */ + if (test_bit(KGSL_FT_THROTTLE, &cmdbatch->fault_policy)) { + if (time_after(jiffies, (cmdbatch->context->fault_time + + msecs_to_jiffies(_fault_throttle_time)))) { + cmdbatch->context->fault_time = jiffies; + cmdbatch->context->fault_count = 1; + } else { + cmdbatch->context->fault_count++; + if (cmdbatch->context->fault_count > + _fault_throttle_burst) { + set_bit(KGSL_FT_DISABLE, + &cmdbatch->fault_policy); + pr_context(device, cmdbatch->context, + "gpu fault threshold exceeded %d faults in %d msecs\n", + _fault_throttle_burst, + _fault_throttle_time); + } + } + } + + /* + * If FT is disabled for this cmdbatch invalidate immediately + */ + + if (test_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy) || + test_bit(KGSL_FT_TEMP_DISABLE, &cmdbatch->fault_policy)) { + state = "skipped"; + bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG); + } + + /* If the context is detached do not run FT on context */ + if (kgsl_context_detached(cmdbatch->context)) { + state = "detached"; + bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG); + } + + /* + * Set a flag so we don't print another PM dump if the cmdbatch fails + * again on replay + */ + + set_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy); + + /* + * A hardware fault generally means something was deterministically + * wrong with the command batch - no point in trying to replay it + * Clear the replay bit and move on to the next policy level + */ + + if (fault & ADRENO_HARD_FAULT) + clear_bit(KGSL_FT_REPLAY, &(cmdbatch->fault_policy)); + + /* + * A timeout fault means the IB timed out - clear the policy and + * invalidate - this will clear the FT_SKIP_PMDUMP bit but that is okay + * because we won't see this cmdbatch again + */ + + if (fault & ADRENO_TIMEOUT_FAULT) + bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG); + + /* + * If the context had a GPU page fault then it is likely it would fault + * again if replayed + */ + + if (test_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, + &cmdbatch->context->priv)) { + /* we'll need to resume the mmu later... */ + clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy); + clear_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, + &cmdbatch->context->priv); + } + + /* + * Execute the fault tolerance policy. Each command batch stores the + * current fault policy that was set when it was queued. + * As the options are tried in descending priority + * (REPLAY -> SKIPIBS -> SKIPFRAME -> NOTHING) the bits are cleared + * from the cmdbatch policy so the next thing can be tried if the + * change comes around again + */ + + /* Replay the hanging command batch again */ + if (test_and_clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_REPLAY)); + set_bit(KGSL_FT_REPLAY, &cmdbatch->fault_recovery); + return; + } + + /* + * Skip the last IB1 that was played but replay everything else. + * Note that the last IB1 might not be in the "hung" command batch + * because the CP may have caused a page-fault while it was prefetching + * the next IB1/IB2. walk all outstanding commands and zap the + * supposedly bad IB1 where ever it lurks. 
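+ * Each batch in the replay list that belongs to the faulting context is
+ * therefore handed to cmdbatch_skip_ib() together with the faulting IB1 base;
+ * the intent is that only the offending IB is dropped while the rest of those
+ * batches is still replayed.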
+ */ + + if (test_and_clear_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_SKIPIB)); + set_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_recovery); + + for (i = 0; i < count; i++) { + if (replay[i] != NULL && + replay[i]->context->id == cmdbatch->context->id) + cmdbatch_skip_ib(replay[i], base); + } + + return; + } + + /* Skip the faulted command batch submission */ + if (test_and_clear_bit(KGSL_FT_SKIPCMD, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_SKIPCMD)); + + /* Skip faulting command batch */ + cmdbatch_skip_cmd(cmdbatch, replay, count); + + return; + } + + if (test_and_clear_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, + BIT(KGSL_FT_SKIPFRAME)); + set_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_recovery); + + /* + * Skip all the pending command batches for this context until + * the EOF frame is seen + */ + cmdbatch_skip_frame(cmdbatch, replay, count); + return; + } + + /* If we get here then all the policies failed */ + + pr_context(device, cmdbatch->context, "gpu %s ctx %d ts %d\n", + state, cmdbatch->context->id, cmdbatch->timestamp); + + /* Mark the context as failed */ + mark_guilty_context(device, cmdbatch->context->id); + + /* Invalidate the context */ + adreno_drawctxt_invalidate(device, cmdbatch->context); +} + +/** + * recover_dispatch_q() - Recover all commands in a dispatch queue by + * resubmitting the commands + * @device: Device on which recovery is performed + * @dispatch_q: The command queue to recover + * @fault: Faults caused by the command in the dispatch q + * @base: The IB1 base during the fault + */ +static void recover_dispatch_q(struct kgsl_device *device, + struct adreno_dispatcher_cmdqueue *dispatch_q, + int fault, + unsigned int base) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_cmdbatch **replay = NULL; + unsigned int ptr; + int first = 0; + int count = 0; + int i; + + /* Allocate memory to store the inflight commands */ + replay = kzalloc(sizeof(*replay) * dispatch_q->inflight, GFP_KERNEL); + + if (replay == NULL) { + unsigned int ptr = dispatch_q->head; + + /* Recovery failed - mark everybody on this q guilty */ + while (ptr != dispatch_q->tail) { + struct kgsl_context *context = + dispatch_q->cmd_q[ptr]->context; + + mark_guilty_context(device, context->id); + adreno_drawctxt_invalidate(device, context); + kgsl_cmdbatch_destroy(dispatch_q->cmd_q[ptr]); + + ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE); + } + + /* + * Set the replay count to zero - this will ensure that the + * hardware gets reset but nothing else gets played + */ + + count = 0; + goto replay; + } + + /* Copy the inflight command batches into the temporary storage */ + ptr = dispatch_q->head; + + while (ptr != dispatch_q->tail) { + replay[count++] = dispatch_q->cmd_q[ptr]; + ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE); + } + + if (fault && count) + process_cmdbatch_fault(device, replay, + count, base, fault); +replay: + dispatch_q->inflight = 0; + dispatch_q->head = dispatch_q->tail = 0; + /* Remove any pending command batches that have been invalidated */ + remove_invalidated_cmdbatches(device, replay, count); + + /* Replay the pending command buffers */ + for (i = 0; i < count; i++) { + + int ret; + + if (replay[i] == NULL) + continue; + + /* + * Force the preamble on the first command (if applicable) to + * avoid any strange stage issues + */ + + if (first == 0) { + 
set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv); + first = 1; + } + + /* + * Force each command batch to wait for idle - this avoids weird + * CP parse issues + */ + + set_bit(CMDBATCH_FLAG_WFI, &replay[i]->priv); + + ret = sendcmd(adreno_dev, replay[i]); + + /* + * If sending the command fails, then try to recover by + * invalidating the context + */ + + if (ret) { + pr_context(device, replay[i]->context, + "gpu reset failed ctx %d ts %d\n", + replay[i]->context->id, replay[i]->timestamp); + + /* Mark this context as guilty (failed recovery) */ + mark_guilty_context(device, replay[i]->context->id); + + adreno_drawctxt_invalidate(device, replay[i]->context); + remove_invalidated_cmdbatches(device, &replay[i], + count - i); + } + } + + /* Clear the fault bit */ + clear_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv); + + kfree(replay); +} + +static int dispatcher_do_fault(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct adreno_dispatcher_cmdqueue *dispatch_q = NULL, *dispatch_q_temp; + struct adreno_ringbuffer *rb; + struct adreno_ringbuffer *hung_rb = NULL; + unsigned int reg; + uint64_t base; + struct kgsl_cmdbatch *cmdbatch = NULL; + int ret, i; + int fault; + int halt; + + fault = atomic_xchg(&dispatcher->fault, 0); + if (fault == 0) + return 0; + + /* + * On A5xx, read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to + * tell if this function was entered after a pagefault. If so, only + * proceed if the fault handler has already run in the IRQ thread, + * else return early to give the fault handler a chance to run. + */ + if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && adreno_is_a5xx(adreno_dev)) { + unsigned int val; + mutex_lock(&device->mutex); + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); + mutex_unlock(&device->mutex); + if (val & BIT(24)) + return 0; + } + + /* Turn off all the timers */ + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + del_timer_sync(&dispatcher->preempt_timer); + + mutex_lock(&device->mutex); + + /* hang opcode */ + kgsl_cffdump_hang(device); + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, &base); + + /* + * Force the CP off for anything but a hard fault to make sure it is + * good and stopped + */ + if (!(fault & ADRENO_HARD_FAULT)) { + adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); + if (adreno_is_a5xx(adreno_dev)) + reg |= 1 | (1 << 1); + else + reg |= (1 << 27) | (1 << 28); + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); + } + /* + * retire cmdbatches from all the dispatch_q's before starting recovery + */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + adreno_dispatch_process_cmdqueue(adreno_dev, + &(rb->dispatch_q), 0); + /* Select the active dispatch_q */ + if (base == rb->buffer_desc.gpuaddr) { + dispatch_q = &(rb->dispatch_q); + hung_rb = rb; + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &hung_rb->rptr); + if (adreno_dev->cur_rb != hung_rb) { + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = hung_rb; + } + } + if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED == + rb->starve_timer_state) { + adreno_put_gpu_halt(adreno_dev); + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + } + } + + if (dispatch_q && (dispatch_q->tail != dispatch_q->head)) { + cmdbatch = dispatch_q->cmd_q[dispatch_q->head]; + trace_adreno_cmdbatch_fault(cmdbatch, fault); + } + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + 
ADRENO_REG_CP_IB1_BASE_HI, &base); + + /* + * Dump the snapshot information if this is the first + * detected fault for the oldest active command batch + */ + + if (cmdbatch == NULL || + !test_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy)) { + adreno_fault_header(device, hung_rb, cmdbatch); + kgsl_device_snapshot(device, + cmdbatch ? cmdbatch->context : NULL); + } + + /* Terminate the stalled transaction and resume the IOMMU */ + if (fault & ADRENO_IOMMU_PAGE_FAULT) + kgsl_mmu_pagefault_resume(&device->mmu); + + /* Reset the dispatcher queue */ + dispatcher->inflight = 0; + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_CLEAR); + + /* Reset the GPU and make sure halt is not set during recovery */ + halt = adreno_gpu_halt(adreno_dev); + adreno_clear_gpu_halt(adreno_dev); + + /* + * If there is a stall in the ringbuffer after all commands have been + * retired then we could hit problems if contexts are waiting for + * internal timestamps that will never retire + */ + + if (hung_rb != NULL) { + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_MAX + hung_rb->id, + soptimestamp), hung_rb->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_MAX + hung_rb->id, + eoptimestamp), hung_rb->timestamp); + + /* Schedule any pending events to be run */ + kgsl_process_event_group(device, &hung_rb->events); + } + + ret = adreno_reset(device, fault); + mutex_unlock(&device->mutex); + /* if any other fault got in until reset then ignore */ + atomic_set(&dispatcher->fault, 0); + + /* If adreno_reset() fails then what hope do we have for the future? */ + BUG_ON(ret); + + /* recover all the dispatch_q's starting with the one that hung */ + if (dispatch_q) + recover_dispatch_q(device, dispatch_q, fault, base); + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + dispatch_q_temp = &(rb->dispatch_q); + if (dispatch_q_temp != dispatch_q) + recover_dispatch_q(device, dispatch_q_temp, 0, base); + } + + atomic_add(halt, &adreno_dev->halt); + + return 1; +} + +static inline int cmdbatch_consumed(struct kgsl_cmdbatch *cmdbatch, + unsigned int consumed, unsigned int retired) +{ + return ((timestamp_cmp(cmdbatch->timestamp, consumed) >= 0) && + (timestamp_cmp(retired, cmdbatch->timestamp) < 0)); +} + +static void _print_recovery(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + static struct { + unsigned int mask; + const char *str; + } flags[] = { ADRENO_FT_TYPES }; + + int i, nr = find_first_bit(&cmdbatch->fault_recovery, BITS_PER_LONG); + char *result = "unknown"; + + for (i = 0; i < ARRAY_SIZE(flags); i++) { + if (flags[i].mask == BIT(nr)) { + result = (char *) flags[i].str; + break; + } + } + + pr_context(device, cmdbatch->context, + "gpu %s ctx %d ts %d policy %lX\n", + result, cmdbatch->context->id, cmdbatch->timestamp, + cmdbatch->fault_recovery); +} + +static void cmdbatch_profile_ticks(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch, uint64_t *start, uint64_t *retire) +{ + void *ptr = adreno_dev->cmdbatch_profile_buffer.hostptr; + struct adreno_cmdbatch_profile_entry *entry; + + entry = (struct adreno_cmdbatch_profile_entry *) + (ptr + (cmdbatch->profile_index * sizeof(*entry))); + + rmb(); + *start = entry->started; + *retire = entry->retired; +} + +int adreno_dispatch_process_cmdqueue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q, + int long_ib_detect) +{ + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher 
*dispatcher = &(adreno_dev->dispatcher); + uint64_t start_ticks = 0, retire_ticks = 0; + + struct adreno_dispatcher_cmdqueue *active_q = + &(adreno_dev->cur_rb->dispatch_q); + int count = 0; + + while (dispatch_q->head != dispatch_q->tail) { + struct kgsl_cmdbatch *cmdbatch = + dispatch_q->cmd_q[dispatch_q->head]; + struct adreno_context *drawctxt; + BUG_ON(cmdbatch == NULL); + + drawctxt = ADRENO_CONTEXT(cmdbatch->context); + + /* + * First try to expire the timestamp. This happens if the + * context is valid and the timestamp expired normally or if the + * context was destroyed before the command batch was finished + * in the GPU. Either way retire the command batch advance the + * pointers and continue processing the queue + */ + + if (kgsl_check_timestamp(device, cmdbatch->context, + cmdbatch->timestamp)) { + + /* + * If the cmdbatch in question had faulted announce its + * successful completion to the world + */ + + if (cmdbatch->fault_recovery != 0) { + /* Mark the context as faulted and recovered */ + set_bit(ADRENO_CONTEXT_FAULT, + &cmdbatch->context->priv); + + _print_recovery(device, cmdbatch); + } + + /* Reduce the number of inflight command batches */ + dispatcher->inflight--; + dispatch_q->inflight--; + + /* + * If kernel profiling is enabled get the submit and + * retired ticks from the buffer + */ + + if (test_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv)) + cmdbatch_profile_ticks(adreno_dev, cmdbatch, + &start_ticks, &retire_ticks); + + trace_adreno_cmdbatch_retired(cmdbatch, + (int) dispatcher->inflight, start_ticks, + retire_ticks, ADRENO_CMDBATCH_RB(cmdbatch)); + + /* Record the delta between submit and retire ticks */ + drawctxt->submit_retire_ticks[drawctxt->ticks_index] = + retire_ticks - cmdbatch->submit_ticks; + + drawctxt->ticks_index = (drawctxt->ticks_index + 1) + % SUBMIT_RETIRE_TICKS_SIZE; + + /* Zero the old entry*/ + dispatch_q->cmd_q[dispatch_q->head] = NULL; + + /* Advance the buffer head */ + dispatch_q->head = CMDQUEUE_NEXT(dispatch_q->head, + ADRENO_DISPATCH_CMDQUEUE_SIZE); + + /* Destroy the retired command batch */ + kgsl_cmdbatch_destroy(cmdbatch); + + /* Update the expire time for the next command batch */ + + if (dispatch_q->inflight > 0 && + dispatch_q == active_q) { + cmdbatch = + dispatch_q->cmd_q[dispatch_q->head]; + cmdbatch->expires = jiffies + + msecs_to_jiffies( + adreno_cmdbatch_timeout); + } + + count++; + continue; + } + /* + * Break here if fault detection is disabled for the context or + * if the long running IB detection is disaled device wide or + * if the dispatch q is not active + * Long running command buffers will be allowed to run to + * completion - but badly behaving command buffers (infinite + * shaders etc) can end up running forever. + */ + + if (!long_ib_detect || + drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE + || dispatch_q != active_q) + break; + + /* + * The last line of defense is to check if the command batch has + * timed out. 
If we get this far but the timeout hasn't expired + * yet then the GPU is still ticking away + */ + + if (time_is_after_jiffies(cmdbatch->expires)) + break; + + /* Boom goes the dynamite */ + + pr_context(device, cmdbatch->context, + "gpu timeout ctx %d ts %d\n", + cmdbatch->context->id, cmdbatch->timestamp); + + adreno_set_gpu_fault(adreno_dev, ADRENO_TIMEOUT_FAULT); + break; + } + return count; +} + +/** + * adreno_dispatcher_work() - Master work handler for the dispatcher + * @work: Pointer to the work struct for the current work queue + * + * Process expired commands and send new ones. + */ +static void adreno_dispatcher_work(struct work_struct *work) +{ + struct adreno_dispatcher *dispatcher = + container_of(work, struct adreno_dispatcher, work); + struct adreno_device *adreno_dev = + container_of(dispatcher, struct adreno_device, dispatcher); + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int count = 0; + int cur_rb_id = adreno_dev->cur_rb->id; + + mutex_lock(&dispatcher->mutex); + + if (ADRENO_DISPATCHER_PREEMPT_CLEAR == + atomic_read(&dispatcher->preemption_state)) + /* process the active q*/ + count = adreno_dispatch_process_cmdqueue(adreno_dev, + &(adreno_dev->cur_rb->dispatch_q), + adreno_long_ib_detect(adreno_dev)); + + else if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED == + atomic_read(&dispatcher->preemption_state)) + count = adreno_dispatch_process_cmdqueue(adreno_dev, + &(adreno_dev->cur_rb->dispatch_q), 0); + + /* Check if gpu fault occurred */ + if (dispatcher_do_fault(device)) + goto done; + + gpudev->preemption_schedule(adreno_dev); + + if (cur_rb_id != adreno_dev->cur_rb->id) { + struct adreno_dispatcher_cmdqueue *dispatch_q = + &(adreno_dev->cur_rb->dispatch_q); + /* active level switched, clear new level cmdbatches */ + count = adreno_dispatch_process_cmdqueue(adreno_dev, + dispatch_q, + adreno_long_ib_detect(adreno_dev)); + /* + * If GPU has already completed all the commands in new incoming + * RB then we may not get another interrupt due to which + * dispatcher may not run again. Schedule dispatcher here so + * we can come back and process the other RB's if required + */ + if (dispatch_q->head == dispatch_q->tail) + adreno_dispatcher_schedule(device); + } + /* + * If inflight went to 0, queue back up the event processor to catch + * stragglers + */ + if (dispatcher->inflight == 0 && count) + kgsl_schedule_work(&device->event_work); + + /* Try to dispatch new commands */ + _adreno_dispatcher_issuecmds(adreno_dev); + +done: + /* Either update the timer for the next command batch or disable it */ + if (dispatcher->inflight) { + struct kgsl_cmdbatch *cmdbatch = + adreno_dev->cur_rb->dispatch_q.cmd_q[ + adreno_dev->cur_rb->dispatch_q.head]; + if (cmdbatch && adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) + /* Update the timeout timer for the next cmdbatch */ + mod_timer(&dispatcher->timer, cmdbatch->expires); + + /* There are still things in flight - update the idle counts */ + mutex_lock(&device->mutex); + kgsl_pwrscale_update(device); + mod_timer(&device->idle_timer, jiffies + + device->pwrctrl.interval_timeout); + mutex_unlock(&device->mutex); + } else { + /* There is nothing left in the pipeline. 
Shut 'er down boys */ + mutex_lock(&device->mutex); + + if (test_and_clear_bit(ADRENO_DISPATCHER_ACTIVE, + &dispatcher->priv)) + complete_all(&dispatcher->idle_gate); + + /* + * Stop the fault timer before decrementing the active count to + * avoid reading the hardware registers while we are trying to + * turn clocks off + */ + del_timer_sync(&dispatcher->fault_timer); + + if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) { + kgsl_active_count_put(device); + clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); + } + + mutex_unlock(&device->mutex); + } + + mutex_unlock(&dispatcher->mutex); +} + +void adreno_dispatcher_schedule(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + kgsl_schedule_work(&dispatcher->work); +} + +/** + * adreno_dispatcher_queue_context() - schedule a drawctxt in the dispatcher + * device: pointer to the KGSL device + * drawctxt: pointer to the drawctxt to schedule + * + * Put a draw context on the dispatcher pending queue and schedule the + * dispatcher. This is used to reschedule changes that might have been blocked + * for sync points or other concerns + */ +void adreno_dispatcher_queue_context(struct kgsl_device *device, + struct adreno_context *drawctxt) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + dispatcher_queue_context(adreno_dev, drawctxt); + adreno_dispatcher_schedule(device); +} + +/* + * This is called on a regular basis while command batches are inflight. Fault + * detection registers are read and compared to the existing values - if they + * changed then the GPU is still running. If they are the same between + * subsequent calls then the GPU may have faulted + */ + +static void adreno_dispatcher_fault_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* Leave if the user decided to turn off fast hang detection */ + if (!adreno_soft_fault_detect(adreno_dev)) + return; + + if (adreno_gpu_fault(adreno_dev)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * Read the fault registers - if it returns 0 then they haven't changed + * so mark the dispatcher as faulted and schedule the work loop. 
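+ * If the registers did change, re-arm the timer for another
+ * _fault_timer_interval ms so the comparison repeats while work is still
+ * inflight (the interval is also tunable through the fault_detect_interval
+ * sysfs attribute defined below).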
+ */ + + if (!fault_detect_read_compare(device)) { + adreno_set_gpu_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_dispatcher_schedule(device); + } else { + mod_timer(&dispatcher->fault_timer, + jiffies + msecs_to_jiffies(_fault_timer_interval)); + } +} + +/* + * This is called when the timer expires - it either means the GPU is hung or + * the IB is taking too long to execute + */ +static void adreno_dispatcher_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &adreno_dev->dev; + + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_start() - activate the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + */ +void adreno_dispatcher_start(struct kgsl_device *device) +{ + complete_all(&device->cmdbatch_gate); + + /* Schedule the work loop to get things going */ + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_stop() - stop the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Stop the dispatcher and close all the timers + */ +void adreno_dispatcher_stop(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); +} + +/** + * adreno_dispatcher_close() - close the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Close the dispatcher and free all the oustanding commands and memory + */ +void adreno_dispatcher_close(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int i; + struct adreno_ringbuffer *rb; + + mutex_lock(&dispatcher->mutex); + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + struct adreno_dispatcher_cmdqueue *dispatch_q = + &(rb->dispatch_q); + while (dispatch_q->head != dispatch_q->tail) { + kgsl_cmdbatch_destroy( + dispatch_q->cmd_q[dispatch_q->head]); + dispatch_q->head = (dispatch_q->head + 1) + % ADRENO_DISPATCH_CMDQUEUE_SIZE; + } + } + + mutex_unlock(&dispatcher->mutex); + + kobject_put(&dispatcher->kobj); +} + +struct dispatcher_attribute { + struct attribute attr; + ssize_t (*show)(struct adreno_dispatcher *, + struct dispatcher_attribute *, char *); + ssize_t (*store)(struct adreno_dispatcher *, + struct dispatcher_attribute *, const char *buf, + size_t count); + unsigned int max; + unsigned int *value; +}; + +#define DISPATCHER_UINT_ATTR(_name, _mode, _max, _value) \ + struct dispatcher_attribute dispatcher_attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show_uint, \ + .store = _store_uint, \ + .max = _max, \ + .value = &(_value), \ + } + +#define to_dispatcher_attr(_a) \ + container_of((_a), struct dispatcher_attribute, attr) +#define to_dispatcher(k) container_of(k, struct adreno_dispatcher, kobj) + +static ssize_t _store_uint(struct adreno_dispatcher *dispatcher, + struct dispatcher_attribute *attr, + const char *buf, size_t size) +{ + unsigned int val = 0; + int ret; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + if (!val || (attr->max && (val > attr->max))) + return -EINVAL; + + *((unsigned int *) attr->value) = val; + return size; +} + +static ssize_t _show_uint(struct adreno_dispatcher *dispatcher, + struct dispatcher_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) attr->value)); +} + +static 
DISPATCHER_UINT_ATTR(inflight, 0644, ADRENO_DISPATCH_CMDQUEUE_SIZE, + _dispatcher_q_inflight_hi); + +static DISPATCHER_UINT_ATTR(inflight_low_latency, 0644, + ADRENO_DISPATCH_CMDQUEUE_SIZE, _dispatcher_q_inflight_lo); +/* + * Our code that "puts back" a command from the context is much cleaner + * if we are sure that there will always be enough room in the + * ringbuffer so restrict the maximum size of the context queue to + * ADRENO_CONTEXT_CMDQUEUE_SIZE - 1 + */ +static DISPATCHER_UINT_ATTR(context_cmdqueue_size, 0644, + ADRENO_CONTEXT_CMDQUEUE_SIZE - 1, _context_cmdqueue_size); +static DISPATCHER_UINT_ATTR(context_burst_count, 0644, 0, + _context_cmdbatch_burst); +static DISPATCHER_UINT_ATTR(cmdbatch_timeout, 0644, 0, + adreno_cmdbatch_timeout); +static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait); +static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0, + _fault_timer_interval); +static DISPATCHER_UINT_ATTR(fault_throttle_time, 0644, 0, + _fault_throttle_time); +static DISPATCHER_UINT_ATTR(fault_throttle_burst, 0644, 0, + _fault_throttle_burst); +static DISPATCHER_UINT_ATTR(disp_preempt_fair_sched, 0644, 0, + adreno_disp_preempt_fair_sched); +static DISPATCHER_UINT_ATTR(dispatch_time_slice, 0644, 0, + _dispatch_time_slice); +static DISPATCHER_UINT_ATTR(dispatch_starvation_time, 0644, 0, + _dispatch_starvation_time); + +static struct attribute *dispatcher_attrs[] = { + &dispatcher_attr_inflight.attr, + &dispatcher_attr_inflight_low_latency.attr, + &dispatcher_attr_context_cmdqueue_size.attr, + &dispatcher_attr_context_burst_count.attr, + &dispatcher_attr_cmdbatch_timeout.attr, + &dispatcher_attr_context_queue_wait.attr, + &dispatcher_attr_fault_detect_interval.attr, + &dispatcher_attr_fault_throttle_time.attr, + &dispatcher_attr_fault_throttle_burst.attr, + &dispatcher_attr_disp_preempt_fair_sched.attr, + &dispatcher_attr_dispatch_time_slice.attr, + &dispatcher_attr_dispatch_starvation_time.attr, + NULL, +}; + +static ssize_t dispatcher_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct adreno_dispatcher *dispatcher = to_dispatcher(kobj); + struct dispatcher_attribute *pattr = to_dispatcher_attr(attr); + ssize_t ret = -EIO; + + if (pattr->show) + ret = pattr->show(dispatcher, pattr, buf); + + return ret; +} + +static ssize_t dispatcher_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct adreno_dispatcher *dispatcher = to_dispatcher(kobj); + struct dispatcher_attribute *pattr = to_dispatcher_attr(attr); + ssize_t ret = -EIO; + + if (pattr->store) + ret = pattr->store(dispatcher, pattr, buf, count); + + return ret; +} + +static const struct sysfs_ops dispatcher_sysfs_ops = { + .show = dispatcher_sysfs_show, + .store = dispatcher_sysfs_store +}; + +static struct kobj_type ktype_dispatcher = { + .sysfs_ops = &dispatcher_sysfs_ops, + .default_attrs = dispatcher_attrs, +}; + +/** + * adreno_dispatcher_init() - Initialize the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Initialize the dispatcher + */ +int adreno_dispatcher_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + memset(dispatcher, 0, sizeof(*dispatcher)); + + mutex_init(&dispatcher->mutex); + + setup_timer(&dispatcher->timer, adreno_dispatcher_timer, + (unsigned long) adreno_dev); + + setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer, + (unsigned long) 
adreno_dev); + + setup_timer(&dispatcher->preempt_timer, adreno_dispatcher_preempt_timer, + (unsigned long) adreno_dev); + + INIT_WORK(&dispatcher->work, adreno_dispatcher_work); + + init_completion(&dispatcher->idle_gate); + complete_all(&dispatcher->idle_gate); + + plist_head_init(&dispatcher->pending); + spin_lock_init(&dispatcher->plist_lock); + + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_CLEAR); + + ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher, + &device->dev->kobj, "dispatch"); + + return ret; +} + +/* + * adreno_dispatcher_idle() - Wait for dispatcher to idle + * @adreno_dev: Adreno device whose dispatcher needs to idle + * + * Signal dispatcher to stop sending more commands and complete + * the commands that have already been submitted. This function + * should not be called when dispatcher mutex is held. + */ +int adreno_dispatcher_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + BUG_ON(!mutex_is_locked(&device->mutex)); + if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + return 0; + + /* + * Ensure that this function is not called when dispatcher + * mutex is held and device is started + */ + if (mutex_is_locked(&dispatcher->mutex) && + dispatcher->mutex.owner == current) + BUG_ON(1); + + adreno_get_gpu_halt(adreno_dev); + + mutex_unlock(&device->mutex); + + ret = wait_for_completion_timeout(&dispatcher->idle_gate, + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); + if (ret == 0) { + ret = -ETIMEDOUT; + WARN(1, "Dispatcher halt timeout "); + } else if (ret < 0) { + KGSL_DRV_ERR(device, "Dispatcher halt failed %d\n", ret); + } else { + ret = 0; + } + + mutex_lock(&device->mutex); + adreno_put_gpu_halt(adreno_dev); + /* + * requeue dispatcher work to resubmit pending commands + * that may have been blocked due to this idling request + */ + adreno_dispatcher_schedule(device); + return ret; +} + +void adreno_preempt_process_dispatch_queue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q) +{ + struct kgsl_device *device = &(adreno_dev->dev); + struct kgsl_cmdbatch *cmdbatch; + + if (dispatch_q->head != dispatch_q->tail) { + /* + * retire cmdbacthes from previous q, and don't check for + * timeout since the cmdbatch may have been preempted + */ + adreno_dispatch_process_cmdqueue(adreno_dev, + dispatch_q, 0); + } + + /* set the timer for the first cmdbatch of active dispatch_q */ + dispatch_q = &(adreno_dev->cur_rb->dispatch_q); + if (dispatch_q->head != dispatch_q->tail) { + cmdbatch = dispatch_q->cmd_q[dispatch_q->head]; + cmdbatch->expires = jiffies + + msecs_to_jiffies(adreno_cmdbatch_timeout); + } + kgsl_schedule_work(&device->event_work); +} + +/** + * adreno_dispatcher_preempt_callback() - Callback funcion for CP_SW interrupt + * @adreno_dev: The device on which the interrupt occurred + * @bit: Interrupt bit in the interrupt status register + */ +void adreno_dispatcher_preempt_callback(struct adreno_device *adreno_dev, + int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &(adreno_dev->dispatcher); + if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED != + atomic_read(&dispatcher->preemption_state)) { + KGSL_DRV_INFO(device, + "Preemption interrupt generated w/o trigger!\n"); + return; + } + trace_adreno_hw_preempt_trig_to_comp_int(adreno_dev->cur_rb, + adreno_dev->next_rb); + atomic_set(&dispatcher->preemption_state, + 
ADRENO_DISPATCHER_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(device); +} diff --git a/drivers/gpu/msm/adreno_dispatch.h b/drivers/gpu/msm/adreno_dispatch.h new file mode 100644 index 000000000000..1e89e604e0ae --- /dev/null +++ b/drivers/gpu/msm/adreno_dispatch.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + + +#ifndef ____ADRENO_DISPATCHER_H +#define ____ADRENO_DISPATCHER_H + +/* Time to allow preemption to complete (in ms) */ +#define ADRENO_DISPATCH_PREEMPT_TIMEOUT 10000 + +extern unsigned int adreno_disp_preempt_fair_sched; +extern unsigned int adreno_cmdbatch_timeout; + +/** + * enum adreno_dispatcher_preempt_states - States of dispatcher for ringbuffer + * preemption + * @ADRENO_DISPATCHER_PREEMPT_CLEAR: No preemption is underway, + * only 1 preemption can be underway at any point + * @ADRENO_DISPATCHER_PREEMPT_TRIGGERED: A preemption is underway + * @ADRENO_DISPATCHER_PREEMPT_COMPLETE: A preemption has just completed + */ +enum adreno_dispatcher_preempt_states { + ADRENO_DISPATCHER_PREEMPT_CLEAR = 0, + ADRENO_DISPATCHER_PREEMPT_TRIGGERED, + ADRENO_DISPATCHER_PREEMPT_COMPLETE, +}; + +/** + * enum adreno_dispatcher_starve_timer_states - Starvation control states of + * a RB + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT: Uninitialized, starvation control + * is not operating + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT: Starvation timer is initialized + * and counting + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED: The starvation timer has elapsed + * this state indicates that the RB is starved + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED: RB is scheduled on the device + * and will remain scheduled for a minimum time slice when in this state. 
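+ *
+ * The names suggest the expected progression: a waiting RB moves from UNINIT
+ * to INIT when its starvation timer is armed, to ELAPSED if the timer expires
+ * before the RB gets scheduled, and to SCHEDULED once the dispatcher switches
+ * to it for its time slice. The fault path in adreno_dispatch.c drops the GPU
+ * halt of an ELAPSED RB and resets it back to UNINIT.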
+ */
+enum adreno_dispatcher_starve_timer_states {
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT = 0,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT = 1,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED = 2,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED = 3,
+};
+
+/*
+ * Maximum size of the dispatcher ringbuffer - the actual inflight size will be
+ * smaller than this, but this size allows for a larger range of inflight
+ * sizes that can be chosen at runtime
+ */
+
+#define ADRENO_DISPATCH_CMDQUEUE_SIZE 128
+
+#define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
+
+#define ACTIVE_CONTEXT_LIST_MAX 2
+
+struct adreno_context_list {
+	unsigned int id;
+	unsigned long jiffies;
+};
+
+/**
+ * struct adreno_dispatcher_cmdqueue - List of commands for a RB level
+ * @cmd_q: List of command batches submitted to dispatcher
+ * @inflight: Number of commands inflight in this q
+ * @head: Head pointer to the q
+ * @tail: Queue's tail pointer
+ * @active_contexts: List of most recently seen contexts
+ * @active_context_count: Number of active contexts in the active_contexts list
+ */
+struct adreno_dispatcher_cmdqueue {
+	struct kgsl_cmdbatch *cmd_q[ADRENO_DISPATCH_CMDQUEUE_SIZE];
+	unsigned int inflight;
+	unsigned int head;
+	unsigned int tail;
+	struct adreno_context_list active_contexts[ACTIVE_CONTEXT_LIST_MAX];
+	int active_context_count;
+};
+
+/**
+ * struct adreno_dispatcher - container for the adreno GPU dispatcher
+ * @mutex: Mutex to protect the structure
+ * @priv: Private flags for the dispatcher (ADRENO_DISPATCHER_POWER/ACTIVE)
+ * @timer: Timer to monitor the progress of the command batches
+ * @fault_timer: Timer used for fast (soft) hang detection
+ * @inflight: Number of command batch operations pending in the ringbuffer
+ * @fault: Non-zero if a fault was detected.
+ * @pending: Priority list of contexts waiting to submit command batches
+ * @plist_lock: Spin lock to protect the pending queue
+ * @work: work_struct to put the dispatcher in a work queue
+ * @kobj: kobject for the dispatcher directory in the device sysfs node
+ * @idle_gate: Gate to wait on for dispatcher to idle
+ * @preemption_state: Indicates which state the dispatcher is in; states are
+ * defined by enum adreno_dispatcher_preempt_states
+ * @preempt_token_submit: Indicates if a preempt token has been submitted in
+ * the current ringbuffer.
+ * @preempt_timer: Timer to track if preemption occured within specified time + * @disp_preempt_fair_sched: If set then dispatcher will try to be fair to + * starving RB's by scheduling them in and enforcing a minimum time slice + * for every RB that is scheduled to run on the device + */ +struct adreno_dispatcher { + struct mutex mutex; + unsigned long priv; + struct timer_list timer; + struct timer_list fault_timer; + unsigned int inflight; + atomic_t fault; + struct plist_head pending; + spinlock_t plist_lock; + struct work_struct work; + struct kobject kobj; + struct completion idle_gate; + atomic_t preemption_state; + int preempt_token_submit; + struct timer_list preempt_timer; + unsigned int disp_preempt_fair_sched; +}; + +enum adreno_dispatcher_flags { + ADRENO_DISPATCHER_POWER = 0, + ADRENO_DISPATCHER_ACTIVE = 1, +}; + +void adreno_dispatcher_start(struct kgsl_device *device); +int adreno_dispatcher_init(struct adreno_device *adreno_dev); +void adreno_dispatcher_close(struct adreno_device *adreno_dev); +int adreno_dispatcher_idle(struct adreno_device *adreno_dev); +void adreno_dispatcher_stop(struct adreno_device *adreno_dev); + +int adreno_dispatcher_queue_cmd(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp); + +void adreno_dispatcher_schedule(struct kgsl_device *device); +void adreno_dispatcher_pause(struct adreno_device *adreno_dev); +void adreno_dispatcher_queue_context(struct kgsl_device *device, + struct adreno_context *drawctxt); +void adreno_dispatcher_preempt_callback(struct adreno_device *adreno_dev, + int bit); +struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb( + struct adreno_device *adreno_dev); +int adreno_dispatch_process_cmdqueue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q, + int long_ib_detect); +void adreno_preempt_process_dispatch_queue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q); + +#endif /* __ADRENO_DISPATCHER_H */ diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c new file mode 100644 index 000000000000..505fd5473a78 --- /dev/null +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -0,0 +1,585 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/slab.h> +#include <linux/msm_kgsl.h> +#include <linux/sched.h> +#include <linux/debugfs.h> + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "adreno.h" +#include "adreno_trace.h" + +#define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF + +static void wait_callback(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct adreno_context *drawctxt = priv; + wake_up_all(&drawctxt->waiting); +} + +static int _check_context_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + /* Bail if the drawctxt has been invalidated or destroyed */ + if (kgsl_context_detached(context) || kgsl_context_invalid(context)) + return 1; + + return kgsl_check_timestamp(device, context, timestamp); +} + +/** + * adreno_drawctxt_dump() - dump information about a draw context + * @device: KGSL device that owns the context + * @context: KGSL context to dump information about + * + * Dump specific information about the context to the kernel log. Used for + * fence timeout callbacks + */ +void adreno_drawctxt_dump(struct kgsl_device *device, + struct kgsl_context *context) +{ + unsigned int queue, start, retire; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int index, pos; + char buf[120]; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queue); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, &start); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retire); + + /* + * We may have cmdbatch timer running, which also uses same + * lock, take a lock with software interrupt disabled (bh) + * to avoid spin lock recursion. + */ + spin_lock_bh(&drawctxt->lock); + dev_err(device->dev, + " context[%d]: queue=%d, submit=%d, start=%d, retire=%d\n", + context->id, queue, drawctxt->submitted_timestamp, + start, retire); + + if (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + struct kgsl_cmdbatch *cmdbatch = + drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + if (test_bit(CMDBATCH_FLAG_FENCE_LOG, &cmdbatch->priv)) { + dev_err(device->dev, + " possible deadlock. Context %d might be blocked for itself\n", + context->id); + goto stats; + } + + if (kgsl_cmdbatch_events_pending(cmdbatch)) { + dev_err(device->dev, + " context[%d] (ts=%d) Active sync points:\n", + context->id, cmdbatch->timestamp); + + kgsl_dump_syncpoints(device, cmdbatch); + } + } + +stats: + memset(buf, 0, sizeof(buf)); + + pos = 0; + + for (index = 0; index < SUBMIT_RETIRE_TICKS_SIZE; index++) { + uint64_t msecs; + unsigned int usecs; + + if (!drawctxt->submit_retire_ticks[index]) + continue; + msecs = drawctxt->submit_retire_ticks[index] * 10; + usecs = do_div(msecs, 192); + usecs = do_div(msecs, 1000); + pos += snprintf(buf + pos, sizeof(buf) - pos, "%d.%0d ", + (unsigned int)msecs, usecs); + } + dev_err(device->dev, " context[%d]: submit times: %s\n", + context->id, buf); + + spin_unlock_bh(&drawctxt->lock); +} + +/** + * adreno_drawctxt_wait() - sleep until a timestamp expires + * @adreno_dev: pointer to the adreno_device struct + * @drawctxt: Pointer to the draw context to sleep for + * @timetamp: Timestamp to wait on + * @timeout: Number of jiffies to wait (0 for infinite) + * + * Register an event to wait for a timestamp on a context and sleep until it + * has past. 
Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0 + * on success + */ +int adreno_drawctxt_wait(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret; + long ret_temp; + + if (kgsl_context_detached(context)) + return -ENOENT; + + if (kgsl_context_invalid(context)) + return -EDEADLK; + + trace_adreno_drawctxt_wait_start(-1, context->id, timestamp); + + ret = kgsl_add_event(device, &context->events, timestamp, + wait_callback, (void *) drawctxt); + if (ret) + goto done; + + /* + * If timeout is 0, wait forever. msecs_to_jiffies will force + * values larger than INT_MAX to an infinite timeout. + */ + if (timeout == 0) + timeout = UINT_MAX; + + ret_temp = wait_event_interruptible_timeout(drawctxt->waiting, + _check_context_timestamp(device, context, timestamp), + msecs_to_jiffies(timeout)); + + if (ret_temp == 0) { + ret = -ETIMEDOUT; + goto done; + } else if (ret_temp < 0) { + ret = (int) ret_temp; + goto done; + } + ret = 0; + + /* -EDEADLK if the context was invalidated while we were waiting */ + if (kgsl_context_invalid(context)) + ret = -EDEADLK; + + + /* Return -EINVAL if the context was detached while we were waiting */ + if (kgsl_context_detached(context)) + ret = -ENOENT; + +done: + trace_adreno_drawctxt_wait_done(-1, context->id, timestamp, ret); + return ret; +} + +/** + * adreno_drawctxt_wait_rb() - Wait for the last RB timestamp at which this + * context submitted a command to the corresponding RB + * @adreno_dev: The device on which the timestamp is active + * @context: The context which subbmitted command to RB + * @timestamp: The RB timestamp of last command submitted to RB by context + * @timeout: Timeout value for the wait + */ +static int adreno_drawctxt_wait_rb(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret = 0; + + /* Needs to hold the device mutex */ + BUG_ON(!mutex_is_locked(&device->mutex)); + + /* + * If the context is invalid then return immediately - we may end up + * waiting for a timestamp that will never come + */ + if (kgsl_context_invalid(context)) + goto done; + + trace_adreno_drawctxt_wait_start(drawctxt->rb->id, context->id, + timestamp); + + ret = adreno_ringbuffer_waittimestamp(drawctxt->rb, timestamp, timeout); +done: + trace_adreno_drawctxt_wait_done(drawctxt->rb->id, context->id, + timestamp, ret); + return ret; +} + +static int drawctxt_detach_cmdbatches(struct adreno_context *drawctxt, + struct kgsl_cmdbatch **list) +{ + int count = 0; + + while (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + struct kgsl_cmdbatch *cmdbatch = + drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + drawctxt->cmdqueue_head = (drawctxt->cmdqueue_head + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + list[count++] = cmdbatch; + } + + return count; +} + +/** + * adreno_drawctxt_invalidate() - Invalidate an adreno draw context + * @device: Pointer to the KGSL device structure for the GPU + * @context: Pointer to the KGSL context structure + * + * Invalidate the context and remove all queued commands and cancel any pending + * waiters + */ +void adreno_drawctxt_invalidate(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct 
kgsl_cmdbatch *list[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + int i, count; + + trace_adreno_drawctxt_invalidate(drawctxt); + + spin_lock(&drawctxt->lock); + set_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv); + + /* + * set the timestamp to the last value since the context is invalidated + * and we want the pending events for this context to go away + */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + /* Get rid of commands still waiting in the queue */ + count = drawctxt_detach_cmdbatches(drawctxt, list); + spin_unlock(&drawctxt->lock); + + for (i = 0; i < count; i++) { + kgsl_cancel_events_timestamp(device, &context->events, + list[i]->timestamp); + kgsl_cmdbatch_destroy(list[i]); + } + + /* Make sure all pending events are processed or cancelled */ + kgsl_flush_event_group(device, &context->events); + + /* Give the bad news to everybody waiting around */ + wake_up_all(&drawctxt->waiting); + wake_up_all(&drawctxt->wq); +} + +/* + * Set the priority of the context based on the flags passed into context + * create. If the priority is not set in the flags, then the kernel can + * assign any priority it desires for the context. + */ +#define KGSL_CONTEXT_PRIORITY_MED 0x8 + +static inline void _set_context_priority(struct adreno_context *drawctxt) +{ + /* If the priority is not set by user, set it for them */ + if ((drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) == + KGSL_CONTEXT_PRIORITY_UNDEF) + drawctxt->base.flags |= (KGSL_CONTEXT_PRIORITY_MED << + KGSL_CONTEXT_PRIORITY_SHIFT); + + /* Store the context priority */ + drawctxt->base.priority = + (drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) >> + KGSL_CONTEXT_PRIORITY_SHIFT; +} + +/** + * adreno_drawctxt_create - create a new adreno draw context + * @dev_priv: the owner of the context + * @flags: flags for the context (passed from user space) + * + * Create and return a new draw context for the 3D core. 
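+ *
+ * Purely as an illustration (assuming the standard msm_kgsl UAPI, which is
+ * not part of this file), a userspace client would typically reach this
+ * through IOCTL_KGSL_DRAWCTXT_CREATE with a preamble context:
+ *
+ *	struct kgsl_drawctxt_create req = {
+ *		.flags = KGSL_CONTEXT_PREAMBLE | KGSL_CONTEXT_NO_GMEM_ALLOC,
+ *	};
+ *	ioctl(fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
+ *
+ * since legacy (non-preamble) context switching is rejected below.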
+ */
+struct kgsl_context *
+adreno_drawctxt_create(struct kgsl_device_private *dev_priv,
+			uint32_t *flags)
+{
+	struct adreno_context *drawctxt;
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+	unsigned long local;
+
+	local = *flags & (KGSL_CONTEXT_PREAMBLE |
+		KGSL_CONTEXT_NO_GMEM_ALLOC |
+		KGSL_CONTEXT_PER_CONTEXT_TS |
+		KGSL_CONTEXT_USER_GENERATED_TS |
+		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
+		KGSL_CONTEXT_CTX_SWITCH |
+		KGSL_CONTEXT_PRIORITY_MASK |
+		KGSL_CONTEXT_TYPE_MASK |
+		KGSL_CONTEXT_PWR_CONSTRAINT |
+		KGSL_CONTEXT_IFH_NOP |
+		KGSL_CONTEXT_SECURE |
+		KGSL_CONTEXT_PREEMPT_STYLE_MASK);
+
+	/* Check for errors before trying to initialize */
+
+	/* If preemption is not supported, ignore preemption request */
+	if (!test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv))
+		local &= ~KGSL_CONTEXT_PREEMPT_STYLE_MASK;
+
+	/* We no longer support legacy context switching */
+	if ((local & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		(local & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+		KGSL_DEV_ERR_ONCE(device,
+			"legacy context switch not supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Make sure that our target can support secure contexts if requested */
+	if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) &&
+			(local & KGSL_CONTEXT_SECURE)) {
+		KGSL_DEV_ERR_ONCE(device, "Secure context not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
+
+	if (drawctxt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	drawctxt->timestamp = 0;
+
+	drawctxt->base.flags = local;
+
+	/* Always enable per-context timestamps */
+	drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
+	drawctxt->type = (drawctxt->base.flags & KGSL_CONTEXT_TYPE_MASK)
+		>> KGSL_CONTEXT_TYPE_SHIFT;
+	spin_lock_init(&drawctxt->lock);
+	init_waitqueue_head(&drawctxt->wq);
+	init_waitqueue_head(&drawctxt->waiting);
+
+	/* Set the context priority */
+	_set_context_priority(drawctxt);
+	/* Set the context ringbuffer */
+	drawctxt->rb = adreno_ctx_get_rb(adreno_dev, drawctxt);
+
+	/*
+	 * Set up the plist node for the dispatcher. Insert the node into the
+	 * drawctxt pending list based on priority.
+	 */
+	plist_node_init(&drawctxt->pending, drawctxt->base.priority);
+
+	/*
+	 * Now initialize the common part of the context. This allocates the
+	 * context id, after which another thread could look it up, so all
+	 * initialization that doesn't require the context id must be done
+	 * before this call.
+	 */
+	ret = kgsl_context_init(dev_priv, &drawctxt->base);
+	if (ret != 0) {
+		kfree(drawctxt);
+		return ERR_PTR(ret);
+	}
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
+			0);
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
+			0);
+
+	adreno_context_debugfs_init(ADRENO_DEVICE(device), drawctxt);
+
+	/* Copy back whatever flags we decided were valid */
+	*flags = drawctxt->base.flags;
+	return &drawctxt->base;
+}
+
+/**
+ * adreno_drawctxt_sched() - Schedule a previously blocked context
+ * @device: pointer to a KGSL device
+ * @drawctxt: drawctxt to reschedule
+ *
+ * This function is called by the core when it knows that a previously blocked
+ * context has been unblocked.
The default adreno response is to reschedule the + * context on the dispatcher + */ +void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context) +{ + adreno_dispatcher_queue_context(device, ADRENO_CONTEXT(context)); +} + +/** + * adreno_drawctxt_detach(): detach a context from the GPU + * @context: Generic KGSL context container for the context + * + */ +void adreno_drawctxt_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + struct adreno_device *adreno_dev; + struct adreno_context *drawctxt; + struct adreno_ringbuffer *rb; + int ret, count, i; + struct kgsl_cmdbatch *list[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + + if (context == NULL) + return; + + device = context->device; + adreno_dev = ADRENO_DEVICE(device); + drawctxt = ADRENO_CONTEXT(context); + rb = drawctxt->rb; + + /* deactivate context */ + mutex_lock(&device->mutex); + if (rb->drawctxt_active == drawctxt) { + if (adreno_dev->cur_rb == rb) { + if (!kgsl_active_count_get(device)) { + adreno_drawctxt_switch(adreno_dev, rb, NULL, 0); + kgsl_active_count_put(device); + } else + BUG(); + } else + adreno_drawctxt_switch(adreno_dev, rb, NULL, 0); + } + mutex_unlock(&device->mutex); + + spin_lock(&drawctxt->lock); + count = drawctxt_detach_cmdbatches(drawctxt, list); + spin_unlock(&drawctxt->lock); + + for (i = 0; i < count; i++) { + /* + * If the context is deteached while we are waiting for + * the next command in GFT SKIP CMD, print the context + * detached status here. + */ + adreno_fault_skipcmd_detached(device, drawctxt, list[i]); + kgsl_cmdbatch_destroy(list[i]); + } + + /* + * internal_timestamp is set in adreno_ringbuffer_addcmds, + * which holds the device mutex. + */ + mutex_lock(&device->mutex); + + /* + * Wait for the last global timestamp to pass before continuing. + * The maxumum wait time is 30s, some large IB's can take longer + * than 10s and if hang happens then the time for the context's + * commands to retire will be greater than 10s. 30s should be sufficient + * time to wait for the commands even if a hang happens. + */ + ret = adreno_drawctxt_wait_rb(adreno_dev, context, + drawctxt->internal_timestamp, 30 * 1000); + + /* + * If the wait for global fails due to timeout then nothing after this + * point is likely to work very well - BUG_ON() so we can take advantage + * of the debug tools to figure out what the h - e - double hockey + * sticks happened. 
If EAGAIN error is returned then recovery will kick + * in and there will be no more commands in the RB pipe from this + * context which is waht we are waiting for, so ignore -EAGAIN error + */ + BUG_ON(ret && ret != -EAGAIN); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + adreno_profile_process_results(adreno_dev); + + mutex_unlock(&device->mutex); + + /* wake threads waiting to submit commands from this context */ + wake_up_all(&drawctxt->waiting); + wake_up_all(&drawctxt->wq); +} + +void adreno_drawctxt_destroy(struct kgsl_context *context) +{ + struct adreno_context *drawctxt; + if (context == NULL) + return; + + drawctxt = ADRENO_CONTEXT(context); + debugfs_remove_recursive(drawctxt->debug_root); + kfree(drawctxt); +} + +/** + * adreno_drawctxt_switch - switch the current draw context in a given RB + * @adreno_dev - The 3D device that owns the context + * @rb: The ringubffer pointer on which the current context is being changed + * @drawctxt - the 3D context to switch to + * @flags - Flags to accompany the switch (from user space) + * + * Switch the current draw context in given RB + */ + +int adreno_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt, + unsigned int flags) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pagetable *new_pt; + int ret = 0; + + /* We always expect a valid rb */ + BUG_ON(!rb); + + /* already current? */ + if (rb->drawctxt_active == drawctxt) + return ret; + + trace_adreno_drawctxt_switch(rb, + drawctxt, flags); + + /* Get a refcount to the new instance */ + if (drawctxt) { + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + new_pt = drawctxt->base.proc_priv->pagetable; + } else { + /* No context - set the default pagetable and thats it. */ + new_pt = device->mmu.defaultpagetable; + } + ret = adreno_iommu_set_pt_ctx(rb, new_pt, drawctxt); + if (ret) { + KGSL_DRV_ERR(device, + "Failed to set pagetable on rb %d\n", rb->id); + return ret; + } + + /* Put the old instance of the active drawctxt */ + if (rb->drawctxt_active) + kgsl_context_put(&rb->drawctxt_active->base); + + rb->drawctxt_active = drawctxt; + return 0; +} diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h new file mode 100644 index 000000000000..d50460a544b1 --- /dev/null +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -0,0 +1,131 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __ADRENO_DRAWCTXT_H +#define __ADRENO_DRAWCTXT_H + +struct adreno_context_type { + unsigned int type; + const char *str; +}; + +#define ADRENO_CONTEXT_CMDQUEUE_SIZE 128 +#define SUBMIT_RETIRE_TICKS_SIZE 7 + +struct kgsl_device; +struct adreno_device; +struct kgsl_device_private; +struct kgsl_context; + +/** + * struct adreno_context - Adreno GPU draw context + * @timestamp: Last issued context-specific timestamp + * @internal_timestamp: Global timestamp of the last issued command + * NOTE: guarded by device->mutex, not drawctxt->lock! + * @type: Context type (GL, CL, RS) + * @lock: Spinlock that protects the cmdqueue + * @cmdqueue: Queue of command batches waiting to be dispatched for this context + * @cmdqueue_head: Head of the cmdqueue queue + * @cmdqueue_tail: Tail of the cmdqueue queue + * @pending: Priority list node for the dispatcher list of pending contexts + * @wq: Workqueue structure for contexts to sleep pending room in the queue + * @waiting: Workqueue structure for contexts waiting for a timestamp or event + * @queued: Number of commands queued in the cmdqueue + * @fault_policy: GFT fault policy set in cmdbatch_skip_cmd() + * @debug_root: debugfs entry for this context. + * @queued_timestamp: The last timestamp that was queued on this context + * @rb: The ringbuffer in which this context submits commands. + * @submitted_timestamp: The last timestamp that was submitted for this context + * @submit_retire_ticks: Array to hold cmdbatch execution times from submit + * to retire + * @ticks_index: The index into submit_retire_ticks[] where the new delta will + * be written. + */ +struct adreno_context { + struct kgsl_context base; + unsigned int timestamp; + unsigned int internal_timestamp; + unsigned int type; + spinlock_t lock; + + /* Dispatcher */ + struct kgsl_cmdbatch *cmdqueue[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + unsigned int cmdqueue_head; + unsigned int cmdqueue_tail; + + struct plist_node pending; + wait_queue_head_t wq; + wait_queue_head_t waiting; + + int queued; + unsigned int fault_policy; + struct dentry *debug_root; + unsigned int queued_timestamp; + struct adreno_ringbuffer *rb; + unsigned int submitted_timestamp; + uint64_t submit_retire_ticks[SUBMIT_RETIRE_TICKS_SIZE]; + int ticks_index; +}; + +/* Flag definitions for flag field in adreno_context */ + +/* Set when sync timer of cmdbatch belonging to the context times out */ +#define ADRENO_CONTEXT_CMDBATCH_FLAG_FENCE_LOG BIT(0) + +/** + * enum adreno_context_priv - Private flags for an adreno draw context + * @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered) + * @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang + * @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang + * and fault tolerance was successful + * @ADRENO_CONTEXT_SKIP_EOF - Context skips IBs until the next end of frame + * marker. + * @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission. + * @ADRENO_CONTEXT_SKIP_CMD - Context's command batch is skipped during + fault tolerance.
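+ * + * Illustrative sketch (assuming, as the KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC base suggests, that these values are bit positions in the kgsl_context priv bitmap), a fault is typically recorded and later tested with the standard bitops: + * + * set_bit(ADRENO_CONTEXT_FAULT, &drawctxt->base.priv); + * if (test_bit(ADRENO_CONTEXT_FAULT, &drawctxt->base.priv)) + * reject or skip further submissions from this context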
+ */ +enum adreno_context_priv { + ADRENO_CONTEXT_FAULT = KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC, + ADRENO_CONTEXT_GPU_HANG, + ADRENO_CONTEXT_GPU_HANG_FT, + ADRENO_CONTEXT_SKIP_EOF, + ADRENO_CONTEXT_FORCE_PREAMBLE, + ADRENO_CONTEXT_SKIP_CMD, +}; + +struct kgsl_context *adreno_drawctxt_create(struct kgsl_device_private *, + uint32_t *flags); + +void adreno_drawctxt_detach(struct kgsl_context *context); + +void adreno_drawctxt_destroy(struct kgsl_context *context); + +void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context); + +struct adreno_ringbuffer; +int adreno_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt, + unsigned int flags); + +int adreno_drawctxt_wait(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout); + +void adreno_drawctxt_invalidate(struct kgsl_device *device, + struct kgsl_context *context); + +void adreno_drawctxt_dump(struct kgsl_device *device, + struct kgsl_context *context); + +#endif /* __ADRENO_DRAWCTXT_H */ diff --git a/drivers/gpu/msm/adreno_ioctl.c b/drivers/gpu/msm/adreno_ioctl.c new file mode 100644 index 000000000000..13d3353946ca --- /dev/null +++ b/drivers/gpu/msm/adreno_ioctl.c @@ -0,0 +1,168 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ioctl.h> +#include "kgsl_device.h" +#include "adreno.h" +#include "adreno_a5xx.h" + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_get *get = data; + int result; + + mutex_lock(&device->mutex); + + /* + * adreno_perfcounter_get() is called by kernel clients + * during start(), so it is not safe to take an + * active count inside that function. 
+ */ + result = kgsl_active_count_get(device); + + if (result == 0) { + result = adreno_perfcounter_get(adreno_dev, + get->groupid, get->countable, &get->offset, + &get->offset_hi, PERFCOUNTER_FLAG_NONE); + kgsl_active_count_put(device); + } + mutex_unlock(&device->mutex); + + return (long) result; +} + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_put *put = data; + int result; + + mutex_lock(&device->mutex); + result = adreno_perfcounter_put(adreno_dev, put->groupid, + put->countable, PERFCOUNTER_FLAG_NONE); + mutex_unlock(&device->mutex); + + return (long) result; +} + +static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_query *query = data; + + return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid, + query->countables, query->count, &query->max_counters); +} + +static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_read *read = data; + + return (long) adreno_perfcounter_read_group(adreno_dev, read->reads, + read->count); +} + +static long adreno_ioctl_preemption_counters_query( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_preemption_counters_query *read = data; + int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + int levels_to_copy; + + if (!adreno_is_a5xx(adreno_dev) || + !adreno_is_preemption_enabled(adreno_dev)) + return -EOPNOTSUPP; + + if (read->size_user < size_level) + return -EINVAL; + + /* Calculate number of preemption counter levels to copy to userspace */ + levels_to_copy = (read->size_user / size_level); + if (levels_to_copy > gpudev->num_prio_levels) + levels_to_copy = gpudev->num_prio_levels; + + if (copy_to_user((void __user *) (uintptr_t) read->counters, + adreno_dev->preemption_counters.hostptr, + levels_to_copy * size_level)) + return -EFAULT; + + read->max_priority_level = levels_to_copy; + read->size_priority_level = size_level; + + return 0; +} + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + unsigned char data[128]; + long ret; + int i; + + for (i = 0; i < len; i++) { + if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd)) + break; + } + + if (i == len) { + KGSL_DRV_INFO(dev_priv->device, + "invalid ioctl code 0x%08X\n", cmd); + return -ENOIOCTLCMD; + } + + BUG_ON(_IOC_SIZE(cmds[i].cmd) > sizeof(data)); + + if (_IOC_SIZE(cmds[i].cmd)) { + ret = kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data); + + if (ret) + return ret; + } else { + memset(data, 0, sizeof(data)); + } + + ret = cmds[i].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[i].cmd)) + ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data); + + return ret; +} + +static struct kgsl_ioctl adreno_ioctl_funcs[] = { + { IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get }, + { IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put }, + { IOCTL_KGSL_PERFCOUNTER_QUERY, 
adreno_ioctl_perfcounter_query }, + { IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read }, + { IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY, + adreno_ioctl_preemption_counters_query }, +}; + +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + return adreno_ioctl_helper(dev_priv, cmd, arg, + adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs)); +} diff --git a/drivers/gpu/msm/adreno_iommu.c b/drivers/gpu/msm/adreno_iommu.c new file mode 100644 index 000000000000..45f5c9be64d9 --- /dev/null +++ b/drivers/gpu/msm/adreno_iommu.c @@ -0,0 +1,980 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "a3xx_reg.h" +#include "adreno_pm4types.h" +#include "kgsl_mmu.h" + +#define A5XX_PFP_PER_PROCESS_UCODE_VER 0x5FF064 +#define A5XX_PM4_PER_PROCESS_UCODE_VER 0x5FF052 + +/* + * _wait_reg() - make CP poll on a register + * @cmds: Pointer to memory where commands are to be added + * @addr: Register address to poll for + * @val: Value to poll for + * @mask: The value against which register value is masked + * @interval: wait interval + */ +static unsigned int _wait_reg(struct adreno_device *adreno_dev, + unsigned int *cmds, unsigned int addr, + unsigned int val, unsigned int mask, + unsigned int interval) +{ + unsigned int *start = cmds; + + if (adreno_is_a3xx(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_WAIT_REG_EQ, 4); + *cmds++ = addr; + *cmds++ = val; + *cmds++ = mask; + *cmds++ = interval; + } else { + *cmds++ = cp_mem_packet(adreno_dev, CP_WAIT_REG_MEM, 5, 1); + *cmds++ = 0x3; /* Mem Space = Register, Function = Equals */ + cmds += cp_gpuaddr(adreno_dev, cmds, addr); /* Poll address */ + *cmds++ = val; /* ref val */ + *cmds++ = mask; + *cmds++ = interval; + + /* WAIT_REG_MEM turns back on protected mode - push it off */ + *cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + } + + return cmds - start; +} + +static unsigned int _iommu_lock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + + /* + * If we don't have this register, probe should have forced + * global pagetables and we shouldn't get here. + * BUG() so we don't debug a bad register write. 
+ */ + BUG_ON(iommu->micro_mmu_ctrl == UINT_MAX); + + /* + * glue commands together until next + * WAIT_FOR_ME + */ + cmds += _wait_reg(adreno_dev, cmds, + adreno_getreg(adreno_dev, ADRENO_REG_CP_WFI_PEND_CTR), + 1, 0xFFFFFFFF, 0xF); + + /* set the iommu lock bit */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = iommu->micro_mmu_ctrl >> 2; + /* AND to unmask the lock bit */ + *cmds++ = ~(KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT); + /* OR to set the IOMMU lock bit */ + *cmds++ = KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT; + + /* wait for smmu to lock */ + cmds += _wait_reg(adreno_dev, cmds, iommu->micro_mmu_ctrl >> 2, + KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE, + KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE, 0xF); + + return cmds - start; +} + +static unsigned int _iommu_unlock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + unsigned int *start = cmds; + + BUG_ON(iommu->micro_mmu_ctrl == UINT_MAX); + + /* unlock the IOMMU lock */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = iommu->micro_mmu_ctrl >> 2; + /* AND to unmask the lock bit */ + *cmds++ = ~(KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT); + /* OR with 0 so lock bit is unset */ + *cmds++ = 0; + + /* release all commands since _iommu_lock() with wait_for_me */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - start; +} + +static unsigned int _vbif_lock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + /* + * glue commands together until next + * WAIT_FOR_ME + */ + cmds += _wait_reg(adreno_dev, cmds, + adreno_getreg(adreno_dev, ADRENO_REG_CP_WFI_PEND_CTR), + 1, 0xFFFFFFFF, 0xF); + + /* MMU-500 VBIF stall */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + *cmds++ = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to set the HALT bit */ + *cmds++ = 0x1; + + /* Wait for acknowledgement */ + cmds += _wait_reg(adreno_dev, cmds, + A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1, + 1, 0xFFFFFFFF, 0xF); + + return cmds - start; +} + +static unsigned int _vbif_unlock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + /* MMU-500 VBIF unstall */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + *cmds++ = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to reset the HALT bit */ + *cmds++ = 0; + + /* release all commands since _vbif_lock() with wait_for_me */ + cmds += cp_wait_for_me(adreno_dev, cmds); + return cmds - start; +} + +static unsigned int _cp_smmu_reg(struct adreno_device *adreno_dev, + unsigned int *cmds, + enum kgsl_iommu_reg_map reg, + unsigned int num) +{ + unsigned int *start = cmds; + unsigned int offset; + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + + offset = kgsl_mmu_get_reg_ahbaddr(&adreno_dev->dev.mmu, + KGSL_IOMMU_CONTEXT_USER, reg) >> 2; + + if (adreno_is_a5xx(adreno_dev) || iommu->version == 1) { + *cmds++ = cp_register(adreno_dev, offset, num); + } else if (adreno_is_a3xx(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_REG_WR_NO_CTXT, num + 1); + *cmds++ = offset; + } else if (adreno_is_a4xx(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_WIDE_REG_WRITE, num + 1); + *cmds++ = offset; + } else { + BUG(); + } + return cmds - start; +} + +static unsigned int _tlbiall(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + unsigned int 
tlbstatus; + + tlbstatus = kgsl_mmu_get_reg_ahbaddr(&adreno_dev->dev.mmu, + KGSL_IOMMU_CONTEXT_USER, + KGSL_IOMMU_CTX_TLBSTATUS) >> 2; + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TLBIALL, 1); + *cmds++ = 1; + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TLBSYNC, 1); + *cmds++ = 0; + + cmds += _wait_reg(adreno_dev, cmds, tlbstatus, 0, + KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF); + + return cmds - start; +} + + +/** + * _adreno_iommu_add_idle_cmds - Add pm4 packets for GPU idle + * @adreno_dev - Pointer to device structure + * @cmds - Pointer to memory where idle commands need to be added + */ +static inline int _adreno_iommu_add_idle_cmds(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + + if (adreno_is_a3xx(adreno_dev)) + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - start; +} + +/* + * _invalidate_uche_cpu() - Invalidate UCHE using CPU + * @adreno_dev: the device + */ +static void _invalidate_uche_cpu(struct adreno_device *adreno_dev) +{ + /* Invalidate UCHE using CPU */ + if (adreno_is_a5xx(adreno_dev)) + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0, 0x12); + else if (adreno_is_a4xx(adreno_dev)) { + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0, 0); + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE1, 0x12); + } else if (adreno_is_a3xx(adreno_dev)) { + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0, 0); + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE1, + 0x90000000); + } else { + BUG(); + } +} + +/* + * _ctx_switch_use_cpu_path() - Decide whether to use cpu path + * @adreno_dev: the device + * @new_pt: pagetable to switch + * @rb: ringbuffer for ctx switch + * + * If we are idle and switching to default pagetable it is + * preferable to poke the iommu directly rather than using the + * GPU command stream. + */ +static bool _ctx_switch_use_cpu_path( + struct adreno_device *adreno_dev, + struct kgsl_pagetable *new_pt, + struct adreno_ringbuffer *rb) +{ + /* + * If rb is current, we can use cpu path when GPU is + * idle and we are switching to default pt. + * If rb is not current, we can use cpu path when rb has no + * pending commands (rptr = wptr) and we are switching to default pt. 
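+ * For example, the detach path in adreno_drawctxt_detach() switches the RB back to the NULL context, i.e. to the default pagetable; if the GPU has already idled by then, TTBR0 can simply be programmed from the CPU instead of queueing PM4 commands on an empty ring (see _set_pagetable_cpu() below).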
+ */ + if (adreno_dev->cur_rb == rb) + return adreno_isidle(&adreno_dev->dev) && + (new_pt == adreno_dev->dev.mmu.defaultpagetable); + else if ((rb->wptr == rb->rptr) && + (new_pt == adreno_dev->dev.mmu.defaultpagetable)) + return true; + + return false; +} + +/** + * adreno_iommu_set_apriv() - Generate commands to set/reset the APRIV + * @adreno_dev: Device on which the commands will execute + * @cmds: The memory pointer where commands are generated + * @set: If set then APRIV is set else reset + * + * Returns the number of commands generated + */ +unsigned int adreno_iommu_set_apriv(struct adreno_device *adreno_dev, + unsigned int *cmds, int set) +{ + unsigned int *cmds_orig = cmds; + + /* adreno 3xx doesn't have the CP_CNTL.APRIV field */ + if (adreno_is_a3xx(adreno_dev)) + return 0; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + cmds += cp_wait_for_me(adreno_dev, cmds); + *cmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev, + ADRENO_REG_CP_CNTL), 1); + if (set) + *cmds++ = 1; + else + *cmds++ = 0; + + return cmds - cmds_orig; +} + +static inline int _adreno_iommu_add_idle_indirect_cmds( + struct adreno_device *adreno_dev, + unsigned int *cmds, uint64_t nop_gpuaddr) +{ + unsigned int *start = cmds; + /* + * Adding an indirect buffer ensures that the prefetch stalls until + * the commands in indirect buffer have completed. We need to stall + * prefetch with a nop indirect buffer when updating pagetables + * because it provides stabler synchronization */ + cmds += cp_wait_for_me(adreno_dev, cmds); + *cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, nop_gpuaddr); + *cmds++ = 2; + cmds += cp_wait_for_idle(adreno_dev, cmds); + return cmds - start; +} + +/** + * _adreno_mmu_set_pt_update_condition() - Generate commands to setup a + * flag to indicate whether pt switch is required or not by comparing + * current pt id and incoming pt id + * @rb: The RB on which the commands will execute + * @cmds: The pointer to memory where the commands are placed. + * @ptname: Incoming pt id to set to + * + * Returns number of commands added. 
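+ * + * The GPU-side effect is roughly (pseudo-code of the packets built below, not literal driver code): + * + * switch_pt_enable = 1; + * if (current_global_ptname == ptname) + * switch_pt_enable = 0; + * + * so the CP_COND_EXEC issued in _adreno_iommu_set_pt_v1() only runs the TTBR0/TLBI sequence when the incoming pagetable actually differs from the one currently programmed.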
+ */ +static unsigned int _adreno_mmu_set_pt_update_condition( + struct adreno_ringbuffer *rb, + unsigned int *cmds, unsigned int ptname) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds_orig = cmds; + /* + * write 1 to switch pt flag indicating that we need to execute the + * pt switch commands + */ + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 1; + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + /* + * The current ptname is + * directly compared to the incoming pt id + */ + *cmds++ = cp_mem_packet(adreno_dev, CP_COND_WRITE, 6, 2); + /* write to mem space, when a mem space is equal to ref val */ + *cmds++ = (1 << 8) | (1 << 4) | 3; + cmds += cp_gpuaddr(adreno_dev, cmds, + (adreno_dev->ringbuffers[0].pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname))); + *cmds++ = ptname; + *cmds++ = 0xFFFFFFFF; + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 0; + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +/** + * _adreno_iommu_pt_update_pid_to_mem() - Add commands to write to memory the + * pagetable id. + * @rb: The ringbuffer on which these commands will execute + * @cmds: Pointer to memory where the commands are copied + * @ptname: The pagetable id + */ +static unsigned int _adreno_iommu_pt_update_pid_to_mem( + struct adreno_ringbuffer *rb, + unsigned int *cmds, int ptname) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds_orig = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + current_rb_ptname))); + *cmds++ = ptname; + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (adreno_dev->ringbuffers[0].pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname))); + *cmds++ = ptname; + /* pagetable switch done, Housekeeping: set the switch_pt_enable to 0 */ + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 0; + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +static unsigned int _adreno_iommu_set_pt_v1(struct adreno_ringbuffer *rb, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr, u32 ptname) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + unsigned int *cond_exec_ptr; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + /* set flag that indicates whether pt switch is required*/ + cmds += _adreno_mmu_set_pt_update_condition(rb, cmds, ptname); + *cmds++ = cp_mem_packet(adreno_dev, CP_COND_EXEC, 4, 2); + cmds += cp_gpuaddr(adreno_dev, cmds, + 
(rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 1; + /* Exec count to be filled later */ + cond_exec_ptr = cmds; + cmds++; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + + cmds += _iommu_lock(adreno_dev, cmds); + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + cmds += _cp_smmu_reg(adreno_dev, cmds, + KGSL_IOMMU_CTX_CONTEXTIDR, 1); + *cmds++ = contextidr; + + /* a3xx doesn't have MEQ space to hold the TLBI commands */ + if (adreno_is_a3xx(adreno_dev)) + cmds += _iommu_unlock(adreno_dev, cmds); + + cmds += _tlbiall(adreno_dev, cmds); + + /* unlock or wait for me to finish the TLBI */ + if (!adreno_is_a3xx(adreno_dev)) + cmds += _iommu_unlock(adreno_dev, cmds); + else + cmds += cp_wait_for_me(adreno_dev, cmds); + + /* Exec count ordinal of CP_COND_EXEC packet */ + *cond_exec_ptr = (cmds - cond_exec_ptr - 1); + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + cmds += _adreno_iommu_pt_update_pid_to_mem(rb, cmds, ptname); + + return cmds - cmds_orig; +} + + +static unsigned int _adreno_iommu_set_pt_v2_a3xx(struct kgsl_device *device, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + cmds += _vbif_lock(adreno_dev, cmds); + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_CONTEXTIDR, 1); + *cmds++ = contextidr; + + cmds += _vbif_unlock(adreno_dev, cmds); + + cmds += _tlbiall(adreno_dev, cmds); + + /* wait for me to finish the TLBI */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +static unsigned int _adreno_iommu_set_pt_v2_a4xx(struct kgsl_device *device, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + cmds += _vbif_lock(adreno_dev, cmds); + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_CONTEXTIDR, 1); + *cmds++ = contextidr; + + cmds += _vbif_unlock(adreno_dev, cmds); + + cmds += _tlbiall(adreno_dev, cmds); + + /* wait for me to finish the TLBI */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +static unsigned int _adreno_iommu_set_pt_v2_a5xx(struct kgsl_device *device, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr, + struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + cmds += cp_wait_for_me(adreno_dev, cmds); + + /* CP switches the pagetable and flushes the Caches */ + *cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 3); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + *cmds++ = contextidr; + + *cmds++ = cp_mem_packet(adreno_dev, 
CP_MEM_WRITE, 4, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, ttbr0))); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + *cmds++ = contextidr; + + /* release all commands with wait_for_me */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +/** + * adreno_iommu_set_pt_generate_cmds() - Generate commands to change pagetable + * @rb: The RB pointer on which these commands are to be submitted + * @cmds: The pointer where the commands are placed + * @pt: The pagetable to switch to + */ +unsigned int adreno_iommu_set_pt_generate_cmds( + struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 ttbr0; + u32 contextidr; + unsigned int *cmds_orig = cmds; + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + + ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt); + contextidr = kgsl_mmu_pagetable_get_contextidr(pt); + + cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 1); + + cmds += _adreno_iommu_add_idle_indirect_cmds(adreno_dev, cmds, + device->mmu.setstate_memory.gpuaddr + + KGSL_IOMMU_SETSTATE_NOP_OFFSET); + + if (iommu->version >= 2) { + if (adreno_is_a5xx(adreno_dev)) + cmds += _adreno_iommu_set_pt_v2_a5xx(device, cmds, + ttbr0, contextidr, rb); + else if (adreno_is_a4xx(adreno_dev)) + cmds += _adreno_iommu_set_pt_v2_a4xx(device, cmds, + ttbr0, contextidr); + else if (adreno_is_a3xx(adreno_dev)) + cmds += _adreno_iommu_set_pt_v2_a3xx(device, cmds, + ttbr0, contextidr); + else + BUG(); /* new GPU family? */ + } else { + cmds += _adreno_iommu_set_pt_v1(rb, cmds, ttbr0, contextidr, + pt->name); + } + + /* invalidate all base pointers */ + cmds += cp_invalidate_state(adreno_dev, cmds); + + cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 0); + + return cmds - cmds_orig; +} + +/** + * adreno_iommu_set_pt_ib() - Generate commands to switch pagetable.
The + * commands generated use an IB + * @rb: The RB in which the commands will be executed + * @cmds: Memory pointer where commands are generated + * @pt: The pagetable to switch to + */ +unsigned int adreno_iommu_set_pt_ib(struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds_orig = cmds; + struct kgsl_iommu_pt *iommu_pt = pt->priv; + + /* Write the ttbr0 and contextidr values to pagetable desc memory */ + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + ttbr0))); + *cmds++ = lower_32_bits(iommu_pt->ttbr0); + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + contextidr))); + *cmds++ = iommu_pt->contextidr; + + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + *cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, rb->pt_update_desc.gpuaddr); + *cmds++ = rb->pt_update_desc.size / sizeof(unsigned int); + + return cmds - cmds_orig; +} + +/** + * __add_curr_ctxt_cmds() - Add commands to set a context id in memstore + * @rb: The RB in which the commands will be added for execution + * @cmds: Pointer to memory where commands are added + * @drawctxt: The context whose id is being set in memstore + * + * Returns the number of dwords + */ +static unsigned int __add_curr_ctxt_cmds(struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct adreno_context *drawctxt) +{ + unsigned int *cmds_orig = cmds; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* write the context identifier to memstore memory */ + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, device->memstore.gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, current_context)); + *cmds++ = (drawctxt ? drawctxt->base.id : 0); + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + *cmds++ = (drawctxt ? 
drawctxt->base.id : 0); + + /* Invalidate UCHE for new context */ + if (adreno_is_a5xx(adreno_dev)) { + *cmds++ = cp_register(adreno_dev, + adreno_getreg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0), 1); + *cmds++ = 0x12; + } else if (adreno_is_a4xx(adreno_dev)) { + *cmds++ = cp_register(adreno_dev, + adreno_getreg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0), 2); + *cmds++ = 0; + *cmds++ = 0x12; + } else if (adreno_is_a3xx(adreno_dev)) { + *cmds++ = cp_register(adreno_dev, + adreno_getreg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0), 2); + *cmds++ = 0; + *cmds++ = 0x90000000; + } else + BUG(); + + return cmds - cmds_orig; +} + +/* + * _set_ctxt_cpu() - Set the current context in memstore + * @rb: The ringbuffer memstore to set curr context + * @drawctxt: The context whose id is being set in memstore + */ +static void _set_ctxt_cpu(struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (rb == adreno_dev->cur_rb) { + _invalidate_uche_cpu(adreno_dev); + /* Update global memstore with current context */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context), + drawctxt ? drawctxt->base.id : 0); + } + /* Update rb memstore with current context */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_RB_OFFSET(rb, current_context), + drawctxt ? drawctxt->base.id : 0); +} + +/** + * _set_ctxt_gpu() - Add commands to set the current context in memstore + * @rb: The ringbuffer in which commands to set memstore are added + * @drawctxt: The context whose id is being set in memstore + */ +static int _set_ctxt_gpu(struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + unsigned int link[15], *cmds; + int result; + + cmds = &link[0]; + cmds += __add_curr_ctxt_cmds(rb, cmds, drawctxt); + result = adreno_ringbuffer_issuecmds(rb, 0, link, + (unsigned int)(cmds - link)); + return result; +} + +/** + * _set_pagetable_cpu() - Use CPU to switch the pagetable + * @rb: The rb for which pagetable needs to be switched + * @new_pt: The pagetable to switch to + */ +static int _set_pagetable_cpu(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int result; + + /* update TTBR0 only if we are updating current RB */ + if (adreno_dev->cur_rb == rb) { + result = kgsl_mmu_set_pt(&device->mmu, new_pt); + if (result) + return result; + /* write the new pt set to memory var */ + kgsl_sharedmem_writel(device, + &adreno_dev->ringbuffers[0].pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + current_global_ptname), new_pt->name); + } + + /* Update the RB pagetable info here */ + kgsl_sharedmem_writel(device, &rb->pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + current_rb_ptname), new_pt->name); + kgsl_sharedmem_writeq(device, &rb->pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + ttbr0), kgsl_mmu_pagetable_get_ttbr0(new_pt)); + kgsl_sharedmem_writel(device, &rb->pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + contextidr), kgsl_mmu_pagetable_get_contextidr(new_pt)); + + return 0; +} + +/** + * _set_pagetable_gpu() - Use GPU to switch the pagetable + * @rb: The rb in which commands to switch pagetable are to be + * submitted + * @new_pt: The pagetable to switch to + */ +static int _set_pagetable_gpu(struct 
adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt) +{ + unsigned int *link = NULL, *cmds; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int result; + + link = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (link == NULL) { + result = -ENOMEM; + goto done; + } + + cmds = link; + + /* If we are in a fault the MMU will be reset soon */ + if (test_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv)) + return 0; + + kgsl_mmu_enable_clk(&device->mmu); + + cmds += adreno_iommu_set_pt_generate_cmds(rb, cmds, new_pt); + + if ((unsigned int) (cmds - link) > (PAGE_SIZE / sizeof(unsigned int))) { + KGSL_DRV_ERR(device, "Temp command buffer overflow\n"); + BUG(); + } + /* + * This returns the per context timestamp but we need to + * use the global timestamp for iommu clock disablement + */ + result = adreno_ringbuffer_issuecmds(rb, + KGSL_CMD_FLAGS_PMODE, link, + (unsigned int)(cmds - link)); + + /* + * On error disable the IOMMU clock right away otherwise turn it off + * after the command has been retired + */ + if (result) + kgsl_mmu_disable_clk(&device->mmu); + else + adreno_ringbuffer_mmu_disable_clk_on_ts(device, rb, + rb->timestamp); + +done: + kfree(link); + return result; +} + +/** + * adreno_iommu_init() - Adreno iommu init + * @adreno_dev: Adreno device + */ +int adreno_iommu_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) + return 0; + + /* + * A nop is required in an indirect buffer when switching + * pagetables in-stream + */ + kgsl_sharedmem_writel(device, &device->mmu.setstate_memory, + KGSL_IOMMU_SETSTATE_NOP_OFFSET, + cp_packet(adreno_dev, CP_NOP, 1)); + + /* set iommu features here */ + if (adreno_is_a420(adreno_dev)) + device->mmu.features |= KGSL_MMU_FLUSH_TLB_ON_MAP; + + /* + * A5XX: per process PT is supported starting PFP 0x5FF064 me 0x5FF052 + * versions + */ + if (adreno_is_a5xx(adreno_dev) && + !MMU_FEATURE(&device->mmu, KGSL_MMU_GLOBAL_PAGETABLE)) { + if ((adreno_compare_pfp_version(adreno_dev, + A5XX_PFP_PER_PROCESS_UCODE_VER) < 0) || + (adreno_compare_pm4_version(adreno_dev, + A5XX_PM4_PER_PROCESS_UCODE_VER) < 0)) { + KGSL_DRV_ERR(device, + "Invalid ucode for per process pagetables\n"); + return -ENODEV; + } + } + + return 0; +} + +/** + * adreno_mmu_set_pt_ctx() - Change the pagetable of the current RB + * @device: Pointer to device to which the rb belongs + * @rb: The RB pointer on which pagetable is to be changed + * @new_pt: The new pt the device will change to + * @drawctxt: The context whose pagetable the ringbuffer is switching to, + * NULL means KGSL_CONTEXT_GLOBAL + * + * Returns 0 on success else error code. 
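+ * + * For illustration, mirroring the caller in adreno_drawctxt_switch(): switching to a user context uses that process' pagetable, while a NULL drawctxt falls back to the default pagetable, i.e. roughly + * + * adreno_iommu_set_pt_ctx(rb, drawctxt->base.proc_priv->pagetable, drawctxt); + * adreno_iommu_set_pt_ctx(rb, device->mmu.defaultpagetable, NULL);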
+ */ +int adreno_iommu_set_pt_ctx(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pagetable *cur_pt = device->mmu.defaultpagetable; + int result = 0; + int cpu_path = 0; + + if (rb->drawctxt_active) + cur_pt = rb->drawctxt_active->base.proc_priv->pagetable; + + cpu_path = _ctx_switch_use_cpu_path(adreno_dev, new_pt, rb); + + /* Pagetable switch */ + if (new_pt != cur_pt) { + if (cpu_path) + result = _set_pagetable_cpu(rb, new_pt); + else + result = _set_pagetable_gpu(rb, new_pt); + } + + if (result) { + KGSL_DRV_ERR(device, "Error switching pagetable %d\n", result); + return result; + } + + /* Context switch */ + if (cpu_path) + _set_ctxt_cpu(rb, drawctxt); + else + result = _set_ctxt_gpu(rb, drawctxt); + + if (result) + KGSL_DRV_ERR(device, "Error switching context %d\n", result); + + return result; +} +/** + * adreno_iommu_set_pt_generate_rb_cmds() - Generate commands to switch pt + * in a ringbuffer descriptor + * @rb: The RB whose descriptor is used + * @pt: The pt to switch to + */ +void adreno_iommu_set_pt_generate_rb_cmds(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pt) +{ + if (rb->pt_update_desc.hostptr) + return; + + rb->pt_update_desc.hostptr = rb->pagetable_desc.hostptr + + sizeof(struct adreno_ringbuffer_pagetable_info); + rb->pt_update_desc.size = + adreno_iommu_set_pt_generate_cmds(rb, + rb->pt_update_desc.hostptr, pt) * + sizeof(unsigned int); + rb->pt_update_desc.gpuaddr = rb->pagetable_desc.gpuaddr + + sizeof(struct adreno_ringbuffer_pagetable_info); +} diff --git a/drivers/gpu/msm/adreno_perfcounter.c b/drivers/gpu/msm/adreno_perfcounter.c new file mode 100644 index 000000000000..31cd8c5cd731 --- /dev/null +++ b/drivers/gpu/msm/adreno_perfcounter.c @@ -0,0 +1,1011 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include <linux/module.h> +#include <linux/uaccess.h> + +#include "kgsl.h" +#include "adreno.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "a5xx_reg.h" + +/* Bit flag for RBBM_PERFCTR_CTL */ +#define RBBM_PERFCTR_CTL_ENABLE 0x00000001 + +#define VBIF2_PERF_CNT_SEL_MASK 0x7F +/* offset of clear register from select register */ +#define VBIF2_PERF_CLR_REG_SEL_OFF 8 +/* offset of enable register from select register */ +#define VBIF2_PERF_EN_REG_SEL_OFF 16 +/* offset of high counter from low counter value */ +#define VBIF2_PERF_HIGH_REG_LOW_OFF 8 + +/* offset of clear register from the enable register */ +#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 +/* offset of high counter from low counter value */ +#define VBIF2_PERF_PWR_HIGH_REG_LOW_OFF 8 + +#define REG_64BIT_VAL(hi, lo, val) (((((uint64_t) hi) << 32) | lo) + val) +/* + * Return true if the countable is used and not broken + */ +static inline int active_countable(unsigned int countable) +{ + return ((countable != KGSL_PERFCOUNTER_NOT_USED) && + (countable != KGSL_PERFCOUNTER_BROKEN)); +} + +/** + * adreno_perfcounter_init: Reserve kernel performance counters + * @adreno_dev: Pointer to an adreno_device struct + * + * The kernel needs/wants a certain group of performance counters for + * its own activities. Reserve these performance counters at init time + * to ensure that they are always reserved for the kernel. The performance + * counters used by the kernel can be obtained by the user, but these + * performance counters will remain active as long as the device is alive. + */ +void adreno_perfcounter_init(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->perfcounter_init) + gpudev->perfcounter_init(adreno_dev); +} + +/** + * adreno_perfcounter_write() - Write the physical performance + * counter values. + * @adreno_dev - Adreno device whose registers are to be written to. + * @reg - register address of the physical counter to which the value is + * written. + * + * This function loads the 64 bit saved value into the particular physical + * counter by enabling the corresponding bit in the A3XX_RBBM_PERFCTR_LOAD_CMD* + * registers. + */ +static void adreno_perfcounter_write(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg) +{ + unsigned int val, i; + int cmd[] = { ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3 }; + + /* If not loadable then return quickly */ + if (reg->load_bit < 0) + return; + + /* Get the offset/cmd for loading */ + i = reg->load_bit / 32; + + /* Get the register bit offset for loading */ + val = BIT(reg->load_bit & 31); + + /* Write the saved value to PERFCTR_LOAD_VALUE* registers. */ + adreno_writereg64(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, reg->value); + + /* + * Set the load bit in PERFCTR_LOAD_CMD for the physical counter + * we want to restore. The value in PERFCTR_LOAD_VALUE* is loaded + * into the corresponding physical counter. The value for the select + * register gets cleared once RBBM reads it so no need to clear the + * select register afterwards.
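+ * + * Worked example: for a counter with load_bit == 45, i = 45 / 32 = 1 and val = BIT(45 & 31) = BIT(13), so the write below sets bit 13 of RBBM_PERFCTR_LOAD_CMD1 after priming LOAD_VALUE_LO/HI with reg->value.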
+ */ + adreno_writereg(adreno_dev, cmd[i], val); +} + +/** + * adreno_perfcounter_close() - Release counters initialized by + * adreno_perfcounter_init() + * @adreno_dev: Pointer to an adreno_device struct + */ +void adreno_perfcounter_close(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->perfcounter_close) + gpudev->perfcounter_close(adreno_dev); +} + +/** + * adreno_perfcounter_restore() - Restore performance counters + * @adreno_dev: adreno device to configure + * + * Load the physical performance counters with the 64 bit values which were + * saved on GPU power collapse. + */ +void adreno_perfcounter_restore(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + /* Restore the counters for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + adreno_perfcounter_write(adreno_dev, + &group->regs[counter]); + } + } +} + +/** + * adreno_perfcounter_save() - Save performance counters + * @adreno_dev: adreno device to configure + * + * Save the performance counter values before GPU power collapse. + * The saved values are restored on restart. + * This ensures physical counters are coherent across power-collapse. + */ +inline void adreno_perfcounter_save(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + /* Save the counter values for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + /* Reset the saved value for loadable counters; non-loadable counters accumulate */ + if (group->regs[counter].load_bit >= 0) + group->regs[counter].value = 0; + + group->regs[counter].value = + group->regs[counter].value + + adreno_perfcounter_read(adreno_dev, groupid, + counter); + } + } +} + +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable); + +/** + * adreno_perfcounter_start: Enable performance counters + * @adreno_dev: Adreno device to configure + * + * Ensure that all allocated performance counters are enabled. Since + * the device was most likely stopped, we can't trust that the counters + * are still valid, so make it so.
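+ * + * Sketch of the intended pairing with the helpers above (the exact call sites live in the adreno start/stop paths): adreno_perfcounter_save() runs before power collapse, and adreno_perfcounter_restore() together with adreno_perfcounter_start() bring the saved, still-allocated counters back up when the device restarts.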
+ */ + +void adreno_perfcounter_start(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int i, j; + + if (NULL == counters) + return; + /* group id iter */ + for (i = 0; i < counters->group_count; i++) { + group = &(counters->groups[i]); + + /* countable iter */ + for (j = 0; j < group->reg_count; j++) { + if (!active_countable(group->regs[j].countable)) + continue; + + /* + * The GPU has to be idle before calling the perfcounter + * enable function, but since this function is called + * during start we already know the GPU is idle. + * Since the countable/counter pairs have already been + * validated, there is no way for _enable() to fail so + * no need to check the return code. + */ + adreno_perfcounter_enable(adreno_dev, i, j, + group->regs[j].countable); + } + } +} + +/** + * adreno_perfcounter_read_group() - Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @reads: List of kgsl_perfcounter_read_groups + * @count: Length of list + * + * Read the performance counters for the groupid/countable pairs and return + * the 64 bit result for each pair + */ + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + struct kgsl_perfcounter_read_group *list = NULL; + unsigned int i, j; + int ret = 0; + + if (NULL == counters) + return -EINVAL; + + /* sanity check params passed in */ + if (reads == NULL || count == 0 || count > 100) + return -EINVAL; + + list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count, + GFP_KERNEL); + if (!list) + return -ENOMEM; + + if (copy_from_user(list, reads, + sizeof(struct kgsl_perfcounter_read_group) * count)) { + ret = -EFAULT; + goto done; + } + + mutex_lock(&device->mutex); + ret = kgsl_active_count_get(device); + if (ret) { + mutex_unlock(&device->mutex); + goto done; + } + + /* list iterator */ + for (j = 0; j < count; j++) { + + list[j].value = 0; + + /* Verify that the group ID is within range */ + if (list[j].groupid >= counters->group_count) { + ret = -EINVAL; + break; + } + + group = &(counters->groups[list[j].groupid]); + + /* group/counter iterator */ + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == list[j].countable) { + list[j].value = adreno_perfcounter_read( + adreno_dev, list[j].groupid, i); + break; + } + } + } + + kgsl_active_count_put(device); + mutex_unlock(&device->mutex); + + /* write the data */ + if (ret == 0) + if (copy_to_user(reads, list, + sizeof(struct kgsl_perfcounter_read_group) * count)) + ret = -EFAULT; + +done: + kfree(list); + return ret; +} + +/** + * adreno_perfcounter_get_groupid() - Get the performance counter ID + * @adreno_dev: Adreno device + * @name: Performance counter group name string + * + * Get the groupid based on the name and return this ID + */ + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + int i; + + if (name == NULL || counters == NULL) + return -EINVAL; + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + + /* make sure there is a name for this group */ + if (group->name == NULL) + 
continue; + + /* verify name and length */ + if (strlen(name) == strlen(group->name) && + strcmp(group->name, name) == 0) + return i; + } + + return -EINVAL; +} + +/** + * adreno_perfcounter_get_name() - Get the group name + * @adreno_dev: Adreno device + * @groupid: Desired performance counter groupid + * + * Get the name based on the groupid and return it + */ + +const char *adreno_perfcounter_get_name(struct adreno_device *adreno_dev, + unsigned int groupid) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + + if (counters != NULL && groupid < counters->group_count) + return counters->groups[groupid].name; + + return NULL; +} + +/** + * adreno_perfcounter_query_group: Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countables: Return list of all countables in the groups counters + * @count: Max length of the array + * @max_counters: max counters for the groupid + * + * Query the current state of counters for the group. + */ + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int i, t; + int ret = 0; + unsigned int *buf; + + *max_counters = 0; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + mutex_lock(&device->mutex); + + group = &(counters->groups[groupid]); + *max_counters = group->reg_count; + + /* + * if NULL countable or *count of zero, return max reg_count in + * *max_counters and return success + */ + if (countables == NULL || count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + t = min_t(unsigned int, group->reg_count, count); + + buf = kmalloc(t * sizeof(unsigned int), GFP_KERNEL); + if (buf == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + for (i = 0; i < t; i++) + buf[i] = group->regs[i].countable; + + mutex_unlock(&device->mutex); + + if (copy_to_user(countables, buf, sizeof(unsigned int) * t)) + ret = -EFAULT; + + kfree(buf); + + return ret; +} + +static inline void refcount_group(struct adreno_perfcount_group *group, + unsigned int reg, unsigned int flags, + unsigned int *lo, unsigned int *hi) +{ + if (flags & PERFCOUNTER_FLAG_KERNEL) + group->regs[reg].kernelcount++; + else + group->regs[reg].usercount++; + + if (lo) + *lo = group->regs[reg].offset; + + if (hi) + *hi = group->regs[reg].offset_hi; +} + +/** + * adreno_perfcounter_get: Try to put a countable in an available counter + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be in a counter + * @offset: Return offset of the LO counter assigned + * @offset_hi: Return offset of the HI counter assigned + * @flags: Used to setup kernel perf counters + * + * Try to place a countable in an available counter. 
If the countable is + * already in a counter, reference count the counter/countable pair resource + * and return success + */ + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int empty = -1; + int ret = 0; + + /* always clear return variables */ + if (offset) + *offset = 0; + if (offset_hi) + *offset_hi = 0; + + if (NULL == counters) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED) { + /* + * In fixed groups the countable equals the fixed register the + * user wants. First make sure it is in range + */ + + if (countable >= group->reg_count) + return -EINVAL; + + /* If it is already reserved, just increase the refcounts */ + if ((group->regs[countable].kernelcount != 0) || + (group->regs[countable].usercount != 0)) { + refcount_group(group, countable, flags, + offset, offset_hi); + return 0; + } + + empty = countable; + } else { + unsigned int i; + + /* + * Check if the countable is already associated with a counter. + * Refcount and return the offset, otherwise, try and find an + * empty counter and assign the countable to it. + */ + + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == countable) { + refcount_group(group, i, flags, + offset, offset_hi); + return 0; + } else if (group->regs[i].countable == + KGSL_PERFCOUNTER_NOT_USED) { + /* keep track of unused counter */ + empty = i; + } + } + } + + /* no available counters, so do nothing else */ + if (empty == -1) + return -EBUSY; + + /* enable the new counter */ + ret = adreno_perfcounter_enable(adreno_dev, groupid, empty, countable); + if (ret) + return ret; + /* initialize the new counter */ + group->regs[empty].countable = countable; + + /* set initial kernel and user count */ + if (flags & PERFCOUNTER_FLAG_KERNEL) { + group->regs[empty].kernelcount = 1; + group->regs[empty].usercount = 0; + } else { + group->regs[empty].kernelcount = 0; + group->regs[empty].usercount = 1; + } + + if (offset) + *offset = group->regs[empty].offset; + if (offset_hi) + *offset_hi = group->regs[empty].offset_hi; + + return ret; +} + + +/** + * adreno_perfcounter_put: Release a countable from counter resource + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be freed from a counter + * @flags: Flag to determine if kernel or user space request + * + * Put a performance counter/countable pair that was previously received. If + * noone else is using the countable, free up the counter for others. + */ +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int i; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + /* + * Find if the counter/countable pair is used currently. + * Start cycling through registers in the bank. 
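+ * For example, a counter reserved once with PERFCOUNTER_FLAG_KERNEL and twice from user space (kernelcount == 1, usercount == 2) is only marked KGSL_PERFCOUNTER_NOT_USED again after one kernel put and two user puts drop both refcounts to zero.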
+ */ + for (i = 0; i < group->reg_count; i++) { + /* check if countable assigned is what we are looking for */ + if (group->regs[i].countable == countable) { + /* found pair, book keep count based on request type */ + if (flags & PERFCOUNTER_FLAG_KERNEL && + group->regs[i].kernelcount > 0) + group->regs[i].kernelcount--; + else if (group->regs[i].usercount > 0) + group->regs[i].usercount--; + else + break; + + /* mark available if not used anymore */ + if (group->regs[i].kernelcount == 0 && + group->regs[i].usercount == 0) + group->regs[i].countable = + KGSL_PERFCOUNTER_NOT_USED; + + return 0; + } + } + + return -EINVAL; +} + +static int _perfcounter_enable_pwr(struct adreno_device *adreno_dev, + unsigned int counter) +{ + /* PWR counters enabled by default on A3XX/A4XX so nothing to do */ + if (adreno_is_a3xx(adreno_dev) || adreno_is_a4xx(adreno_dev)) + return 0; + + /* + * On 5XX we have to emulate the PWR counters which are physically + * missing. Program countable 6 on RBBM_PERFCTR_RBBM_0 as a substitute + * for PWR:1. Don't emulate PWR:0 as nobody uses it and we don't want + * to take away too many of the generic RBBM counters. + */ + + if (counter == 0) + return -EINVAL; + + kgsl_regwrite(&adreno_dev->dev, A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); + + return 0; +} + +static void _perfcounter_enable_vbif(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int counter, + unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs[counter]; + /* Write 1, followed by 0 to CLR register for clearing the counter */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); + kgsl_regwrite(device, reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); + kgsl_regwrite(device, reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); + /* enable reg is 8 DWORDS before select reg */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + reg->value = 0; +} + +static void _perfcounter_enable_vbif_pwr(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs[counter]; + /* Write 1, followed by 0 to CLR register for clearing the counter */ + kgsl_regwrite(device, reg->select + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); + kgsl_regwrite(device, reg->select + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); + kgsl_regwrite(device, reg->select, 1); + reg->value = 0; +} + +static void _power_counter_enable_alwayson(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_regwrite(device, A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1); + counters->groups[KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR].regs[0].value = 0; +} + +static void _power_counter_enable_gpmu(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + if (countable > 43) + return; + + reg = &counters->groups[group].regs[counter]; + + /* Move the countable to the correct byte offset */ + countable = countable << ((counter % 4) * 8); + + kgsl_regwrite(device, reg->select, countable); + + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + reg->value = 0; +} + +static void 
_power_counter_enable_default(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + reg = &counters->groups[group].regs[counter]; + kgsl_regwrite(device, reg->select, countable); + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + reg->value = 0; +} + +static int _perfcounter_enable_default(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int group, + unsigned int counter, unsigned int countable) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg; + int i; + int ret = 0; + + /* + * check whether the countable is valid or not by matching it against + * the list on invalid countables + */ + if (gpudev->invalid_countables) { + struct adreno_invalid_countables invalid_countable = + gpudev->invalid_countables[group]; + for (i = 0; i < invalid_countable.num_countables; i++) + if (countable == invalid_countable.countables[i]) + return -EACCES; + } + reg = &(counters->groups[group].regs[counter]); + + if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) { + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + unsigned int buf[4]; + unsigned int *cmds = buf; + int ret; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + *cmds++ = cp_register(adreno_dev, reg->select, 1); + *cmds++ = countable; + /* submit to highest priority RB always */ + ret = adreno_ringbuffer_issuecmds(rb, 0, buf, cmds-buf); + if (ret) + return ret; + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(rb->device); + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + if (ret) + KGSL_DRV_ERR(rb->device, + "Perfcounter %u/%u/%u start via commands failed %d\n", + group, counter, countable, ret); + } else { + /* Select the desired perfcounter */ + kgsl_regwrite(&adreno_dev->dev, reg->select, countable); + } + + if (!ret) + reg->value = 0; + return 0; +} + +/** + * adreno_perfcounter_enable - Configure a performance counter for a countable + * @adreno_dev - Adreno device to configure + * @group - Desired performance counter group + * @counter - Desired performance counter in the group + * @countable - Desired countable + * + * Function is used for adreno cores + * Physically set up a counter within a group with the desired countable + * Return 0 on success else error code + */ +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + + if (counters == NULL) + return -EINVAL; + + if (group >= counters->group_count) + return -EINVAL; + + if (counter >= counters->groups[group].reg_count) + return -EINVAL; + + switch (group) { + case KGSL_PERFCOUNTER_GROUP_ALWAYSON: + /* alwayson counter is global, so init value is 0 */ + break; + case KGSL_PERFCOUNTER_GROUP_PWR: + return _perfcounter_enable_pwr(adreno_dev, counter); + case KGSL_PERFCOUNTER_GROUP_VBIF: + if (countable > VBIF2_PERF_CNT_SEL_MASK) + return -EINVAL; + _perfcounter_enable_vbif(adreno_dev, counters, counter, + countable); + break; + case KGSL_PERFCOUNTER_GROUP_VBIF_PWR: + 
_perfcounter_enable_vbif_pwr(adreno_dev, counters, counter); + break; + case KGSL_PERFCOUNTER_GROUP_SP_PWR: + case KGSL_PERFCOUNTER_GROUP_TP_PWR: + case KGSL_PERFCOUNTER_GROUP_RB_PWR: + case KGSL_PERFCOUNTER_GROUP_CCU_PWR: + case KGSL_PERFCOUNTER_GROUP_UCHE_PWR: + case KGSL_PERFCOUNTER_GROUP_CP_PWR: + _power_counter_enable_default(adreno_dev, counters, group, + counter, countable); + break; + case KGSL_PERFCOUNTER_GROUP_GPMU_PWR: + _power_counter_enable_gpmu(adreno_dev, counters, group, counter, + countable); + break; + case KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR: + _power_counter_enable_alwayson(adreno_dev, counters); + break; + default: + return _perfcounter_enable_default(adreno_dev, counters, group, + counter, countable); + } + + return 0; +} + +static uint64_t _perfcounter_read_alwayson(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + uint64_t val = 0; + + adreno_readreg64(adreno_dev, ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, &val); + + return val + group->regs[counter].value; +} + +static uint64_t _perfcounter_read_pwr(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int in = 0, out, lo = 0, hi = 0; + unsigned int enable_bit; + + reg = &group->regs[counter]; + + /* Remember, counter 0 is not emulated on 5XX */ + if (adreno_is_a5xx(adreno_dev) && (counter == 0)) + return -EINVAL; + + if (adreno_is_a3xx(adreno_dev)) { + /* On A3XX we need to freeze the counter so we can read it */ + if (0 == counter) + enable_bit = 0x00010000; + else + enable_bit = 0x00020000; + + /* freeze counter */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, &in); + out = (in & ~enable_bit); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, out); + } + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* restore the counter control value */ + if (adreno_is_a3xx(adreno_dev)) + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, in); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t _perfcounter_read_vbif(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + + reg = &group->regs[counter]; + + /* freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, + 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, + 1); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t _perfcounter_read_vbif_pwr(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + + reg = &group->regs[counter]; + + /* freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select, 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select, 1); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t 
_perfcounter_read_pwrcntr(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + + reg = &group->regs[counter]; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t _perfcounter_read_default(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + unsigned int in = 0, out; + + reg = &group->regs[counter]; + + /* Freeze the counter */ + if (adreno_is_a3xx(adreno_dev)) { + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, &in); + out = in & ~RBBM_PERFCTR_CTL_ENABLE; + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, out); + } + + /* Read the values */ + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* Re-Enable the counter */ + if (adreno_is_a3xx(adreno_dev)) + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, in); + + return REG_64BIT_VAL(hi, lo, 0); +} + +/** + * adreno_perfcounter_read() - Reads a performance counter + * @adreno_dev: The device on which the counter is running + * @group: The group of the counter + * @counter: The counter within the group + * + * Function is used to read the counter of adreno devices + * Returns the 64 bit counter value on success else 0. + */ +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int counter) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + + /* Lets hope this doesn't fail. Now subfunctions don't need to check */ + if (counters == NULL) + return 0; + + if (groupid >= counters->group_count) + return 0; + + group = &counters->groups[groupid]; + + if (counter >= group->reg_count) + return 0; + + switch (groupid) { + case KGSL_PERFCOUNTER_GROUP_ALWAYSON: + return _perfcounter_read_alwayson(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_VBIF_PWR: + return _perfcounter_read_vbif_pwr(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_VBIF: + return _perfcounter_read_vbif(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_PWR: + return _perfcounter_read_pwr(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_SP_PWR: + case KGSL_PERFCOUNTER_GROUP_TP_PWR: + case KGSL_PERFCOUNTER_GROUP_RB_PWR: + case KGSL_PERFCOUNTER_GROUP_CCU_PWR: + case KGSL_PERFCOUNTER_GROUP_UCHE_PWR: + case KGSL_PERFCOUNTER_GROUP_CP_PWR: + case KGSL_PERFCOUNTER_GROUP_GPMU_PWR: + case KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR: + return _perfcounter_read_pwrcntr(adreno_dev, group, counter); + default: + return _perfcounter_read_default(adreno_dev, group, counter); + } +} diff --git a/drivers/gpu/msm/adreno_perfcounter.h b/drivers/gpu/msm/adreno_perfcounter.h new file mode 100644 index 000000000000..8c4db38983b1 --- /dev/null +++ b/drivers/gpu/msm/adreno_perfcounter.h @@ -0,0 +1,141 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
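
The read helpers above all defer to REG_64BIT_VAL(), whose definition is not part of this hunk. The sketch below is only an assumption about what that combine step amounts to (concatenate the HI/LO register pair, then add the caller-supplied base, which is the saved reg->value for most groups and 0 for the default path); it is an illustration, not the driver's macro.

// Minimal sketch, assuming REG_64BIT_VAL(hi, lo, base) merges a LO/HI
// perfcounter register pair into 64 bits and adds a software base.
static inline uint64_t example_combine_64bit(unsigned int hi, unsigned int lo,
		uint64_t base)
{
	return ((((uint64_t) hi) << 32) | lo) + base;
}
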
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_PERFCOUNTER_H
+#define __ADRENO_PERFCOUNTER_H
+
+#include "adreno.h"
+
+struct adreno_device;
+
+/* ADRENO_PERFCOUNTERS - Given an adreno device, return the perfcounters list */
+#define ADRENO_PERFCOUNTERS(_a) \
+	(ADRENO_GPU_DEVICE(_a) ? ADRENO_GPU_DEVICE(_a)->perfcounters : NULL)
+
+#define PERFCOUNTER_FLAG_NONE 0x0
+#define PERFCOUNTER_FLAG_KERNEL 0x1
+
+/* Structs to maintain the list of active performance counters */
+
+/**
+ * struct adreno_perfcount_register: register state
+ * @countable: countable the register holds
+ * @kernelcount: number of kernel users of the register
+ * @usercount: number of user space users of the register
+ * @offset: register hardware offset for the low 32 bits of the counter
+ * @offset_hi: register hardware offset for the high 32 bits of the counter
+ * @load_bit: The bit number in LOAD register which corresponds to this counter
+ * @select: The countable register offset
+ * @value: The 64 bit countable register value
+ */
+struct adreno_perfcount_register {
+	unsigned int countable;
+	unsigned int kernelcount;
+	unsigned int usercount;
+	unsigned int offset;
+	unsigned int offset_hi;
+	int load_bit;
+	unsigned int select;
+	uint64_t value;
+};
+
+/**
+ * struct adreno_perfcount_group: registers for a hardware group
+ * @regs: available registers for this group
+ * @reg_count: total registers for this group
+ * @name: group name for this group
+ * @flags: group flags (e.g. ADRENO_PERFCOUNTER_GROUP_FIXED)
+ */
+struct adreno_perfcount_group {
+	struct adreno_perfcount_register *regs;
+	unsigned int reg_count;
+	const char *name;
+	unsigned long flags;
+};
+
+/*
+ * ADRENO_PERFCOUNTER_GROUP_FIXED indicates that a perfcounter group is fixed -
+ * instead of having configurable countables like the other groups, registers
+ * in fixed groups have a hardwired countable.
So when the user requests a + * countable in one of these groups, that countable should be used as the + * register offset to return + */ + +#define ADRENO_PERFCOUNTER_GROUP_FIXED BIT(0) + +/** + * adreno_perfcounts: all available perfcounter groups + * @groups: available groups for this device + * @group_count: total groups for this device + */ +struct adreno_perfcounters { + struct adreno_perfcount_group *groups; + unsigned int group_count; +}; + +/** + * adreno_invalid_countabless: Invalid countables that do not work properly + * @countables: List of unusable countables + * @num_countables: Number of unusable countables + */ +struct adreno_invalid_countables { + const unsigned int *countables; + int num_countables; +}; + +#define ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags } + +#define ADRENO_PERFCOUNTER_GROUP(core, offset, name) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, 0) + +#define ADRENO_POWER_COUNTER_GROUP(core, offset, name) \ + [KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { core##_pwrcounters_##name, \ + ARRAY_SIZE(core##_pwrcounters_##name), __stringify(name##_pwr), 0} + +#define ADRENO_PERFCOUNTER_INVALID_COUNTABLE(name, off) \ + [KGSL_PERFCOUNTER_GROUP_##off] = { name##_invalid_countables, \ + ARRAY_SIZE(name##_invalid_countables) } + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters); + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count); + +void adreno_perfcounter_close(struct adreno_device *adreno_dev); + +void adreno_perfcounter_restore(struct adreno_device *adreno_dev); + +void adreno_perfcounter_save(struct adreno_device *adreno_dev); + +void adreno_perfcounter_start(struct adreno_device *adreno_dev); + +void adreno_perfcounter_init(struct adreno_device *adreno_dev); + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name); + +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter); + +const char *adreno_perfcounter_get_name(struct adreno_device + *adreno_dev, unsigned int groupid); + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags); + +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags); + +#endif /* __ADRENO_PERFCOUNTER_H */ diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h new file mode 100644 index 000000000000..f81c0f20e10b --- /dev/null +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -0,0 +1,468 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
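
The per-core group and countable tables are built with the initializer macros above. A hypothetical sketch of their use follows, assuming the definitions above are in scope; the a5xx_* array names and the A5XX_* register names are placeholders chosen for illustration, not identifiers taken from this patch.

// Hypothetical per-core tables; register names below are placeholders.
static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
	// countable, kernelcount, usercount, offset(lo), offset_hi,
	// load_bit, select
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO,
		A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 },
};

static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO,
		A5XX_RBBM_ALWAYSON_COUNTER_HI, -1, 0 },
};

static struct adreno_perfcount_group a5xx_perfcounter_groups[] = {
	// expands to: [KGSL_PERFCOUNTER_GROUP_CP] =
	//   { a5xx_perfcounters_cp, ARRAY_SIZE(a5xx_perfcounters_cp), "cp", 0 }
	ADRENO_PERFCOUNTER_GROUP(a5xx, CP, cp),
	// fixed groups set ADRENO_PERFCOUNTER_GROUP_FIXED so a request returns
	// the hardwired register rather than programming a countable
	ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, ALWAYSON, alwayson,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
};

The designated-initializer expansion is what lets adreno_perfcounter_enable() and adreno_perfcounter_read() index groups directly by the KGSL_PERFCOUNTER_GROUP_* id.
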
+ * + */ +#ifndef __ADRENO_PM4TYPES_H +#define __ADRENO_PM4TYPES_H + +#include "adreno.h" + +#define CP_PKT_MASK 0xc0000000 + +#define CP_TYPE0_PKT ((unsigned int)0 << 30) +#define CP_TYPE3_PKT ((unsigned int)3 << 30) +#define CP_TYPE4_PKT ((unsigned int)4 << 28) +#define CP_TYPE7_PKT ((unsigned int)7 << 28) + +#define PM4_TYPE4_PKT_SIZE_MAX 128 + +/* type3 packets */ + +/* Enable preemption flag */ +#define CP_PREEMPT_ENABLE 0x1C +/* Preemption token command on which preemption occurs */ +#define CP_PREEMPT_TOKEN 0x1E +/* Bit to set in CP_PREEMPT_TOKEN ordinal for interrupt on preemption */ +#define CP_PREEMPT_ORDINAL_INTERRUPT 24 +/* copy from ME scratch RAM to a register */ +#define CP_SCRATCH_TO_REG 0x4d + +/* Copy from REG to ME scratch RAM */ +#define CP_REG_TO_SCRATCH 0x4a + +/* Wait for memory writes to complete */ +#define CP_WAIT_MEM_WRITES 0x12 + +/* Conditional execution based on register comparison */ +#define CP_COND_REG_EXEC 0x47 + +/* Memory to REG copy */ +#define CP_MEM_TO_REG 0x42 + +/* initialize CP's micro-engine */ +#define CP_ME_INIT 0x48 + +/* skip N 32-bit words to get to the next packet */ +#define CP_NOP 0x10 + +/* indirect buffer dispatch. same as IB, but init is pipelined */ +#define CP_INDIRECT_BUFFER_PFD 0x37 + +/* wait for the IDLE state of the engine */ +#define CP_WAIT_FOR_IDLE 0x26 + +/* wait until a register or memory location is a specific value */ +#define CP_WAIT_REG_MEM 0x3c + +/* wait until a register location is equal to a specific value */ +#define CP_WAIT_REG_EQ 0x52 + +/* switches SMMU pagetable, used on a5xx only */ +#define CP_SMMU_TABLE_UPDATE 0x53 + +/* wait until a read completes */ +#define CP_WAIT_UNTIL_READ 0x5c + +/* wait until all base/size writes from an IB_PFD packet have completed */ +#define CP_WAIT_IB_PFD_COMPLETE 0x5d + +/* register read/modify/write */ +#define CP_REG_RMW 0x21 + +/* Set binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + +/* reads register in chip and writes to memory */ +#define CP_REG_TO_MEM 0x3e + +/* write N 32-bit words to memory */ +#define CP_MEM_WRITE 0x3d + +/* write CP_PROG_COUNTER value to memory */ +#define CP_MEM_WRITE_CNTR 0x4f + +/* conditional execution of a sequence of packets */ +#define CP_COND_EXEC 0x44 + +/* conditional write to memory or register */ +#define CP_COND_WRITE 0x45 + +/* generate an event that creates a write to memory when completed */ +#define CP_EVENT_WRITE 0x46 + +/* generate a VS|PS_done event */ +#define CP_EVENT_WRITE_SHD 0x58 + +/* generate a cache flush done event */ +#define CP_EVENT_WRITE_CFL 0x59 + +/* generate a z_pass done event */ +#define CP_EVENT_WRITE_ZPD 0x5b + + +/* initiate fetch of index buffer and draw */ +#define CP_DRAW_INDX 0x22 + +/* draw using supplied indices in packet */ +#define CP_DRAW_INDX_2 0x36 + +/* initiate fetch of index buffer and binIDs and draw */ +#define CP_DRAW_INDX_BIN 0x34 + +/* initiate fetch of bin IDs and draw using supplied indices */ +#define CP_DRAW_INDX_2_BIN 0x35 + +/* New draw packets defined for A4XX */ +#define CP_DRAW_INDX_OFFSET 0x38 +#define CP_DRAW_INDIRECT 0x28 +#define CP_DRAW_INDX_INDIRECT 0x29 +#define CP_DRAW_AUTO 0x24 + +/* begin/end initiator for viz query extent processing */ +#define CP_VIZ_QUERY 0x23 + +/* fetch state sub-blocks and initiate shader code DMAs */ +#define CP_SET_STATE 0x25 + +/* load constant into chip and to memory */ +#define CP_SET_CONSTANT 0x2d + +/* load sequencer instruction memory (pointer-based) */ +#define CP_IM_LOAD 0x27 + +/* load sequencer instruction memory (code 
embedded in packet) */ +#define CP_IM_LOAD_IMMEDIATE 0x2b + +/* load constants from a location in memory */ +#define CP_LOAD_CONSTANT_CONTEXT 0x2e + +/* selective invalidation of state pointers */ +#define CP_INVALIDATE_STATE 0x3b + + +/* dynamically changes shader instruction memory partition */ +#define CP_SET_SHADER_BASES 0x4A + +/* sets the 64-bit BIN_MASK register in the PFP */ +#define CP_SET_BIN_MASK 0x50 + +/* sets the 64-bit BIN_SELECT register in the PFP */ +#define CP_SET_BIN_SELECT 0x51 + + +/* updates the current context, if needed */ +#define CP_CONTEXT_UPDATE 0x5e + +/* generate interrupt from the command stream */ +#define CP_INTERRUPT 0x40 + +/* A5XX Enable yield in RB only */ +#define CP_YIELD_ENABLE 0x1C + +/* Enable/Disable/Defer A5x global preemption model */ +#define CP_PREEMPT_ENABLE_GLOBAL 0x69 + +/* Enable/Disable A5x local preemption model */ +#define CP_PREEMPT_ENABLE_LOCAL 0x6A + +/* Yeild token on a5xx similar to CP_PREEMPT on a4xx */ +#define CP_CONTEXT_SWITCH_YIELD 0x6B + +/* Inform CP about current render mode (needed for a5xx preemption) */ +#define CP_SET_RENDER_MODE 0x6C + +/* copy sequencer instruction memory to system memory */ +#define CP_IM_STORE 0x2c + +/* test 2 memory locations to dword values specified */ +#define CP_TEST_TWO_MEMS 0x71 + +/* Write register, ignoring context state for context sensitive registers */ +#define CP_REG_WR_NO_CTXT 0x78 + +/* + * for A4xx + * Write to register with address that does not fit into type-0 pkt + */ +#define CP_WIDE_REG_WRITE 0x74 + + +/* PFP waits until the FIFO between the PFP and the ME is empty */ +#define CP_WAIT_FOR_ME 0x13 + +/* Record the real-time when this packet is processed by PFP */ +#define CP_RECORD_PFP_TIMESTAMP 0x11 + +#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ + +/* Used to switch GPU between secure and non-secure modes */ +#define CP_SET_SECURE_MODE 0x66 + +#define CP_BOOTSTRAP_UCODE 0x6f /* bootstraps microcode */ + +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_EXEC_CL 0x31 + +/* (A4x) save PM4 stream pointers to execute upon a visible draw */ +#define CP_SET_DRAW_STATE 0x43 + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 + +static inline uint pm4_calc_odd_parity_bit(uint val) +{ + return (0x9669 >> (0xf & ((val) ^ + ((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^ + ((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^ + ((val) >> 28)))) & 1; +} + +/* + * PM4 packet header functions + * For all the packet functions the passed in count should be the size of the + * payload excluding the header + */ +static inline uint cp_type0_packet(uint regindx, uint cnt) +{ + return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF); +} + +static inline uint cp_type3_packet(uint opcode, uint cnt) +{ + return CP_TYPE3_PKT | ((cnt-1) << 16) | (((opcode) & 0xFF) << 8); +} + +static inline uint cp_type4_packet(uint opcode, uint cnt) +{ + return CP_TYPE4_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 7) | + 
(((opcode) & 0x3FFFF) << 8) | + ((pm4_calc_odd_parity_bit(opcode) << 27)); +} + +static inline uint cp_type7_packet(uint opcode, uint cnt) +{ + return CP_TYPE7_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 15) | + (((opcode) & 0x7F) << 16) | + ((pm4_calc_odd_parity_bit(opcode) << 23)); + +} + +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +/* + * Check both for the type3 opcode and make sure that the reserved bits [1:7] + * and 15 are 0 + */ + +#define pkt_is_type3(pkt) \ + ((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \ + (((pkt) & 0x80FE) == 0)) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) + +#define pkt_is_type4(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \ + ((((pkt) >> 27) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type4_base_index_one_reg_wr(pkt))) \ + && ((((pkt) >> 7) & 0x1) == \ + pm4_calc_odd_parity_bit(type4_pkt_size(pkt)))) + +#define cp_type4_base_index_one_reg_wr(pkt) (((pkt) >> 8) & 0x7FFFF) +#define type4_pkt_size(pkt) ((pkt) & 0x7F) + +#define pkt_is_type7(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \ + (((pkt) & 0x0F000000) == 0) && \ + ((((pkt) >> 23) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \ + && ((((pkt) >> 15) & 0x1) == \ + pm4_calc_odd_parity_bit(type7_pkt_size(pkt)))) + +#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F) +#define type7_pkt_size(pkt) ((pkt) & 0x3FFF) + +/* dword base address of the GFX decode space */ +#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) + +/* gmem command buffer length */ +#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */ +#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500) + +/** + * cp_packet - Generic CP packet to support different opcodes on + * different GPU cores. + * @adreno_dev: The adreno device + * @opcode: Operation for cp packet + * @size: size for cp packet + */ +static inline uint cp_packet(struct adreno_device *adreno_dev, + int opcode, uint size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size); +} + +/** + * cp_mem_packet - Generic CP memory packet to support different + * opcodes on different GPU cores. 
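
As a quick worked example of the type7 helpers defined above (hand-evaluated, shown only to make the bit layout concrete): a NOP header with a one-dword payload encodes to 0x70100001, and the decode macros recover the opcode and size from it.

// Illustrative round-trip through the type7 encode/decode helpers above.
// CP_TYPE7_PKT (7 << 28) | size 1 | CP_NOP (0x10) << 16, with both parity
// bits evaluating to 0, gives the header value 0x70100001.
static inline bool example_type7_nop_roundtrip(void)
{
	unsigned int hdr = cp_type7_packet(CP_NOP, 1);	// 0x70100001

	return pkt_is_type7(hdr) &&
		cp_type7_opcode(hdr) == CP_NOP &&
		type7_pkt_size(hdr) == 1;
}
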
+ * @adreno_dev: The adreno device + * @opcode: mem operation for cp packet + * @size: size for cp packet + * @num_mem: num of mem access + */ +static inline uint cp_mem_packet(struct adreno_device *adreno_dev, + int opcode, uint size, uint num_mem) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size + num_mem); +} + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(struct adreno_device *adreno_dev, + unsigned int cmd) +{ + return cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFD, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFD, 2, 1); +} + +/** + * cp_gpuaddr - Generic function to add 64bit and 32bit gpuaddr + * to pm4 commands + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + * @gpuaddr: gpuaddr to add + */ +static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) + *cmds++ = (uint)gpuaddr; + else { + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + } + return cmds - start; +} + +/** + * cp_register - Generic function for gpu register operation + * @adreno_dev: The adreno device + * @reg: GPU register + * @size: count for PM4 operation + */ +static inline uint cp_register(struct adreno_device *adreno_dev, + unsigned int reg, unsigned int size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type0_packet(reg, size); + + return cp_type4_packet(reg, size); +} + +/** + * cp_wait_for_me - common function for WAIT_FOR_ME + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_me(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); + + return cmds - start; +} + +/** + * cp_wait_for_idle - common function for WAIT_FOR_IDLE + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + return cmds - start; +} + +/** + * cp_invalidate_state - common function for invalidating cp + * state + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_invalidate_state(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_GPUREV(adreno_dev) < 500) { + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x7fff; + } else { + *cmds++ = cp_type7_packet(CP_SET_DRAW_STATE, 3); + *cmds++ = 0x40000; + *cmds++ = 0; + *cmds++ = 0; + } + + return cmds - start; +} + +#endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_profile.c b/drivers/gpu/msm/adreno_profile.c new file mode 100644 index 000000000000..5476f9892f89 --- /dev/null +++ b/drivers/gpu/msm/adreno_profile.c @@ -0,0 +1,1230 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. 
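
One point worth spelling out about cp_mem_packet() and cp_gpuaddr() above: the num_mem argument exists because a GPU address takes one dword on the legacy (pre-A5XX) path and two on the type7 path, so the same logical command occupies a different number of dwords depending on the core. A small sketch of the resulting sizing, using a CP_MEM_WRITE of a single value as the example (helper name is illustrative, not from the driver):

// Dwords consumed by a one-value CP_MEM_WRITE built with the helpers above:
//   legacy (< A5XX): header + 32-bit gpuaddr + value          = 3 dwords
//   A5XX and newer:  header + gpuaddr lo + gpuaddr hi + value = 4 dwords
static inline unsigned int example_mem_write_dwords(
		struct adreno_device *adreno_dev)
{
	return 1 + (ADRENO_LEGACY_PM4(adreno_dev) ? 1 : 2) + 1;
}
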
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/debugfs.h> + +#include "adreno.h" +#include "adreno_profile.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "adreno_pm4types.h" + +#define ASSIGNS_STR_FORMAT "%.8s:%u " + +/* + * Raw Data for processing later: + * : 3 - timestamp, count, context id + * [per counter] - data for each counter + * : 1 - Register offset + * : 2 - Pre IB register hi/lo value + * : 2 - Post IB register hi/lo value + * [per counter end] + */ +#define SIZE_DATA(cnt) (6 + (cnt) * 5) + +/* + * Pre-IB command size (in dwords): + * : 2 - NOP start identifier + * : 4 - timestamp + * : 4 - count + * : 4 - context id + * : 4 - pid + * : 4 - tid + * : 4 - type + * [loop count start] - for each counter to watch + * : 4 - Register offset + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_PREIB(cnt) (28 + (cnt) * 12) + +/* + * Post-IB command size (in dwords): + * : 2 - NOP start identifier + * [loop count start] - for each counter to watch + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_POSTIB(cnt) (4 + (cnt) * 8) + +/* Counter data + Pre size + post size = total size */ +#define SIZE_SHARED_ENTRY(cnt) (SIZE_DATA(cnt) + SIZE_PREIB(cnt) \ + + SIZE_POSTIB(cnt)) + +/* + * Space for following string :"%u %u %u %.5s %u " + * [count iterations]: "%.8s:%u %llu %llu%c" + */ +#define SIZE_PIPE_ENTRY(cnt) (50 + (cnt) * 62) +#define SIZE_LOG_ENTRY(cnt) (6 + (cnt) * 5) + +static struct adreno_context_type ctxt_type_table[] = {KGSL_CONTEXT_TYPES}; + +static const char *get_api_type_str(unsigned int type) +{ + int i; + for (i = 0; i < ARRAY_SIZE(ctxt_type_table) - 1; i++) { + if (ctxt_type_table[i].type == type) + return ctxt_type_table[i].str; + } + return "UNKNOWN"; +} + +static inline uint _ib_start(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_START_OF_PROFILE_IDENTIFIER; + + return cmds - start; +} + +static inline uint _ib_end(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_END_OF_PROFILE_IDENTIFIER; + + return cmds - start; +} + +static inline uint _ib_cmd_mem_write(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + *cmds++ = val; + + *off += sizeof(unsigned int); + return cmds - start; +} + +static inline uint _ib_cmd_reg_to_mem(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1); + *cmds++ = val; 
+ cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + + *off += sizeof(unsigned int); + return cmds - start; +} + +static inline int _create_ib_ref(struct adreno_device *adreno_dev, + struct kgsl_memdesc *memdesc, unsigned int *cmd, + unsigned int cnt, unsigned int off) +{ + unsigned int *start = cmd; + + *cmd++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmd += cp_gpuaddr(adreno_dev, cmd, (memdesc->gpuaddr + off)); + *cmd++ = cnt; + + return cmd - start; +} + +static int _build_pre_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, + unsigned int *rbcmds, unsigned int head, + unsigned int timestamp, struct adreno_context *drawctxt) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer.gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer.hostptr); + start = ibcmds; + + /* start of profile identifier */ + ibcmds += _ib_start(adreno_dev, ibcmds); + + /* + * Write ringbuffer commands to save the following to memory: + * timestamp, count, context_id, pid, tid, context type + */ + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + timestamp, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + profile->assignment_count, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.id, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.proc_priv->pid, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.tid, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->type, &data_offset); + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + + /* skip over post_ib counter data */ + data_offset += sizeof(unsigned int) * 2; + } + + /* end of profile identifier */ + ibcmds += _ib_end(adreno_dev, ibcmds); + + return _create_ib_ref(adreno_dev, &profile->shared_buffer, rbcmds, + ibcmds - start, ib_offset * sizeof(unsigned int)); +} + +static int _build_post_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, + unsigned int *rbcmds, unsigned int head) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer.gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count) + SIZE_PREIB(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer.hostptr); + start = ibcmds; + /* start of profile identifier */ + ibcmds += _ib_start(adreno_dev, ibcmds); + + /* skip over pre_ib preamble */ + data_offset += sizeof(unsigned int) * 6; + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + /* skip over pre_ib counter data */ + data_offset += 
sizeof(unsigned int) * 3; + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + } + + /* end of profile identifier */ + ibcmds += _ib_end(adreno_dev, ibcmds); + + return _create_ib_ref(adreno_dev, &profile->shared_buffer, rbcmds, + ibcmds - start, ib_offset * sizeof(unsigned int)); +} + +static bool shared_buf_empty(struct adreno_profile *profile) +{ + if (profile->shared_buffer.hostptr == NULL || + profile->shared_buffer.size == 0) + return true; + + if (profile->shared_head == profile->shared_tail) + return true; + + return false; +} + +static inline void shared_buf_inc(unsigned int max_size, + unsigned int *offset, size_t inc) +{ + *offset = (*offset + inc) % max_size; +} + +static inline void log_buf_wrapcnt(unsigned int cnt, uintptr_t *off) +{ + *off = (*off + cnt) % ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc_len(unsigned int *profile_log_buffer, + unsigned int **ptr, unsigned int len) +{ + *ptr += len; + if (*ptr >= (profile_log_buffer + + ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS)) + *ptr -= ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc(unsigned int *profile_log_buffer, + unsigned int **ptr) +{ + log_buf_wrapinc_len(profile_log_buffer, ptr, 1); +} + +static inline unsigned int log_buf_available(struct adreno_profile *profile, + unsigned int *head_ptr) +{ + uintptr_t tail, head; + + tail = (uintptr_t) profile->log_tail - + (uintptr_t) profile->log_buffer; + head = (uintptr_t)head_ptr - (uintptr_t) profile->log_buffer; + if (tail > head) + return (tail - head) / sizeof(uintptr_t); + else + return ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS - ((head - tail) / + sizeof(uintptr_t)); +} + +static inline unsigned int shared_buf_available(struct adreno_profile *profile) +{ + if (profile->shared_tail > profile->shared_head) + return profile->shared_tail - profile->shared_head; + else + return profile->shared_size - + (profile->shared_head - profile->shared_tail); +} + +static struct adreno_profile_assigns_list *_find_assignment_by_offset( + struct adreno_profile *profile, unsigned int offset) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->offset == offset) + return entry; + } + + return NULL; +} + +static bool _in_assignments_list(struct adreno_profile *profile, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->groupid == groupid && entry->countable == + countable) + return true; + } + + return false; +} + +static bool _add_to_assignments_list(struct adreno_profile *profile, + const char *str, unsigned int groupid, unsigned int countable, + unsigned int offset, unsigned int offset_hi) +{ + struct adreno_profile_assigns_list *entry; + + /* first make sure we can alloc memory */ + entry = kmalloc(sizeof(struct adreno_profile_assigns_list), GFP_KERNEL); + if (!entry) + return false; + + list_add_tail(&entry->list, &profile->assignments_list); + + entry->countable = countable; + entry->groupid = groupid; + entry->offset = offset; + entry->offset_hi = offset_hi; + + strlcpy(entry->name, str, sizeof(entry->name)); + + profile->assignment_count++; + + return true; +} + +static bool results_available(struct adreno_device *adreno_dev, + struct adreno_profile *profile, unsigned int 
*shared_buf_tail) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int global_eop; + unsigned int off = profile->shared_tail; + unsigned int *shared_ptr = (unsigned int *) + profile->shared_buffer.hostptr; + unsigned int ts, cnt; + int ts_cmp; + + /* + * If shared_buffer empty or Memstore EOP timestamp is less than + * outstanding counter buffer timestamps then no results available + */ + if (shared_buf_empty(profile)) + return false; + + if (adreno_rb_readtimestamp(device, + adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &global_eop)) + return false; + do { + cnt = *(shared_ptr + off + 1); + if (cnt == 0) + return false; + + ts = *(shared_ptr + off); + ts_cmp = timestamp_cmp(ts, global_eop); + if (ts_cmp >= 0) { + *shared_buf_tail = off; + if (off == profile->shared_tail) + return false; + else + return true; + } + shared_buf_inc(profile->shared_size, &off, + SIZE_SHARED_ENTRY(cnt)); + } while (off != profile->shared_head); + + *shared_buf_tail = profile->shared_head; + + return true; +} + +static void transfer_results(struct adreno_profile *profile, + unsigned int shared_buf_tail) +{ + unsigned int buf_off; + unsigned int ts, cnt, ctxt_id, pid, tid, client_type; + unsigned int *ptr = (unsigned int *) profile->shared_buffer.hostptr; + unsigned int *log_ptr, *log_base; + struct adreno_profile_assigns_list *assigns_list; + int i, tmp_tail; + + log_ptr = profile->log_head; + log_base = profile->log_buffer; + if (log_ptr == NULL) + return; + + /* + * go through counter buffers and format for write into log_buffer + * if log buffer doesn't have space just overwrite it circularly + * shared_buf is guaranteed to not wrap within an entry so can use + * ptr increment + */ + while (profile->shared_tail != shared_buf_tail) { + buf_off = profile->shared_tail; + /* + * format: timestamp, count, context_id + * count entries: pc_off, pc_start, pc_end + */ + ts = *(ptr + buf_off++); + cnt = *(ptr + buf_off++); + ctxt_id = *(ptr + buf_off++); + pid = *(ptr + buf_off++); + tid = *(ptr + buf_off++); + client_type = *(ptr + buf_off++); + + /* + * if entry overwrites the tail of log_buffer then adjust tail + * ptr to make room for the new entry, discarding old entry + */ + while (log_buf_available(profile, log_ptr) <= + SIZE_LOG_ENTRY(cnt)) { + unsigned int size_tail; + uintptr_t boff; + size_tail = SIZE_LOG_ENTRY(0xffff & + *(profile->log_tail)); + boff = ((uintptr_t) profile->log_tail - + (uintptr_t) log_base) / sizeof(uintptr_t); + log_buf_wrapcnt(size_tail, &boff); + profile->log_tail = log_base + boff; + } + + *log_ptr = cnt; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = client_type; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = pid; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = tid; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = ctxt_id; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = ts; + log_buf_wrapinc(log_base, &log_ptr); + + for (i = 0; i < cnt; i++) { + assigns_list = _find_assignment_by_offset( + profile, *(ptr + buf_off++)); + if (assigns_list == NULL) { + *log_ptr = (unsigned int) -1; + + shared_buf_inc(profile->shared_size, + &profile->shared_tail, + SIZE_SHARED_ENTRY(cnt)); + goto err; + } else { + *log_ptr = assigns_list->groupid << 16 | + (assigns_list->countable & 0xffff); + } + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr start hi */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr start lo */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf 
cntr end hi */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr end lo */ + log_buf_wrapinc(log_base, &log_ptr); + + } + + tmp_tail = profile->shared_tail; + shared_buf_inc(profile->shared_size, + &profile->shared_tail, + SIZE_SHARED_ENTRY(cnt)); + /* + * Possibly lost some room as we cycled around, so it's safe to + * reset the max size + */ + if (profile->shared_tail < tmp_tail) + profile->shared_size = + ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + + } + profile->log_head = log_ptr; + return; +err: + /* reset head/tail to same on error in hopes we work correctly later */ + profile->log_head = profile->log_tail; +} + +static int profile_enable_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + mutex_lock(&device->mutex); + *val = adreno_profile_enabled(&adreno_dev->profile); + mutex_unlock(&device->mutex); + + return 0; +} + +static int profile_enable_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + + mutex_lock(&device->mutex); + + if (val && profile->log_buffer == NULL) { + /* allocate profile_log_buffer the first time enabled */ + profile->log_buffer = vmalloc(ADRENO_PROFILE_LOG_BUF_SIZE); + if (profile->log_buffer == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + profile->log_tail = profile->log_buffer; + profile->log_head = profile->log_buffer; + } + + profile->enabled = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static ssize_t profile_assignments_read(struct file *filep, + char __user *ubuf, size_t max, loff_t *ppos) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry; + int len = 0, max_size = PAGE_SIZE; + char *buf, *pos; + ssize_t size = 0; + + mutex_lock(&device->mutex); + + if (profile->assignment_count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + buf = kmalloc(max_size, GFP_KERNEL); + if (!buf) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + pos = buf; + + /* copy all assingments from list to str */ + list_for_each_entry(entry, &profile->assignments_list, list) { + len = snprintf(pos, max_size, ASSIGNS_STR_FORMAT, + entry->name, entry->countable); + + max_size -= len; + pos += len; + } + + size = simple_read_from_buffer(ubuf, max, ppos, buf, + strlen(buf)); + + kfree(buf); + + mutex_unlock(&device->mutex); + return size; +} + +static void _remove_assignment(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + if (entry->groupid == groupid && + entry->countable == countable) { + list_del(&entry->list); + + profile->assignment_count--; + + kfree(entry); + + /* remove from perf counter allocation */ + adreno_perfcounter_put(adreno_dev, groupid, countable, + PERFCOUNTER_FLAG_KERNEL); + } + } +} + +static void _add_assignment(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int offset, offset_hi; + const char *name = NULL; + + name = adreno_perfcounter_get_name(adreno_dev, 
groupid); + if (!name) + return; + + /* if already in assigned list skip it */ + if (_in_assignments_list(profile, groupid, countable)) + return; + + /* add to perf counter allocation, if fail skip it */ + if (adreno_perfcounter_get(adreno_dev, groupid, countable, + &offset, &offset_hi, PERFCOUNTER_FLAG_NONE)) + return; + + /* add to assignments list, put counter back if error */ + if (!_add_to_assignments_list(profile, name, groupid, + countable, offset, offset_hi)) + adreno_perfcounter_put(adreno_dev, groupid, + countable, PERFCOUNTER_FLAG_KERNEL); +} + +static char *_parse_next_assignment(struct adreno_device *adreno_dev, + char *str, int *groupid, int *countable, bool *remove) +{ + char *groupid_str, *countable_str, *next_str = NULL; + int ret; + + *groupid = -EINVAL; + *countable = -EINVAL; + *remove = false; + + /* remove spaces */ + while (*str == ' ') + str++; + + /* check if it's a remove assignment */ + if (*str == '-') { + *remove = true; + str++; + } + + /* get the groupid string */ + groupid_str = str; + while (*str != ':') { + if (*str == '\0') + return NULL; + *str = tolower(*str); + str++; + } + if (groupid_str == str) + return NULL; + + *str = '\0'; + str++; + + /* get the countable string */ + countable_str = str; + while (*str != ' ' && *str != '\0') + str++; + if (countable_str == str) + return NULL; + + /* + * If we have reached the end of the original string then make sure we + * return NULL from this function or we could accidently overrun + */ + + if (*str != '\0') { + *str = '\0'; + next_str = str + 1; + } + + /* set results */ + *groupid = adreno_perfcounter_get_groupid(adreno_dev, + groupid_str); + if (*groupid < 0) + return NULL; + ret = kstrtou32(countable_str, 10, countable); + if (ret) + return NULL; + + return next_str; +} + +static ssize_t profile_assignments_write(struct file *filep, + const char __user *user_buf, size_t len, loff_t *off) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + size_t size = 0; + char *buf, *pbuf; + bool remove_assignment = false; + int groupid, countable, ret; + + if (len >= PAGE_SIZE || len == 0) + return -EINVAL; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + size = -EFAULT; + goto error_free; + } + + mutex_lock(&device->mutex); + + if (adreno_profile_enabled(profile)) { + size = -EINVAL; + goto error_unlock; + } + + ret = kgsl_active_count_get(device); + if (ret) { + size = ret; + goto error_unlock; + } + + /* + * When adding/removing assignments, ensure that the GPU is done with + * all it's work. This helps to syncronize the work flow to the + * GPU and avoid racey conditions. 
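
For reference, the string accepted by the assignments file and consumed by the parser above is a space-separated list of group:countable tokens, with a leading '-' marking a removal. The walk-through below uses invented group names purely for illustration.

// Illustrative parse of a hypothetical write such as "cp:0 rbbm:6 -sp:12":
//   pass 1: group "cp",   countable 0,  remove = false -> _add_assignment()
//   pass 2: group "rbbm", countable 6,  remove = false -> _add_assignment()
//   pass 3: group "sp",   countable 12, remove = true  -> _remove_assignment()
// Group names are matched case-insensitively (the parser lowercases them)
// and the countable is parsed as a base-10 unsigned integer.
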
+ */ + if (adreno_idle(device)) { + size = -ETIMEDOUT; + goto error_put; + } + + /* clear all shared buffer results */ + adreno_profile_process_results(adreno_dev); + + pbuf = buf; + + /* clear the log buffer */ + if (profile->log_buffer != NULL) { + profile->log_head = profile->log_buffer; + profile->log_tail = profile->log_buffer; + } + + + /* for sanity and parsing, ensure it is null terminated */ + buf[len] = '\0'; + + /* parse file buf and add(remove) to(from) appropriate lists */ + while (pbuf) { + pbuf = _parse_next_assignment(adreno_dev, pbuf, &groupid, + &countable, &remove_assignment); + if (groupid < 0 || countable < 0) + break; + + if (remove_assignment) + _remove_assignment(adreno_dev, groupid, countable); + else + _add_assignment(adreno_dev, groupid, countable); + } + + size = len; + +error_put: + kgsl_active_count_put(device); +error_unlock: + mutex_unlock(&device->mutex); +error_free: + kfree(buf); + return size; +} + +static int _pipe_print_pending(char __user *ubuf, size_t max) +{ + loff_t unused = 0; + char str[] = "Operation Would Block!"; + + return simple_read_from_buffer(ubuf, max, + &unused, str, strlen(str)); +} + +static int _pipe_print_results(struct adreno_device *adreno_dev, + char __user *ubuf, size_t max) +{ + struct adreno_profile *profile = &adreno_dev->profile; + const char *grp_name; + char __user *usr_buf = ubuf; + unsigned int *log_ptr = NULL, *tmp_log_ptr = NULL; + int len, i; + int status = 0; + ssize_t size, total_size = 0; + unsigned int cnt, api_type, ctxt_id, pid, tid, ts, cnt_reg; + unsigned long long pc_start, pc_end; + const char *api_str; + char format_space; + loff_t unused = 0; + char pipe_hdr_buf[51]; /* 4 uint32 + 5 space + 5 API type + '\0' */ + char pipe_cntr_buf[63]; /* 2 uint64 + 1 uint32 + 4 spaces + 8 group */ + + /* convert unread entries to ASCII, copy to user-space */ + log_ptr = profile->log_tail; + + do { + /* store the tmp var for error cases so we can skip */ + tmp_log_ptr = log_ptr; + + /* Too many to output to pipe, so skip this data */ + cnt = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + if (SIZE_PIPE_ENTRY(cnt) > max) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + /* + * Not enough space left in pipe, return without doing + * anything + */ + if ((max - (usr_buf - ubuf)) < SIZE_PIPE_ENTRY(cnt)) { + log_ptr = tmp_log_ptr; + goto done; + } + + api_type = *log_ptr; + api_str = get_api_type_str(api_type); + log_buf_wrapinc(profile->log_buffer, &log_ptr); + pid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + tid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ctxt_id = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ts = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + len = snprintf(pipe_hdr_buf, sizeof(pipe_hdr_buf) - 1, + "%u %u %u %.5s %u ", + pid, tid, ctxt_id, api_str, ts); + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_hdr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + unused = 0; + usr_buf += size; + total_size += size; + + for (i = 0; i < cnt; i++) { + unsigned int start_lo, start_hi; + unsigned int end_lo, end_hi; + + grp_name = adreno_perfcounter_get_name( + adreno_dev, (*log_ptr >> 16) & 0xffff); + + /* non-fatal error, so skip rest of entry and return */ 
+ if (grp_name == NULL) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + if (i == cnt - 1) + format_space = '\n'; + else + format_space = ' '; + + cnt_reg = *log_ptr & 0xffff; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + pc_start = (((uint64_t) start_hi) << 32) | start_lo; + pc_end = (((uint64_t) end_hi) << 32) | end_lo; + + len = snprintf(pipe_cntr_buf, + sizeof(pipe_cntr_buf) - 1, + "%.8s:%u %llu %llu%c", + grp_name, cnt_reg, pc_start, + pc_end, format_space); + + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_cntr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + unused = 0; + usr_buf += size; + total_size += size; + } + } while (log_ptr != profile->log_head); + +done: + status = total_size; + profile->log_tail = log_ptr; + + return status; +} + +static ssize_t profile_pipe_print(struct file *filep, char __user *ubuf, + size_t max, loff_t *ppos) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + char __user *usr_buf = ubuf; + int status = 0; + + /* + * this file not seekable since it only supports streaming, ignore + * ppos <> 0 + */ + /* + * format <pid> <tid> <context id> <cnt<<16 | client type> <timestamp> + * for each perf counter <cntr_reg_off> <start hi & lo> <end hi & low> + */ + + mutex_lock(&device->mutex); + + while (1) { + /* process any results that are available into the log_buffer */ + status = adreno_profile_process_results(adreno_dev); + if (status > 0) { + /* if we have results, print them and exit */ + status = _pipe_print_results(adreno_dev, usr_buf, max); + break; + } + + /* there are no unread results, act accordingly */ + if (filep->f_flags & O_NONBLOCK) { + if (profile->shared_tail != profile->shared_head) { + status = _pipe_print_pending(usr_buf, max); + break; + } else { + status = 0; + break; + } + } + + mutex_unlock(&device->mutex); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + mutex_lock(&device->mutex); + + if (signal_pending(current)) { + status = 0; + break; + } + } + + mutex_unlock(&device->mutex); + + return status; +} + +static int profile_groups_print(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = (struct kgsl_device *) s->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_perfcounters *counters = gpudev->perfcounters; + struct adreno_perfcount_group *group; + int i, j, used; + + mutex_lock(&device->mutex); + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + /* get number of counters used for this group */ + used = 0; + for (j = 0; j < group->reg_count; j++) { + if (group->regs[j].countable != + KGSL_PERFCOUNTER_NOT_USED) + used++; + } + + seq_printf(s, "%s %d %d\n", group->name, + group->reg_count, used); + } + + mutex_unlock(&device->mutex); + + return 0; +} + 
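
For reference, a record streamed out through the pipe file follows the two snprintf formats used in _pipe_print_results() above: a header of pid, tid, context id, API type and timestamp, then one name:countable start end triple per assigned counter. The values in the sample below are invented for illustration, as is the "blocks" line that profile_groups_print() emits per group.

// pipe header: "%u %u %u %.5s %u "   per counter: "%.8s:%u %llu %llu%c"
//   e.g. (invented, two assigned counters):
//   1234 1240 7 GL 10452 cp:0 812002 813675 rbbm:6 51203 51988
// blocks file, one line per group: "<name> <total regs> <in use>"
//   e.g. cp 14 2
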
+static int profile_groups_open(struct inode *inode, struct file *file) +{ + return single_open(file, profile_groups_print, inode->i_private); +} + +static const struct file_operations profile_groups_fops = { + .owner = THIS_MODULE, + .open = profile_groups_open, + .read = seq_read, + .llseek = noop_llseek, + .release = single_release, +}; + +static const struct file_operations profile_pipe_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_pipe_print, + .llseek = noop_llseek, +}; + +static const struct file_operations profile_assignments_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_assignments_read, + .write = profile_assignments_write, + .llseek = noop_llseek, +}; + +DEFINE_SIMPLE_ATTRIBUTE(profile_enable_fops, + profile_enable_get, + profile_enable_set, "%llu\n"); + +void adreno_profile_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_profile *profile = &adreno_dev->profile; + struct dentry *profile_dir; + int ret; + + profile->enabled = false; + + /* allocate shared_buffer, which includes pre_ib and post_ib */ + profile->shared_size = ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + ret = kgsl_allocate_global(device, &profile->shared_buffer, + profile->shared_size * sizeof(unsigned int), 0, 0); + + if (ret) { + profile->shared_size = 0; + return; + } + + INIT_LIST_HEAD(&profile->assignments_list); + + /* Create perf counter debugfs */ + profile_dir = debugfs_create_dir("profiling", device->d_debugfs); + if (IS_ERR(profile_dir)) + return; + + debugfs_create_file("enable", 0644, profile_dir, device, + &profile_enable_fops); + debugfs_create_file("blocks", 0444, profile_dir, device, + &profile_groups_fops); + debugfs_create_file("pipe", 0444, profile_dir, device, + &profile_pipe_fops); + debugfs_create_file("assignments", 0644, profile_dir, device, + &profile_assignments_fops); +} + +void adreno_profile_close(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + profile->enabled = false; + vfree(profile->log_buffer); + profile->log_buffer = NULL; + profile->log_head = NULL; + profile->log_tail = NULL; + profile->shared_head = 0; + profile->shared_tail = 0; + kgsl_free_global(&profile->shared_buffer); + profile->shared_size = 0; + + profile->assignment_count = 0; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + list_del(&entry->list); + kfree(entry); + } +} + +int adreno_profile_process_results(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int shared_buf_tail = profile->shared_tail; + + if (!results_available(adreno_dev, profile, &shared_buf_tail)) + return 0; + + /* + * transfer retired results to log_buffer + * update shared_buffer tail ptr + */ + transfer_results(profile, shared_buf_tail); + + return 1; +} + +void adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, unsigned int *cmd_flags, + unsigned int **rbptr) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head; + unsigned int *shared_ptr; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + unsigned int rbcmds[4]; + unsigned int *ptr = *rbptr; + unsigned int i, ret = 0; + + *cmd_flags &= ~KGSL_CMD_FLAGS_PROFILE; + + if (!adreno_profile_assignments_ready(profile)) + goto done; + + /* + * 
check if space available, include the post_ib in space available + * check so don't have to handle trying to undo the pre_ib insertion in + * ringbuffer in the case where only the post_ib fails enough space + */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + goto done; + + if (entry_head + SIZE_SHARED_ENTRY(count) >= profile->shared_size) { + /* entry_head would wrap, start entry_head at 0 in buffer */ + entry_head = 0; + profile->shared_size = profile->shared_head; + profile->shared_head = 0; + + /* recheck space available */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + goto done; + } + + /* zero out the counter area of shared_buffer entry_head */ + shared_ptr = entry_head + ((unsigned int *) + profile->shared_buffer.hostptr); + memset(shared_ptr, 0, SIZE_SHARED_ENTRY(count) * sizeof(unsigned int)); + + /* reserve space for the pre ib shared buffer */ + shared_buf_inc(profile->shared_size, &profile->shared_head, + SIZE_SHARED_ENTRY(count)); + + /* create the shared ibdesc */ + ret = _build_pre_ib_cmds(adreno_dev, profile, rbcmds, entry_head, + rb->timestamp + 1, drawctxt); + + /* set flag to sync with post ib commands */ + *cmd_flags |= KGSL_CMD_FLAGS_PROFILE; + +done: + /* write the ibdesc to the ringbuffer */ + for (i = 0; i < ret; i++) + *ptr++ = rbcmds[i]; + + *rbptr = ptr; +} + +void adreno_profile_postib_processing(struct adreno_device *adreno_dev, + unsigned int *cmd_flags, unsigned int **rbptr) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head - + SIZE_SHARED_ENTRY(count); + unsigned int *ptr = *rbptr; + unsigned int rbcmds[4]; + int ret = 0, i; + + if (!adreno_profile_assignments_ready(profile)) + goto done; + + if (!(*cmd_flags & KGSL_CMD_FLAGS_PROFILE)) + goto done; + + /* create the shared ibdesc */ + ret = _build_post_ib_cmds(adreno_dev, profile, rbcmds, entry_head); + +done: + /* write the ibdesc to the ringbuffer */ + for (i = 0; i < ret; i++) + *ptr++ = rbcmds[i]; + + *rbptr = ptr; + + /* reset the sync flag */ + *cmd_flags &= ~KGSL_CMD_FLAGS_PROFILE; +} + diff --git a/drivers/gpu/msm/adreno_profile.h b/drivers/gpu/msm/adreno_profile.h new file mode 100644 index 000000000000..4d81abd14837 --- /dev/null +++ b/drivers/gpu/msm/adreno_profile.h @@ -0,0 +1,111 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+#ifndef __ADRENO_PROFILE_H
+#define __ADRENO_PROFILE_H
+#include <linux/seq_file.h>
+
+/**
+ * struct adreno_profile_assigns_list: linked list for assigned perf counters
+ * @list: linkage for nodes in list
+ * @name: group name for this assignment
+ * @groupid: group id
+ * @countable: countable assigned to perfcounter
+ * @offset: perfcounter register address offset (LO)
+ * @offset_hi: perfcounter register address offset (HI)
+ */
+struct adreno_profile_assigns_list {
+	struct list_head list;
+	char name[25];
+	unsigned int groupid;
+	unsigned int countable;
+	unsigned int offset;    /* LO offset */
+	unsigned int offset_hi; /* HI offset */
+};
+
+struct adreno_profile {
+	struct list_head assignments_list; /* list of all assignments */
+	unsigned int assignment_count;  /* Number of assigned counters */
+	unsigned int *log_buffer;
+	unsigned int *log_head;
+	unsigned int *log_tail;
+	bool enabled;
+	/* counter, pre_ib, and post_ib held in one large circular buffer
+	 * shared between kgsl and GPU
+	 * counter entry 0
+	 * pre_ib entry 0
+	 * post_ib entry 0
+	 * ...
+	 * counter entry N
+	 * pre_ib entry N
+	 * post_ib entry N
+	 */
+	struct kgsl_memdesc shared_buffer;
+	unsigned int shared_head;
+	unsigned int shared_tail;
+	unsigned int shared_size;
+};
+
+#define ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS (48 * 4096 / sizeof(uint))
+/* sized @ 48 pages should allow for over 50 outstanding IBs minimum, 1755 max*/
+
+#define ADRENO_PROFILE_LOG_BUF_SIZE (1024 * 920)
+/* sized for 1024 entries of fully assigned 45 counters in log buffer, 230 pages */
+#define ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS (ADRENO_PROFILE_LOG_BUF_SIZE / \
+	sizeof(unsigned int))
+
+#ifdef CONFIG_DEBUG_FS
+void adreno_profile_init(struct adreno_device *adreno_dev);
+void adreno_profile_close(struct adreno_device *adreno_dev);
+int adreno_profile_process_results(struct adreno_device *adreno_dev);
+void adreno_profile_preib_processing(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr);
+void adreno_profile_postib_processing(struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr);
+#else
+static inline void adreno_profile_init(struct adreno_device *adreno_dev) { }
+static inline void adreno_profile_close(struct adreno_device *adreno_dev) { }
+static inline int adreno_profile_process_results(
+		struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+
+static inline void adreno_profile_preib_processing(
+		struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr) { }
+
+static inline void adreno_profile_postib_processing(
+		struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr) { }
+#endif
+
+static inline bool adreno_profile_enabled(struct adreno_profile *profile)
+{
+	return profile->enabled;
+}
+
+static inline bool adreno_profile_has_assignments(
+	struct adreno_profile *profile)
+{
+	return !list_empty(&profile->assignments_list);
+}
+
+static inline bool adreno_profile_assignments_ready(
+	struct adreno_profile *profile)
+{
+	return adreno_profile_enabled(profile) &&
+		adreno_profile_has_assignments(profile);
+}
+
+#endif
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
new file mode 100644
index 000000000000..a80707385e3b
--- /dev/null
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -0,0 +1,1357 @@
+/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/log2.h> +#include <linux/time.h> +#include <linux/delay.h> + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "kgsl_trace.h" +#include "kgsl_pwrctrl.h" + +#include "adreno.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" + +#include "a3xx_reg.h" +#include "adreno_a5xx.h" + +#define GSL_RB_NOP_SIZEDWORDS 2 + +#define ADRENO_RB_PREEMPT_TOKEN_IB_DWORDS 50 +#define ADRENO_RB_PREEMPT_TOKEN_DWORDS 125 + +#define RB_HOSTPTR(_rb, _pos) \ + ((unsigned int *) ((_rb)->buffer_desc.hostptr + \ + ((_pos) * sizeof(unsigned int)))) + +#define RB_GPUADDR(_rb, _pos) \ + ((_rb)->buffer_desc.gpuaddr + ((_pos) * sizeof(unsigned int))) + +static void _cff_write_ringbuffer(struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + struct kgsl_device *device = &adreno_dev->dev; + uint64_t gpuaddr; + unsigned int *hostptr; + size_t size; + + if (device->cff_dump_enable == 0) + return; + + /* + * This code is predicated on the fact that we write a full block of + * stuff without wrapping + */ + BUG_ON(rb->wptr < rb->last_wptr); + + size = (rb->wptr - rb->last_wptr) * sizeof(unsigned int); + + hostptr = RB_HOSTPTR(rb, rb->last_wptr); + gpuaddr = RB_GPUADDR(rb, rb->last_wptr); + + kgsl_cffdump_memcpy(device, gpuaddr, hostptr, size); +} + +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + BUG_ON(rb->wptr == 0); + + /* Write the changes to CFF if so enabled */ + _cff_write_ringbuffer(rb); + + /* + * Read the current GPU ticks and wallclock for most accurate + * profiling + */ + + if (time != NULL) { + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. 
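+		 *
+		 * As a purely illustrative note (not used by the driver
+		 * itself): since the alwayson counter runs at the 19.2MHz
+		 * rate documented in adreno_ringbuffer.h, a later sample can
+		 * be related back to this pair with roughly
+		 *
+		 *   delta_ns ~= (later_ticks - time->ticks) * 10000 / 192;
+		 *   cpu_ns   ~= time->ktime + delta_ns;
+		 *
+		 * where later_ticks is a hypothetical second alwayson read.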
+ */ + unsigned long flags; + local_irq_save(flags); + + /* Read always on registers */ + if (!adreno_is_a3xx(adreno_dev)) { + adreno_readreg64(adreno_dev, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + &time->ticks); + + /* + * Mask hi bits as they may be incorrect on + * a4x and some a5x + */ + if (ADRENO_GPUREV(adreno_dev) >= 400 && + ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530) + time->ticks &= 0xFFFFFFFF; + } + else + time->ticks = 0; + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + getnstimeofday(&time->utime); + + local_irq_restore(flags); + } + + /* Memory barrier before informing the hardware of new commands */ + mb(); + + if (adreno_preempt_state(adreno_dev, ADRENO_DISPATCHER_PREEMPT_CLEAR) && + (adreno_dev->cur_rb == rb)) { + /* + * Let the pwrscale policy know that new commands have + * been submitted. + */ + kgsl_pwrscale_busy(rb->device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr); + } +} + +int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, unsigned int timeout) +{ + adreno_ringbuffer_submit(rb, NULL); + return adreno_spin_idle(rb->device, timeout); +} + +static int +adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, + unsigned int numcmds, int wptr_ahead) +{ + int nopcount = 0; + unsigned int freecmds; + unsigned int wptr = rb->wptr; + unsigned int *cmds = NULL; + uint64_t gpuaddr; + unsigned long wait_time; + unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); + unsigned int rptr; + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + + /* if wptr ahead, fill the remaining with NOPs */ + if (wptr_ahead) { + /* -1 for header */ + nopcount = KGSL_RB_DWORDS - rb->wptr - 1; + + cmds = RB_HOSTPTR(rb, rb->wptr); + gpuaddr = RB_GPUADDR(rb, rb->wptr); + + rptr = adreno_get_rptr(rb); + /* For non current rb we don't expect the rptr to move */ + if ((adreno_dev->cur_rb != rb || + !adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) && + !rptr) + return -ENOSPC; + + /* Make sure that rptr is not 0 before submitting + * commands at the end of ringbuffer. 
We do not + * want the rptr and wptr to become equal when + * the ringbuffer is not empty */ + wait_time = jiffies + wait_timeout; + while (!rptr) { + rptr = adreno_get_rptr(rb); + if (time_after(jiffies, wait_time)) + return -ETIMEDOUT; + } + + rb->wptr = 0; + } + + rptr = adreno_get_rptr(rb); + freecmds = rptr - rb->wptr; + if (freecmds == 0 || freecmds > numcmds) + goto done; + + /* non current rptr will not advance anyway or if preemption underway */ + if (adreno_dev->cur_rb != rb || + !adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) { + rb->wptr = wptr; + return -ENOSPC; + } + + wait_time = jiffies + wait_timeout; + /* wait for space in ringbuffer */ + while (1) { + rptr = adreno_get_rptr(rb); + + freecmds = rptr - rb->wptr; + + if (freecmds == 0 || freecmds > numcmds) + break; + + if (time_after(jiffies, wait_time)) { + KGSL_DRV_ERR(rb->device, + "Timed out waiting for freespace in RB rptr: 0x%x, wptr: 0x%x, rb id %d\n", + rptr, wptr, rb->id); + return -ETIMEDOUT; + } + } +done: + if (wptr_ahead) { + *cmds = cp_packet(adreno_dev, CP_NOP, nopcount); + kgsl_cffdump_write(rb->device, gpuaddr, *cmds); + + } + return 0; +} + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds) +{ + unsigned int *ptr = NULL; + int ret = 0; + unsigned int rptr; + BUG_ON(numcmds >= KGSL_RB_DWORDS); + + rptr = adreno_get_rptr(rb); + /* check for available space */ + if (rb->wptr >= rptr) { + /* wptr ahead or equal to rptr */ + /* reserve dwords for nop packet */ + if ((rb->wptr + numcmds) > (KGSL_RB_DWORDS - + GSL_RB_NOP_SIZEDWORDS)) + ret = adreno_ringbuffer_waitspace(rb, numcmds, 1); + } else { + /* wptr behind rptr */ + if ((rb->wptr + numcmds) >= rptr) + ret = adreno_ringbuffer_waitspace(rb, numcmds, 0); + /* check for remaining space */ + /* reserve dwords for nop packet */ + if (!ret && (rb->wptr + numcmds) > (KGSL_RB_DWORDS - + GSL_RB_NOP_SIZEDWORDS)) + ret = adreno_ringbuffer_waitspace(rb, numcmds, 1); + } + + if (!ret) { + rb->last_wptr = rb->wptr; + + ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; + rb->wptr += numcmds; + } else + ptr = ERR_PTR(ret); + + return ptr; +} + +/** + * _ringbuffer_setup_common() - Ringbuffer start + * @rb: Pointer to adreno ringbuffer + * + * Setup ringbuffer for GPU. + */ +static void _ringbuffer_setup_common(struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb_temp; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb_temp, i) { + kgsl_sharedmem_set(rb_temp->device, + &(rb_temp->buffer_desc), 0, + 0xAA, KGSL_RB_SIZE); + rb_temp->wptr = 0; + rb_temp->rptr = 0; + rb_temp->wptr_preempt_end = 0xFFFFFFFF; + rb_temp->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + adreno_iommu_set_pt_generate_rb_cmds(rb_temp, + device->mmu.defaultpagetable); + } + + /* + * The size of the ringbuffer in the hardware is the log2 + * representation of the size in quadwords (sizedwords / 2). + * Also disable the host RPTR shadow register as it might be unreliable + * in certain circumstances. 
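+	 *
+	 * Worked example (for reference only): with KGSL_RB_SIZE = 32KB the
+	 * ringbuffer is 8192 dwords (KGSL_RB_DWORDS), i.e. 4096 quadwords,
+	 * so the write below programs
+	 *
+	 *   (ilog2(8192 >> 1) & 0x3F) | (1 << 27) == 12 | (1 << 27)
+	 *                                         == 0x0800000C
+	 *
+	 * where bit 27 is the RPTR shadow disable mentioned above.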
+ */ + + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL, + (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) | + (1 << 27)); + + adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, rb->buffer_desc.gpuaddr); + + /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ + if (adreno_is_a3xx(adreno_dev)) { + unsigned int val = 0x000E0602; + + if (adreno_is_a305b(adreno_dev) || + adreno_is_a310(adreno_dev) || + adreno_is_a330(adreno_dev)) + val = 0x003E2008; + kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, val); + } +} + +/** + * _ringbuffer_start_common() - Ringbuffer start + * @rb: Pointer to adreno ringbuffer + * + * Start ringbuffer for GPU. + */ +static int _ringbuffer_start_common(struct adreno_ringbuffer *rb) +{ + int status; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + /* clear ME_HALT to start micro engine */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); + + /* ME init is GPU specific, so jump into the sub-function */ + status = gpudev->rb_init(adreno_dev, rb); + if (status) + return status; + + return status; +} + +/** + * adreno_ringbuffer_start() - Ringbuffer start + * @adreno_dev: Pointer to adreno device + * @start_type: Warm or cold start + */ +int adreno_ringbuffer_start(struct adreno_device *adreno_dev, + unsigned int start_type) +{ + int status; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + _ringbuffer_setup_common(rb); + + status = gpudev->microcode_load(adreno_dev, start_type); + if (status) + return status; + + return _ringbuffer_start_common(rb); +} + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_ringbuffer *rb; + int i; + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + kgsl_cancel_events(device, &(rb->events)); +} + +static int _adreno_ringbuffer_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, int id) +{ + int ret; + char name[64]; + + rb->device = &adreno_dev->dev; + rb->id = id; + + snprintf(name, sizeof(name), "rb_events-%d", id); + kgsl_add_event_group(&rb->events, NULL, name, + adreno_rb_readtimestamp, rb); + rb->timestamp = 0; + init_waitqueue_head(&rb->ts_expire_waitq); + + /* + * Allocate mem for storing RB pagetables and commands to + * switch pagetable + */ + ret = kgsl_allocate_global(&adreno_dev->dev, &rb->pagetable_desc, + PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED); + if (ret) + return ret; + + ret = kgsl_allocate_global(&adreno_dev->dev, &rb->buffer_desc, + KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0); + return ret; +} + +int adreno_ringbuffer_init(struct adreno_device *adreno_dev, bool nopreempt) +{ + int status = 0; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + int i; + + if (nopreempt == false && ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + adreno_dev->num_ringbuffers = gpudev->num_prio_levels; + else + adreno_dev->num_ringbuffers = 1; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + status = _adreno_ringbuffer_init(adreno_dev, rb, i); + if (status) + break; + } + if (status) + adreno_ringbuffer_close(adreno_dev); + else + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + return status; +} + +static void _adreno_ringbuffer_close(struct adreno_ringbuffer *rb) +{ + kgsl_free_global(&rb->pagetable_desc); + kgsl_free_global(&rb->preemption_desc); + + 
memset(&rb->pt_update_desc, 0, sizeof(struct kgsl_memdesc)); + + kgsl_free_global(&rb->buffer_desc); + kgsl_del_event_group(&rb->events); + memset(rb, 0, sizeof(struct adreno_ringbuffer)); +} + +void adreno_ringbuffer_close(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + _adreno_ringbuffer_close(rb); +} + +/* + * cp_secure_mode() - Put GPU in trusted mode + * @adreno_dev: Pointer to adreno device + * @cmds: Pointer to cmds to be put in the ringbuffer + * @set: 1 - secure mode, 0 - unsecure mode + * + * Add commands to the ringbuffer to put the GPU in secure mode + * or unsecure mode based on the variable set. + */ +int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds, + int set) +{ + uint *start = cmds; + + if (adreno_is_a4xx(adreno_dev)) { + cmds += cp_wait_for_idle(adreno_dev, cmds); + /* + * The two commands will stall the PFP until the PFP-ME-AHB + * is drained and the GPU is idle. As soon as this happens, + * the PFP will start moving again. + */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + /* + * Below commands are processed by ME. GPU will be + * idle when they are processed. But the PFP will continue + * to fetch instructions at the same time. + */ + *cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + *cmds++ = cp_packet(adreno_dev, CP_WIDE_REG_WRITE, 2); + *cmds++ = adreno_getreg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TRUST_CONTROL); + *cmds++ = set; + *cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + /* Stall PFP until all above commands are complete */ + cmds += cp_wait_for_me(adreno_dev, cmds); + } else { + /* + * A5xx has a separate opcode specifically to put the GPU + * in and out of secure mode. + */ + *cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1); + *cmds++ = set; + } + + return cmds - start; +} + +static int +adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, + unsigned int flags, unsigned int *cmds, + unsigned int sizedwords, uint32_t timestamp, + struct adreno_submit_time *time) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = rb->device; + unsigned int *ringcmds, *start; + unsigned int total_sizedwords = sizedwords; + unsigned int i; + unsigned int context_id = 0; + uint64_t gpuaddr = rb->device->memstore.gpuaddr; + bool profile_ready; + struct adreno_context *drawctxt = rb->drawctxt_active; + struct kgsl_context *context = NULL; + bool secured_ctxt = false; + uint64_t cond_addr; + + if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base) && + !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + return -ENOENT; + + rb->timestamp++; + + /* If this is a internal IB, use the global timestamp for it */ + if (!drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + timestamp = rb->timestamp; + else { + context_id = drawctxt->base.id; + context = &drawctxt->base; + } + + /* + * Note that we cannot safely take drawctxt->mutex here without + * potential mutex inversion with device->mutex which is held + * here. As a result, any other code that accesses this variable + * must also use device->mutex. + */ + if (drawctxt) { + drawctxt->internal_timestamp = rb->timestamp; + if (drawctxt->base.flags & KGSL_CONTEXT_SECURE) + secured_ctxt = true; + } + + /* + * If in stream ib profiling is enabled and there are counters + * assigned, then space needs to be reserved for profiling. 
This + * space in the ringbuffer is always consumed (might be filled with + * NOPs in error case. profile_ready needs to be consistent through + * the _addcmds call since it is allocating additional ringbuffer + * command space. + */ + profile_ready = drawctxt && + adreno_profile_assignments_ready(&adreno_dev->profile) && + !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE); + + /* reserve space to temporarily turn off protected mode + * error checking if needed + */ + total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0; + /* 2 dwords to store the start of command sequence */ + total_sizedwords += 2; + /* internal ib command identifier for the ringbuffer */ + total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0; + + total_sizedwords += (secured_ctxt) ? 26 : 0; + + /* context rollover */ + if (adreno_is_a3xx(adreno_dev)) + total_sizedwords += 3; + + /* For HLSQ updates below */ + if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev)) + total_sizedwords += 4; + + if (gpudev->preemption_pre_ibsubmit && + adreno_is_preemption_enabled(adreno_dev)) + total_sizedwords += 20; + + if (gpudev->preemption_post_ibsubmit && + adreno_is_preemption_enabled(adreno_dev)) + total_sizedwords += 13; + + /* + * a5xx uses 64 bit memory address. pm4 commands that involve read/write + * from memory take 4 bytes more than a4xx because of 64 bit addressing. + * This function is shared between gpucores, so reserve the max size + * required in ringbuffer and adjust the write pointer depending on + * gpucore at the end of this function. + */ + total_sizedwords += 4; /* sop timestamp */ + total_sizedwords += 5; /* eop timestamp */ + + if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) { + total_sizedwords += 4; /* global timestamp without cache + * flush for non-zero context */ + } + + if (flags & KGSL_CMD_FLAGS_WFI) + total_sizedwords += 2; /* WFI */ + + if (profile_ready) + total_sizedwords += 8; /* space for pre_ib and post_ib */ + + /* Add space for the power on shader fixup if we need it */ + if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) + total_sizedwords += 9; + + /* WAIT_MEM_WRITES - needed in the stall on fault case + * to prevent out of order CP operations that can result + * in a CACHE_FLUSH_TS interrupt storm */ + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy)) + total_sizedwords += 1; + + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); + if (IS_ERR(ringcmds)) + return PTR_ERR(ringcmds); + + start = ringcmds; + + *ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *ringcmds++ = KGSL_CMD_IDENTIFIER; + + if (adreno_is_preemption_enabled(adreno_dev) && + gpudev->preemption_pre_ibsubmit) { + cond_addr = device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(context_id, + preempted); + ringcmds += gpudev->preemption_pre_ibsubmit( + adreno_dev, rb, ringcmds, context, + cond_addr, NULL); + } + + if (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) { + *ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *ringcmds++ = KGSL_CMD_INTERNAL_IDENTIFIER; + } + + if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) { + /* Disable protected mode for the fixup */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 0; + + *ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *ringcmds++ = KGSL_PWRON_FIXUP_IDENTIFIER; + *ringcmds++ = cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1); + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, + adreno_dev->pwron_fixup.gpuaddr); + *ringcmds++ = adreno_dev->pwron_fixup_dwords; + + /* Re-enable protected mode */ + *ringcmds++ = 
cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 1; + } + + /* Add any IB required for profiling if it is enabled */ + if (profile_ready) + adreno_profile_preib_processing(adreno_dev, drawctxt, + &flags, &ringcmds); + + /* start-of-pipeline timestamp */ + *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, + gpuaddr + KGSL_MEMSTORE_OFFSET(context_id, + soptimestamp)); + else + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, + gpuaddr + KGSL_MEMSTORE_RB_OFFSET(rb, soptimestamp)); + *ringcmds++ = timestamp; + + if (secured_ctxt) + ringcmds += cp_secure_mode(adreno_dev, ringcmds, 1); + + if (flags & KGSL_CMD_FLAGS_PMODE) { + /* disable protected mode error checking */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 0; + } + + for (i = 0; i < sizedwords; i++) + *ringcmds++ = cmds[i]; + + if (flags & KGSL_CMD_FLAGS_PMODE) { + /* re-enable protected mode error checking */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 1; + } + + /* + * Flush HLSQ lazy updates to make sure there are no + * resources pending for indirect loads after the timestamp + */ + if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev)) { + *ringcmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1); + *ringcmds++ = 0x07; /* HLSQ_FLUSH */ + ringcmds += cp_wait_for_idle(adreno_dev, ringcmds); + } + + /* Add any postIB required for profiling if it is enabled and has + assigned counters */ + if (profile_ready) + adreno_profile_postib_processing(adreno_dev, &flags, &ringcmds); + + /* + * WAIT_MEM_WRITES - needed in the stall on fault case to prevent + * out of order CP operations that can result in a CACHE_FLUSH_TS + * interrupt storm + */ + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy)) + *ringcmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 0); + + /* + * end-of-pipeline timestamp. If per context timestamps is not + * enabled, then drawctxt will be NULL or internal command flag will be + * set and hence the rb timestamp will be used in else statement below. 
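+	 *
+	 * (For reference: KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp) in
+	 * adreno_ringbuffer.h expands to the memstore slot for index
+	 * rb->id + KGSL_MEMSTORE_MAX, which keeps the per-RB timestamps
+	 * separate from the per-context slots.)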
+ */ + *ringcmds++ = cp_mem_packet(adreno_dev, CP_EVENT_WRITE, 3, 1); + if (drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + *ringcmds++ = CACHE_FLUSH_TS | (1 << 31); + else + *ringcmds++ = CACHE_FLUSH_TS; + + if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) { + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr + + KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)); + *ringcmds++ = timestamp; + *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp)); + *ringcmds++ = rb->timestamp; + } else { + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp)); + *ringcmds++ = timestamp; + } + + if (adreno_is_a3xx(adreno_dev)) { + /* Dummy set-constant to trigger context rollover */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_CONSTANT, 2); + *ringcmds++ = + (0x4<<16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000); + *ringcmds++ = 0; + } + + if (flags & KGSL_CMD_FLAGS_WFI) { + ringcmds += cp_wait_for_idle(adreno_dev, ringcmds); + } + + if (secured_ctxt) + ringcmds += cp_secure_mode(adreno_dev, ringcmds, 0); + + if (gpudev->preemption_post_ibsubmit && + adreno_is_preemption_enabled(adreno_dev)) + ringcmds += gpudev->preemption_post_ibsubmit(adreno_dev, + rb, ringcmds, &drawctxt->base); + + /* + * If we have more ringbuffer commands than space reserved + * in ringbuffer BUG() to fix this because it will lead to + * weird errors. + */ + if ((ringcmds - start) > total_sizedwords) + BUG(); + /* + * Allocate total_sizedwords space in RB, this is the max space + * required. If we have commands less than the space reserved in RB + * adjust the wptr accordingly. + */ + rb->wptr = rb->wptr - (total_sizedwords - (ringcmds - start)); + + adreno_ringbuffer_submit(rb, time); + + return 0; +} + +int +adreno_ringbuffer_issuecmds(struct adreno_ringbuffer *rb, + unsigned int flags, + unsigned int *cmds, + int sizedwords) +{ + flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE; + + return adreno_ringbuffer_addcmds(rb, flags, cmds, + sizedwords, 0, NULL); +} + +/** + * _ringbuffer_verify_ib() - Check if an IB's size is within a permitted limit + * @device: The kgsl device pointer + * @ibdesc: Pointer to the IB descriptor + */ +static inline bool _ringbuffer_verify_ib(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_memobj_node *ib) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_process_private *private = dev_priv->process_priv; + + /* The maximum allowable size for an IB in the CP is 0xFFFFF dwords */ + if (ib->size == 0 || ((ib->size >> 2) > 0xFFFFF)) { + pr_context(device, context, "ctxt %d invalid ib size %lld\n", + context->id, ib->size); + return false; + } + + /* Make sure that the address is mapped */ + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) { + pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", + context->id, ib->gpuaddr); + return false; + } + + return true; +} + +int +adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct kgsl_memobj_node *ib; + int ret; + + if (kgsl_context_invalid(context)) + return -EDEADLK; + + /* Verify the IBs before they get queued */ + list_for_each_entry(ib, 
&cmdbatch->cmdlist, node) + if (_ringbuffer_verify_ib(dev_priv, context, ib) == false) + return -EINVAL; + + /* wait for the suspend gate */ + wait_for_completion(&device->cmdbatch_gate); + + /* + * Clear the wake on touch bit to indicate an IB has been + * submitted since the last time we set it. But only clear + * it when we have rendering commands. + */ + if (!(cmdbatch->flags & KGSL_CMDBATCH_MARKER) + && !(cmdbatch->flags & KGSL_CMDBATCH_SYNC)) + device->flags &= ~KGSL_FLAG_WAKE_ON_TOUCH; + + /* Queue the command in the ringbuffer */ + ret = adreno_dispatcher_queue_cmd(adreno_dev, drawctxt, cmdbatch, + timestamp); + + /* + * Return -EPROTO if the device has faulted since the last time we + * checked - userspace uses this to perform post-fault activities + */ + if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv)) + ret = -EPROTO; + + return ret; +} + +static void adreno_ringbuffer_set_constraint(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_context *context = cmdbatch->context; + /* + * Check if the context has a constraint and constraint flags are + * set. + */ + if (context->pwr_constraint.type && + ((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) || + (cmdbatch->flags & KGSL_CONTEXT_PWR_CONSTRAINT))) + kgsl_pwrctrl_set_constraint(device, &context->pwr_constraint, + context->id); +} + +static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, + unsigned int *cmds, uint64_t gpuaddr) +{ + unsigned int *p = cmds; + + *p++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1); + + /* + * For a4x and some a5x the alwayson_hi read through CPU + * will be masked. Only do 32 bit CP reads for keeping the + * numbers consistent + */ + if (ADRENO_GPUREV(adreno_dev) >= 400 && + ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530) + *p++ = adreno_getreg(adreno_dev, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO); + else + *p++ = adreno_getreg(adreno_dev, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO) | + (1 << 30) | (2 << 18); + p += cp_gpuaddr(adreno_dev, p, gpuaddr); + + return (unsigned int)(p - cmds); +} + +/* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */ +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch, struct adreno_submit_time *time) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_memobj_node *ib; + unsigned int numibs = 0; + unsigned int *link; + unsigned int *cmds; + struct kgsl_context *context; + struct adreno_context *drawctxt; + bool use_preamble = true; + bool cmdbatch_user_profiling = false; + bool cmdbatch_kernel_profiling = false; + int flags = KGSL_CMD_FLAGS_NONE; + int ret; + struct adreno_ringbuffer *rb; + struct kgsl_cmdbatch_profiling_buffer *profile_buffer = NULL; + unsigned int dwords = 0; + struct adreno_submit_time local; + + struct kgsl_mem_entry *entry = cmdbatch->profiling_buf_entry; + if (entry) + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdbatch->profiling_buffer_gpuaddr); + + context = cmdbatch->context; + drawctxt = ADRENO_CONTEXT(context); + + /* Get the total IBs in the list */ + list_for_each_entry(ib, &cmdbatch->cmdlist, node) + numibs++; + + rb = drawctxt->rb; + + /* process any profiling results that are available into the log_buf */ + adreno_profile_process_results(adreno_dev); + + /* + * If SKIP CMD flag is set for current context + * a) set SKIPCMD as fault_recovery for current commandbatch + * b) store context's commandbatch fault_policy in current + * commandbatch fault_policy and clear context's commandbatch + * 
fault_policy
+	 * c) force preamble for commandbatch
+	 */
+	if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) &&
+		(!test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv))) {
+
+		set_bit(KGSL_FT_SKIPCMD, &cmdbatch->fault_recovery);
+		cmdbatch->fault_policy = drawctxt->fault_policy;
+		set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv);
+
+		/* if context is detached print fault recovery */
+		adreno_fault_skipcmd_detached(device, drawctxt, cmdbatch);
+
+		/* clear the drawctxt flags */
+		clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+		drawctxt->fault_policy = 0;
+	}
+
+	/*
+	 * When preamble is enabled, the preamble buffer with state
+	 * restoration commands is stored in the first node of the IB chain.
+	 * We can skip that if a context switch hasn't occurred.
+	 */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
+		!test_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv) &&
+		(rb->drawctxt_active == drawctxt))
+		use_preamble = false;
+
+	/*
+	 * In skip mode don't issue the draw IBs but keep all the other
+	 * accoutrements of a submission (including the interrupt) to keep
+	 * the accounting sane. Set start_index and numibs to 0 to just
+	 * generate the start and end markers and skip everything else
+	 */
+	if (test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv)) {
+		use_preamble = false;
+		numibs = 0;
+	}
+
+	/*
+	 * a5xx uses 64 bit memory address. pm4 commands that involve read/write
+	 * from memory take 4 bytes more than a4xx because of 64 bit addressing.
+	 * This function is shared between gpucores, so reserve the max size
+	 * required and adjust the number of commands before calling addcmds.
+	 * Each submission needs 7 dwords max for wrappers and other red tape.
+	 */
+	dwords = 7;
+
+	/* Each IB takes up 30 dwords in worst case */
+	dwords += (numibs * 30);
+
+	if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING &&
+		!adreno_is_a3xx(adreno_dev) && profile_buffer) {
+		cmdbatch_user_profiling = true;
+		dwords += 6;
+
+		/*
+		 * REG_TO_MEM packet on A5xx needs another ordinal.
+		 * Add 2 more dwords since we do profiling before and after.
+		 */
+		if (adreno_is_a5xx(adreno_dev))
+			dwords += 2;
+
+		/*
+		 * We want to use an adreno_submit_time struct to get the
+		 * precise moment when the command is submitted to the
+		 * ringbuffer. 
If an upstream caller already passed down a + * pointer piggyback on that otherwise use a local struct + */ + + if (time == NULL) + time = &local; + } + + if (test_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv)) { + cmdbatch_kernel_profiling = true; + dwords += 6; + if (adreno_is_a5xx(adreno_dev)) + dwords += 2; + } + + link = kzalloc(sizeof(unsigned int) * dwords, GFP_KERNEL); + if (!link) { + ret = -ENOMEM; + goto done; + } + + cmds = link; + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + + if (cmdbatch_kernel_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + adreno_dev->cmdbatch_profile_buffer.gpuaddr + + ADRENO_CMDBATCH_PROFILE_OFFSET(cmdbatch->profile_index, + started)); + } + + /* + * Add cmds to read the GPU ticks at the start of the cmdbatch and + * write it into the appropriate cmdbatch profiling buffer offset + */ + if (cmdbatch_user_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + cmdbatch->profiling_buffer_gpuaddr + + offsetof(struct kgsl_cmdbatch_profiling_buffer, + gpu_ticks_submitted)); + } + + if (numibs) { + list_for_each_entry(ib, &cmdbatch->cmdlist, node) { + /* + * Skip 0 sized IBs - these are presumed to have been + * removed from consideration by the FT policy + */ + if (ib->priv & MEMOBJ_SKIP || + (ib->priv & MEMOBJ_PREAMBLE && + use_preamble == false)) + *cmds++ = cp_mem_packet(adreno_dev, CP_NOP, + 3, 1); + + *cmds++ = cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, ib->gpuaddr); + *cmds++ = (unsigned int) ib->size >> 2; + /* preamble is required on only for first command */ + use_preamble = false; + } + } + + if (cmdbatch_kernel_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + adreno_dev->cmdbatch_profile_buffer.gpuaddr + + ADRENO_CMDBATCH_PROFILE_OFFSET(cmdbatch->profile_index, + retired)); + } + + /* + * Add cmds to read the GPU ticks at the end of the cmdbatch and + * write it into the appropriate cmdbatch profiling buffer offset + */ + if (cmdbatch_user_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + cmdbatch->profiling_buffer_gpuaddr + + offsetof(struct kgsl_cmdbatch_profiling_buffer, + gpu_ticks_retired)); + } + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_END_OF_IB_IDENTIFIER; + + ret = adreno_drawctxt_switch(adreno_dev, rb, drawctxt, cmdbatch->flags); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) + goto done; + + if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv)) + flags = KGSL_CMD_FLAGS_WFI; + + /* + * For some targets, we need to execute a dummy shader operation after a + * power collapse + */ + + if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) && + test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + flags |= KGSL_CMD_FLAGS_PWRON_FIXUP; + + /* Set the constraints before adding to ringbuffer */ + adreno_ringbuffer_set_constraint(device, cmdbatch); + + /* CFF stuff executed only if CFF is enabled */ + kgsl_cffdump_capture_ib_desc(device, context, cmdbatch); + + + ret = adreno_ringbuffer_addcmds(rb, flags, + &link[0], (cmds - link), + cmdbatch->timestamp, time); + + if (!ret) { + cmdbatch->global_ts = drawctxt->internal_timestamp; + + /* Put the timevalues in the profiling buffer */ + if (cmdbatch_user_profiling) { + profile_buffer->wall_clock_s = time->utime.tv_sec; + profile_buffer->wall_clock_ns = time->utime.tv_nsec; + profile_buffer->gpu_ticks_queued = time->ticks; + } + } + + 
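+	/*
+	 * Record a poll of RBBM_STATUS (mask 0x80000000, expected value 0)
+	 * in the CFF capture below; like the other kgsl_cffdump_* calls in
+	 * this function it only has an effect when CFF dumping is enabled.
+	 */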
kgsl_cffdump_regpoll(device, + adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2, + 0x00000000, 0x80000000); +done: + /* Corresponding unmap to the memdesc map of profile_buffer */ + if (entry) + kgsl_memdesc_unmap(&entry->memdesc); + + + trace_kgsl_issueibcmds(device, context->id, cmdbatch, + numibs, cmdbatch->timestamp, + cmdbatch->flags, ret, drawctxt->type); + + kfree(link); + return ret; +} + +/** + * adreno_ringbuffer_mmu_clk_disable_event() - Callback function that + * disables the MMU clocks. + * @device: Device pointer + * @context: The ringbuffer context pointer + * @data: Pointer containing the adreno_mmu_disable_clk_param structure + * @type: The event call type (RETIRED or CANCELLED) + */ +static void adreno_ringbuffer_mmu_clk_disable_event(struct kgsl_device *device, + struct kgsl_event_group *group, void *data, int type) +{ + kgsl_mmu_disable_clk(&device->mmu); +} + +/* + * adreno_ringbuffer_mmu_disable_clk_on_ts() - Sets up event to disable MMU + * clocks + * @device - The kgsl device pointer + * @rb: The ringbuffer in whose event list the event is added + * @timestamp: The timestamp on which the event should trigger + * + * Creates an event to disable the MMU clocks on timestamp and if event + * already exists then updates the timestamp of disabling the MMU clocks + * with the passed in ts if it is greater than the current value at which + * the clocks will be disabled + * Return - void + */ +void +adreno_ringbuffer_mmu_disable_clk_on_ts(struct kgsl_device *device, + struct adreno_ringbuffer *rb, unsigned int timestamp) +{ + if (kgsl_add_event(device, &(rb->events), timestamp, + adreno_ringbuffer_mmu_clk_disable_event, NULL)) { + KGSL_DRV_ERR(device, + "Failed to add IOMMU disable clk event\n"); + } +} + +/** + * adreno_ringbuffer_wait_callback() - Callback function for event registered + * on a ringbuffer timestamp + * @device: Device for which the the callback is valid + * @context: The context of the event + * @priv: The private parameter of the event + * @result: Result of the event trigger + */ +static void adreno_ringbuffer_wait_callback(struct kgsl_device *device, + struct kgsl_event_group *group, + void *priv, int result) +{ + struct adreno_ringbuffer *rb = group->priv; + wake_up_all(&rb->ts_expire_waitq); +} + +/** + * adreno_ringbuffer_waittimestamp() - Wait for a RB timestamp + * @rb: The ringbuffer to wait on + * @timestamp: The timestamp to wait for + * @msecs: The wait timeout period + */ +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs) +{ + struct kgsl_device *device = rb->device; + int ret; + unsigned long wait_time; + + /* force a timeout from caller for the wait */ + BUG_ON(0 == msecs); + + ret = kgsl_add_event(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL); + if (ret) + return ret; + + mutex_unlock(&device->mutex); + + wait_time = msecs_to_jiffies(msecs); + if (0 == wait_event_timeout(rb->ts_expire_waitq, + !kgsl_event_pending(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL), + wait_time)) + ret = -ETIMEDOUT; + + mutex_lock(&device->mutex); + /* + * after wake up make sure that expected timestamp has retired + * because the wakeup could have happened due to a cancel event + */ + if (!ret && !adreno_ringbuffer_check_timestamp(rb, + timestamp, KGSL_TIMESTAMP_RETIRED)) { + ret = -EAGAIN; + } + + return ret; +} + +/** + * adreno_ringbuffer_submit_preempt_token() - Submit a preempt token + * @rb: Ringbuffer in which the token is submitted + 
* @incoming_rb: The RB to which the GPU switches when this preemption + * token is executed. + * + * Called to make sure that an outstanding preemption request is + * granted. + */ +int adreno_ringbuffer_submit_preempt_token(struct adreno_ringbuffer *rb, + struct adreno_ringbuffer *incoming_rb) +{ + unsigned int *ringcmds, *start; + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + struct kgsl_device *device = &(adreno_dev->dev); + struct kgsl_iommu *iommu = device->mmu.priv; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ptname; + struct kgsl_pagetable *pt; + int pt_switch_sizedwords = 0, total_sizedwords = 20; + unsigned link[ADRENO_RB_PREEMPT_TOKEN_DWORDS]; + uint i; + uint64_t ttbr0; + + if (incoming_rb->preempted_midway) { + + if (adreno_is_a5xx(adreno_dev)) { + kgsl_sharedmem_readq(&rb->pagetable_desc, &ttbr0, + offsetof(struct adreno_ringbuffer_pagetable_info + , ttbr0)); + kgsl_sharedmem_writeq(rb->device, &iommu->smmu_info, + offsetof(struct a5xx_cp_smmu_info, ttbr0), + ttbr0); + } else { + kgsl_sharedmem_readl(&incoming_rb->pagetable_desc, + &ptname, offsetof( + struct adreno_ringbuffer_pagetable_info, + current_rb_ptname)); + pt = kgsl_mmu_get_pt_from_ptname(&(rb->device->mmu), + ptname); + /* + * always expect a valid pt, else pt refcounting is + * messed up or current pt tracking has a bug which + * could lead to eventual disaster + */ + BUG_ON(!pt); + /* set the ringbuffer for incoming RB */ + pt_switch_sizedwords = + adreno_iommu_set_pt_generate_cmds(incoming_rb, + &link[0], pt); + total_sizedwords += pt_switch_sizedwords; + + } + } + + /* + * Allocate total_sizedwords space in RB, this is the max space + * required. + */ + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); + + if (IS_ERR(ringcmds)) + return PTR_ERR(ringcmds); + + start = ringcmds; + + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 0; + + if (incoming_rb->preempted_midway) { + for (i = 0; i < pt_switch_sizedwords; i++) + *ringcmds++ = link[i]; + } + + *ringcmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev, + ADRENO_REG_CP_PREEMPT_DISABLE), 1); + *ringcmds++ = 0; + + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 1; + + ringcmds += gpudev->preemption_token(adreno_dev, rb, ringcmds, + rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, preempted)); + + if ((uint)(ringcmds - start) > total_sizedwords) { + KGSL_DRV_ERR(device, "Insufficient rb size allocated\n"); + BUG(); + } + + /* + * If we have commands less than the space reserved in RB + * adjust the wptr accordingly + */ + rb->wptr = rb->wptr - (total_sizedwords - (uint)(ringcmds - start)); + + /* submit just the preempt token */ + mb(); + kgsl_pwrscale_busy(rb->device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr); + return 0; +} diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h new file mode 100644 index 000000000000..7fd28f52db83 --- /dev/null +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -0,0 +1,226 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_RINGBUFFER_H
+#define __ADRENO_RINGBUFFER_H
+
+#include "kgsl_iommu.h"
+#include "adreno_dispatch.h"
+
+/* Adreno ringbuffer size in bytes */
+#define KGSL_RB_SIZE (32 * 1024)
+
+/*
+ * A handy macro to convert the RB size to dwords since most ringbuffer
+ * operations happen in dword increments
+ */
+#define KGSL_RB_DWORDS (KGSL_RB_SIZE >> 2)
+
+struct kgsl_device;
+struct kgsl_device_private;
+
+/**
+ * struct adreno_submit_time - utility structure to store the wall clock / GPU
+ * ticks at command submit time
+ * @ticks: GPU ticks at submit time (from the 19.2MHz always-on counter)
+ * @ktime: local clock time (in nanoseconds)
+ * @utime: Wall clock time
+ */
+struct adreno_submit_time {
+	uint64_t ticks;
+	u64 ktime;
+	struct timespec utime;
+};
+
+/**
+ * struct adreno_ringbuffer_pagetable_info - Contains fields used during a
+ * pagetable switch.
+ * @current_global_ptname: The current pagetable id being used by the GPU.
+ * Only the ringbuffers[0] current_global_ptname is used to keep track of
+ * the current pagetable id
+ * @current_rb_ptname: The current pagetable active on the given RB
+ * @incoming_ptname: Contains the incoming pagetable we are switching to. After
+ * switching of pagetable this value equals current_rb_ptname.
+ * @switch_pt_enable: Flag used during pagetable switch to check if pt
+ * switch can be skipped
+ * @ttbr0: value to program into TTBR0 during pagetable switch.
+ * @contextidr: value to program into CONTEXTIDR during pagetable switch.
+ */
+struct adreno_ringbuffer_pagetable_info {
+	int current_global_ptname;
+	int current_rb_ptname;
+	int incoming_ptname;
+	int switch_pt_enable;
+	uint64_t ttbr0;
+	unsigned int contextidr;
+};
+
+/**
+ * struct adreno_ringbuffer - Definition for an adreno ringbuffer object
+ * @device: KGSL device that owns the ringbuffer object
+ * @flags: Internal control flags for the ringbuffer
+ * @buffer_desc: The ringbuffer memory descriptor
+ * @sizedwords: Size of the ringbuffer in dwords
+ * @wptr: Local copy of the wptr offset
+ * @rptr: Read pointer offset in dwords from baseaddr
+ * @last_wptr: offset of the last H/W committed wptr
+ * @id: Priority level of the ringbuffer, also used as an ID
+ * @fault_detect_ts: The last retired global timestamp read during fault detect
+ * @timestamp: The RB's global timestamp
+ * @events: A kgsl_event_group for this ringbuffer - contains the list of GPU
+ * events
+ * @drawctxt_active: The last drawctxt that was active on this ringbuffer
+ * @preemption_desc: The memory descriptor containing
+ * preemption info written/read by CP
+ * @pagetable_desc: Memory to hold information about the pagetables being used
+ * and the commands to switch pagetable on the RB
+ * @pt_update_desc: The memory descriptor containing commands that update
+ * pagetable
+ * @dispatch_q: The dispatcher side queue for this ringbuffer
+ * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire
+ * @wptr_preempt_end: Used during preemption to check that preemption occurred
+ * at the right rptr
+ * @gpr11: The gpr11 value of this RB
+ * @preempted_midway: Indicates that the RB was preempted before rptr = wptr
+ * @sched_timer: Timer that tracks how long RB has been 
waiting to be scheduled + * or how long it has been scheduled for after preempting in + * @starve_timer_state: Indicates the state of the wait. + */ +struct adreno_ringbuffer { + struct kgsl_device *device; + uint32_t flags; + struct kgsl_memdesc buffer_desc; + unsigned int sizedwords; + unsigned int wptr; + unsigned int rptr; + unsigned int last_wptr; + int id; + unsigned int fault_detect_ts; + unsigned int timestamp; + struct kgsl_event_group events; + struct adreno_context *drawctxt_active; + struct kgsl_memdesc preemption_desc; + struct kgsl_memdesc pagetable_desc; + struct kgsl_memdesc pt_update_desc; + struct adreno_dispatcher_cmdqueue dispatch_q; + wait_queue_head_t ts_expire_waitq; + unsigned int wptr_preempt_end; + unsigned int gpr11; + int preempted_midway; + unsigned long sched_timer; + enum adreno_dispatcher_starve_timer_states starve_timer_state; +}; + +/* enable timestamp (...scratch0) memory shadowing */ +#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1 + +/* + * protected mode error checking below register address 0x800 + * note: if CP_INTERRUPT packet is used then checking needs + * to change to below register address 0x7C8 + */ +#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 + +/* Returns the current ringbuffer */ +#define ADRENO_CURRENT_RINGBUFFER(a) ((a)->cur_rb) + +#define KGSL_MEMSTORE_RB_OFFSET(rb, field) \ + KGSL_MEMSTORE_OFFSET((rb->id + KGSL_MEMSTORE_MAX), field) + +int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds, int set); + +int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp); + +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch, + struct adreno_submit_time *time); + +int adreno_ringbuffer_init(struct adreno_device *adreno_dev, bool nopreempt); + +int adreno_ringbuffer_start(struct adreno_device *adreno_dev, + unsigned int start_type); + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev); + +void adreno_ringbuffer_close(struct adreno_device *adreno_dev); + +int adreno_ringbuffer_issuecmds(struct adreno_ringbuffer *rb, + unsigned int flags, + unsigned int *cmdaddr, + int sizedwords); + +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, unsigned int timeout); + +void kgsl_cp_intrcallback(struct kgsl_device *device); + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + +void adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device); + +void adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device); + +void adreno_ringbuffer_mmu_disable_clk_on_ts(struct kgsl_device *device, + struct adreno_ringbuffer *rb, unsigned int ts); + +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs); + +int adreno_rb_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp); + +int adreno_ringbuffer_submit_preempt_token(struct adreno_ringbuffer *rb, + struct adreno_ringbuffer *incoming_rb); + +static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, + unsigned int rptr) +{ + if (rb->wptr >= rptr) + return rb->wptr - rptr; + return rb->wptr + KGSL_RB_DWORDS - rptr; +} + +/* Increment a value by 4 bytes with wrap-around based on size */ +static inline unsigned int 
adreno_ringbuffer_inc_wrapped(unsigned int val, + unsigned int size) +{ + return (val + sizeof(unsigned int)) % size; +} + +/* Decrement a value by 4 bytes with wrap-around based on size */ +static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val, + unsigned int size) +{ + return (val + size - sizeof(unsigned int)) % size; +} + +/* check if timestamp is greater than the current rb timestamp */ +static inline int adreno_ringbuffer_check_timestamp( + struct adreno_ringbuffer *rb, + unsigned int timestamp, int type) +{ + unsigned int ts; + adreno_rb_readtimestamp(rb->device, rb, type, &ts); + return (timestamp_cmp(ts, timestamp) >= 0); +} + +#endif /* __ADRENO_RINGBUFFER_H */ diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c new file mode 100644 index 000000000000..81c6fc4ce381 --- /dev/null +++ b/drivers/gpu/msm/adreno_snapshot.c @@ -0,0 +1,1127 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" + +#include "adreno.h" +#include "adreno_pm4types.h" +#include "a3xx_reg.h" +#include "adreno_cp_parser.h" +#include "adreno_snapshot.h" +#include "adreno_a5xx.h" + +/* Number of dwords of ringbuffer history to record */ +#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100 + +#define VPC_MEMORY_BANKS 4 + +/* Maintain a list of the objects we see during parsing */ + +#define SNAPSHOT_OBJ_BUFSIZE 64 + +#define SNAPSHOT_OBJ_TYPE_IB 0 + +/* Used to print error message if an IB has too many objects in it */ +static int ib_max_objs; + +struct snapshot_rb_params { + struct kgsl_snapshot *snapshot; + struct adreno_ringbuffer *rb; +}; + +/* Keep track of how many bytes are frozen after a snapshot and tell the user */ +static size_t snapshot_frozen_objsize; + +static struct kgsl_snapshot_object objbuf[SNAPSHOT_OBJ_BUFSIZE]; + +/* Pointer to the next open entry in the object list */ +static unsigned int objbufptr; + +static inline int adreno_rb_ctxtswitch(struct adreno_device *adreno_dev, + unsigned int *cmd) +{ + return cmd[0] == cp_packet(adreno_dev, CP_NOP, 1) && + cmd[1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER; +} + +/* Push a new buffer object onto the list */ +static void push_object(int type, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + int index; + struct kgsl_mem_entry *entry; + + if (process == NULL) + return; + + /* + * Sometimes IBs can be reused in the same dump. Because we parse from + * oldest to newest, if we come across an IB that has already been used, + * assume that it has been reused and update the list with the newest + * size. 
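+	 *
+	 * For example (illustrative numbers only): if an IB at the same
+	 * gpuaddr is first recorded with 0x100 dwords and later seen with
+	 * 0x80 dwords, the entry keeps the larger 0x400-byte size via the
+	 * max_t() below.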
+ */ + + for (index = 0; index < objbufptr; index++) { + if (objbuf[index].gpuaddr == gpuaddr && + objbuf[index].entry->priv == process) { + + objbuf[index].size = max_t(uint64_t, + objbuf[index].size, + dwords << 2); + return; + } + } + + if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) { + KGSL_CORE_ERR("snapshot: too many snapshot objects\n"); + return; + } + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (entry == NULL) { + KGSL_CORE_ERR("snapshot: Can't find entry for 0x%016llX\n", + gpuaddr); + return; + } + + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, dwords << 2)) { + KGSL_CORE_ERR("snapshot: Mem entry 0x%016llX is too small\n", + gpuaddr); + kgsl_mem_entry_put(entry); + return; + } + + /* Put it on the list of things to parse */ + objbuf[objbufptr].type = type; + objbuf[objbufptr].gpuaddr = gpuaddr; + objbuf[objbufptr].size = dwords << 2; + objbuf[objbufptr++].entry = entry; +} + +/* + * Return a 1 if the specified object is already on the list of buffers + * to be dumped + */ + +static int find_object(int type, uint64_t gpuaddr, + struct kgsl_process_private *process) +{ + int index; + + for (index = 0; index < objbufptr; index++) { + if (objbuf[index].gpuaddr == gpuaddr && + objbuf[index].entry->priv == process) + return 1; + } + + return 0; +} + +/* + * snapshot_freeze_obj_list() - Take a list of ib objects and freeze their + * memory for snapshot + * @snapshot: The snapshot data. + * @process: The process to which the IB belongs + * @ib_obj_list: List of the IB objects + * @ib2base: IB2 base address at time of the fault + * + * Returns 0 on success else error code + */ +static int snapshot_freeze_obj_list(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + uint64_t ib2base) +{ + int ret = 0; + struct adreno_ib_object *ib_objs; + int i; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + int temp_ret; + int index; + int freeze = 1; + + ib_objs = &(ib_obj_list->obj_list[i]); + /* Make sure this object is not going to be saved statically */ + for (index = 0; index < objbufptr; index++) { + if ((objbuf[index].gpuaddr <= ib_objs->gpuaddr) && + ((objbuf[index].gpuaddr + + (objbuf[index].size)) >= + (ib_objs->gpuaddr + ib_objs->size)) && + (objbuf[index].entry->priv == process)) { + freeze = 0; + break; + } + } + + if (freeze) { + /* Save current IB2 statically */ + if (ib2base == ib_objs->gpuaddr) { + push_object(SNAPSHOT_OBJ_TYPE_IB, + process, ib_objs->gpuaddr, ib_objs->size >> 2); + } else { + temp_ret = kgsl_snapshot_get_object(snapshot, + process, ib_objs->gpuaddr, + ib_objs->size, + ib_objs->snapshot_obj_type); + if (temp_ret < 0) { + if (ret >= 0) + ret = temp_ret; + } else { + snapshot_frozen_objsize += temp_ret; + } + } + } + } + return ret; +} + +/* + * We want to store the last executed IB1 and IB2 in the static region to ensure + * that we get at least some information out of the snapshot even if we can't + * access the dynamic data from the sysfs file. 
Push all other IBs on the
+ * dynamic list
+ */
+static inline void parse_ib(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	uint64_t ib1base;
+	struct adreno_ib_object_list *ib_obj_list;
+
+	/*
+	 * Check the IB address - if it is the last executed IB1 then push it
+	 * into the static blob, otherwise put it in the dynamic list
+	 */
+
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+		ADRENO_REG_CP_IB1_BASE_HI, &ib1base);
+
+	if (gpuaddr == ib1base) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, process,
+			gpuaddr, dwords);
+		return;
+	}
+
+	if (kgsl_snapshot_have_object(snapshot, process,
+			gpuaddr, dwords << 2))
+		return;
+
+	if (-E2BIG == adreno_ib_create_object_list(device, process,
+			gpuaddr, dwords, &ib_obj_list))
+		ib_max_objs = 1;
+
+	if (ib_obj_list)
+		kgsl_snapshot_add_ib_obj_list(snapshot, ib_obj_list);
+
+}
+
+/**
+ * snapshot_rb_ibs() - Dump rb data and capture the IB's in the RB as well
+ * @rb: The RB to dump
+ * @data: Pointer to memory where the RB data is to be dumped
+ * @snapshot: Pointer to information about the current snapshot being taken
+ */
+static void snapshot_rb_ibs(struct adreno_ringbuffer *rb,
+		unsigned int *data,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int rptr, *rbptr;
+	uint64_t ibbase;
+	int index, i;
+	int parse_ibs = 0, ib_parse_start;
+
+	/* Get the current read pointers for the RB */
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+
+	/* Address of the last processed IB */
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+		ADRENO_REG_CP_IB1_BASE_HI, &ibbase);
+
+	/*
+	 * Figure out the window of ringbuffer data to dump. First we need to
+	 * find where the last processed IB was submitted. Start walking back
+	 * from the rptr
+	 */
+
+	index = rptr;
+	rbptr = rb->buffer_desc.hostptr;
+
+	do {
+		index--;
+
+		if (index < 0) {
+			index = KGSL_RB_DWORDS - 3;
+
+			/* We wrapped without finding what we wanted */
+			if (index < rb->wptr) {
+				index = rb->wptr;
+				break;
+			}
+		}
+
+		if (adreno_cmd_is_ib(adreno_dev, rbptr[index]) &&
+				rbptr[index + 1] == ibbase)
+			break;
+	} while (index != rb->wptr);
+
+	/*
+	 * index points at the last submitted IB. We can only trust that the
+	 * memory between the context switch and the hanging IB is valid, so
+	 * the next step is to find the context switch before the submission
+	 */
+
+	while (index != rb->wptr) {
+		index--;
+
+		if (index < 0) {
+			index = KGSL_RB_DWORDS - 2;
+
+			/*
+			 * Wrapped without finding the context switch. This is
+			 * harmless - we should still have enough data to dump a
+			 * valid state
+			 */
+
+			if (index < rb->wptr) {
+				index = rb->wptr;
+				break;
+			}
+		}
+
+		/* Break if the current packet is a context switch identifier */
+		if ((rbptr[index] == cp_packet(adreno_dev, CP_NOP, 1)) &&
+			(rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER))
+			break;
+	}
+
+	/*
+	 * Index represents the start of the window of interest.
We will try + * to dump all buffers between here and the rptr + */ + + ib_parse_start = index; + + /* + * Loop through the RB, copying the data and looking for indirect + * buffers and MMU pagetable changes + */ + + index = rb->wptr; + for (i = 0; i < KGSL_RB_DWORDS; i++) { + *data = rbptr[index]; + + /* + * Only parse IBs between the start and the rptr or the next + * context switch, whichever comes first + */ + + if (parse_ibs == 0 && index == ib_parse_start) + parse_ibs = 1; + else if (index == rptr || adreno_rb_ctxtswitch(adreno_dev, + &rbptr[index])) + parse_ibs = 0; + + if (parse_ibs && adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + } + + /* + * Sometimes the kernel generates IBs in global + * memory. We dump the interesting global buffers, + * so there's no need to parse these IBs. + */ + if (!kgsl_search_global_pt_entries(ibaddr, ibsize)) + parse_ib(device, snapshot, snapshot->process, + ibaddr, ibsize); + } + + index = index + 1; + + if (index == KGSL_RB_DWORDS) + index = 0; + + data++; + } + +} + +/* Snapshot the ringbuffer memory */ +static size_t snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct snapshot_rb_params *snap_rb_params = priv; + struct kgsl_snapshot *snapshot = snap_rb_params->snapshot; + struct adreno_ringbuffer *rb = snap_rb_params->rb; + + /* + * Dump the entire ringbuffer - the parser can choose how much of it to + * process + */ + + if (remain < KGSL_RB_SIZE + sizeof(*header)) { + KGSL_CORE_ERR("snapshot: Not enough memory for the rb section"); + return 0; + } + + /* Write the sub-header for the section */ + header->start = rb->wptr; + header->end = rb->wptr; + header->wptr = rb->wptr; + header->rptr = rb->rptr; + header->rbsize = KGSL_RB_DWORDS; + header->count = KGSL_RB_DWORDS; + adreno_rb_readtimestamp(device, rb, KGSL_TIMESTAMP_QUEUED, + &header->timestamp_queued); + adreno_rb_readtimestamp(device, rb, KGSL_TIMESTAMP_RETIRED, + &header->timestamp_retired); + header->gpuaddr = rb->buffer_desc.gpuaddr; + header->id = rb->id; + + if (rb == adreno_dev->cur_rb) { + snapshot_rb_ibs(rb, data, snapshot); + } else { + /* Just copy the ringbuffer, there are no active IBs */ + memcpy(data, rb->buffer_desc.hostptr, KGSL_RB_SIZE); + } + /* Return the size of the section */ + return KGSL_RB_SIZE + sizeof(*header); +} + +static int _count_mem_entries(int id, void *ptr, void *data) +{ + int *count = data; + *count = *count + 1; + return 0; +} + +struct mem_entry { + uint64_t gpuaddr; + uint64_t size; + unsigned int type; +} __packed; + +static int _save_mem_entries(int id, void *ptr, void *data) +{ + struct kgsl_mem_entry *entry = ptr; + struct mem_entry *m = (struct mem_entry *) data; + + m->gpuaddr = entry->memdesc.gpuaddr; + m->size = entry->memdesc.size; + m->type = kgsl_memdesc_get_memtype(&entry->memdesc); + + return 0; +} + +static size_t snapshot_capture_mem_list(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mem_list_v2 *header = + (struct kgsl_snapshot_mem_list_v2 *)buf; + int num_mem = 0; + int ret = 0; + unsigned int *data = (unsigned int 
*)(buf + sizeof(*header));
+	struct kgsl_process_private *process = priv;
+
+	/* we need a process to search! */
+	if (process == NULL)
+		return 0;
+
+	spin_lock(&process->mem_lock);
+
+	/* We need to know the number of memory objects that the process has */
+	idr_for_each(&process->mem_idr, _count_mem_entries, &num_mem);
+
+	if (num_mem == 0)
+		goto out;
+
+	if (remain < ((num_mem * sizeof(struct mem_entry)) + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the mem list");
+		goto out;
+	}
+
+	header->num_entries = num_mem;
+	header->ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable);
+	/*
+	 * Walk through the memory list and store the
+	 * (gpuaddr, size, memtype) tuples in the snapshot
+	 */
+
+	idr_for_each(&process->mem_idr, _save_mem_entries, data);
+
+	ret = sizeof(*header) + (num_mem * sizeof(struct mem_entry));
+out:
+	spin_unlock(&process->mem_lock);
+	return ret;
+}
+
+struct snapshot_ib_meta {
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj;
+	uint64_t ib1base;
+	uint64_t ib1size;
+	uint64_t ib2base;
+	uint64_t ib2size;
+};
+
+/* Snapshot the memory for an indirect buffer */
+static size_t snapshot_ib(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_ib_v2 *header = (struct kgsl_snapshot_ib_v2 *)buf;
+	struct snapshot_ib_meta *meta = priv;
+	unsigned int *src;
+	unsigned int *dst = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_ib_object_list *ib_obj_list;
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj;
+
+	if (meta == NULL || meta->snapshot == NULL || meta->obj == NULL) {
+		KGSL_CORE_ERR("snapshot: bad metadata");
+		return 0;
+	}
+	snapshot = meta->snapshot;
+	obj = meta->obj;
+
+	if (remain < (obj->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the ib\n");
+		return 0;
+	}
+
+	src = kgsl_gpuaddr_to_vaddr(&obj->entry->memdesc, obj->gpuaddr);
+	if (src == NULL) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Unable to map GPU memory object 0x%016llX into the kernel\n",
+			obj->gpuaddr);
+		return 0;
+	}
+
+	if (remain < (obj->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the ib\n");
+		return 0;
+	}
+
+	/* only do this for IB1 because the IB2's are part of IB1 objects */
+	if (meta->ib1base == obj->gpuaddr) {
+		if (-E2BIG == adreno_ib_create_object_list(device,
+				obj->entry->priv,
+				obj->gpuaddr, obj->size >> 2,
+				&ib_obj_list))
+			ib_max_objs = 1;
+		if (ib_obj_list) {
+			/* freeze the IB objects in the IB */
+			snapshot_freeze_obj_list(snapshot,
+				obj->entry->priv,
+				ib_obj_list, meta->ib2base);
+			adreno_ib_destroy_obj_list(ib_obj_list);
+		}
+	}
+
+	/* Write the sub-header for the section */
+	header->gpuaddr = obj->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable);
+	header->size = obj->size >> 2;
+
+	/* Write the contents of the ib */
+	memcpy((void *)dst, (void *)src, (size_t) obj->size);
+
+	return obj->size + sizeof(*header);
+}
+
+/* Dump another item on the current pending list */
+static void dump_object(struct kgsl_device *device, int obj,
+		struct kgsl_snapshot *snapshot,
+		uint64_t ib1base, uint64_t ib1size,
+		uint64_t ib2base, uint64_t ib2size)
+{
+	struct snapshot_ib_meta meta;
+
+	switch (objbuf[obj].type) {
+	case SNAPSHOT_OBJ_TYPE_IB:
+		meta.snapshot = snapshot;
+		meta.obj = &objbuf[obj];
+		meta.ib1base = ib1base;
+		meta.ib1size = ib1size;
+		meta.ib2base = ib2base;
+		meta.ib2size = ib2size;
+
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_IB_V2,
+			snapshot, snapshot_ib, &meta);
+		if (objbuf[obj].entry) {
+			kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc));
+			kgsl_mem_entry_put(objbuf[obj].entry);
+		}
+		break;
+	default:
+		KGSL_CORE_ERR("snapshot: Invalid snapshot object type: %d\n",
+			objbuf[obj].type);
+		break;
+	}
+}
+
+/* setup_fault_process() - Find the kgsl_process_private struct that caused
+ * the fault
+ *
+ * Find the faulting process based on what the dispatcher thinks happened and
+ * what the hardware is using for the current pagetable. The process struct
+ * will be used to look up GPU addresses that are encountered while parsing
+ * the GPU state.
+ */
+static void setup_fault_process(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot,
+				struct kgsl_process_private *process)
+{
+	u64 hw_ptbase, proc_ptbase;
+
+	if (process != NULL && !kgsl_process_private_get(process))
+		process = NULL;
+
+	/* Get the physical address of the MMU pagetable */
+	hw_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu);
+
+	/* if we have an input process, make sure the ptbases match */
+	if (process) {
+		proc_ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable);
+		/* agreement! No need to check further */
+		if (hw_ptbase == proc_ptbase)
+			goto done;
+
+		kgsl_process_private_put(process);
+		process = NULL;
+		KGSL_CORE_ERR("snapshot: ptbase mismatch hw %llx sw %llx\n",
+			hw_ptbase, proc_ptbase);
+	}
+
+	/* try to find the right pagetable by walking the process list */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		struct kgsl_process_private *tmp;
+
+		mutex_lock(&kgsl_driver.process_mutex);
+		list_for_each_entry(tmp, &kgsl_driver.process_list, list) {
+			u64 pt_ttbr0;
+
+			pt_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(tmp->pagetable);
+			if ((pt_ttbr0 == hw_ptbase)
+				&& kgsl_process_private_get(tmp)) {
+				process = tmp;
+				break;
+			}
+		}
+		mutex_unlock(&kgsl_driver.process_mutex);
+	}
+done:
+	snapshot->process = process;
+}
+
+/* Snapshot a global memory buffer */
+static size_t snapshot_global(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (memdesc->size == 0)
+		return 0;
+
+	if (remain < (memdesc->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the memdesc\n");
+		return 0;
+	}
+
+	if (memdesc->hostptr == NULL) {
+		KGSL_CORE_ERR("snapshot: no kernel mapping for global object 0x%016llX\n",
+			memdesc->gpuaddr);
+		return 0;
+	}
+
+	header->size = memdesc->size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, memdesc->size);
+
+	return memdesc->size + sizeof(*header);
+}
+
+/* Snapshot a preemption record buffer */
+static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+	struct a5xx_cp_preemption_record record;
+	int size = sizeof(record);
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (size == 0)
+		return 0;
+
+	if (remain < (size + sizeof(*header))) {
+		KGSL_CORE_ERR(
+			"snapshot: Not enough memory for preemption record\n");
+		return 0;
+	}
+
+	if (memdesc->hostptr == NULL) {
+		KGSL_CORE_ERR(
+			"snapshot: no kernel mapping for preemption record 0x%016llX\n",
+
memdesc->gpuaddr); + return 0; + } + + header->size = size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, size); + + return size + sizeof(*header); +} + +/* adreno_snapshot - Snapshot the Adreno GPU state + * @device - KGSL device to snapshot + * @snapshot - Pointer to the snapshot instance + * @context - context that caused the fault, if known by the driver + * This is a hook function called by kgsl_snapshot to snapshot the + * Adreno specific information for the GPU snapshot. In turn, this function + * calls the GPU specific snapshot function to get core specific information. + */ +void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, + struct kgsl_context *context) +{ + unsigned int i; + uint64_t ib1base, ib2base; + unsigned int ib1size, ib2size; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + struct snapshot_rb_params snap_rb_params; + struct kgsl_iommu *iommu = device->mmu.priv; + + ib_max_objs = 0; + /* Reset the list of objects */ + objbufptr = 0; + + snapshot_frozen_objsize = 0; + + setup_fault_process(device, snapshot, + context ? context->proc_priv : NULL); + + /* Dump the current ringbuffer */ + snap_rb_params.snapshot = snapshot; + snap_rb_params.rb = adreno_dev->cur_rb; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, snapshot, + snapshot_rb, &snap_rb_params); + + /* Dump the prev ringbuffer */ + if (adreno_dev->prev_rb) { + snap_rb_params.rb = adreno_dev->prev_rb; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, snapshot_rb, &snap_rb_params); + } + + /* Dump next ringbuffer */ + if (adreno_dev->next_rb) { + snap_rb_params.rb = adreno_dev->next_rb; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, snapshot_rb, &snap_rb_params); + } + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1size); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2size); + + /* Add GPU specific sections - registers mainly, but other stuff too */ + if (gpudev->snapshot) + gpudev->snapshot(adreno_dev, snapshot); + + /* Dump selected global buffers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, &adreno_dev->dev.memstore); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, + &adreno_dev->dev.mmu.setstate_memory); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, + &adreno_dev->pwron_fixup); + + if (test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv)) { + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + &rb->preemption_desc); + } + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, &iommu->smmu_info); + } + + /* + * Add a section that lists (gpuaddr, size, memtype) tuples of the + * hanging process + */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MEMLIST_V2, + snapshot, snapshot_capture_mem_list, 
snapshot->process);
+	/*
+	 * Make sure that the last IB1 that was being executed is dumped.
+	 * Since this was the last IB1 that was processed, we should have
+	 * already added it to the list during the ringbuffer parse but we
+	 * want to be double plus sure.
+	 * The problem is that IB size from the register is the unprocessed size
+	 * of the buffer not the original size, so if we didn't catch this
+	 * buffer being directly used in the RB, then we might not be able to
+	 * dump the whole thing. Print a warning message so we can try to
+	 * figure out how often this really happens.
+	 */
+
+	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ib1base,
+			snapshot->process) && ib1size) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->process,
+			ib1base, ib1size);
+		KGSL_CORE_ERR(
+			"CP_IB1_BASE not found in the ringbuffer. Dumping %x dwords of the buffer.\n",
+			ib1size);
+	}
+
+	/*
+	 * Add the last parsed IB2 to the list. The IB2 should be found as we
+	 * parse the objects below, but we try to add it to the list first, so
+	 * it too can be parsed. Don't print an error message in this case - if
+	 * the IB2 is found during parsing, the list will be updated with the
+	 * correct size.
+	 */
+
+	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ib2base,
+			snapshot->process) && ib2size) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->process,
+			ib2base, ib2size);
+	}
+
+	/*
+	 * Go through the list of found objects and dump each one. As the IBs
+	 * are parsed, more objects might be found, and objbufptr will increase
+	 */
+	for (i = 0; i < objbufptr; i++)
+		dump_object(device, i, snapshot, ib1base, ib1size,
+			ib2base, ib2size);
+
+	if (ib_max_objs)
+		KGSL_CORE_ERR("Max objects found in IB\n");
+	if (snapshot_frozen_objsize)
+		KGSL_CORE_ERR("GPU snapshot froze %zdKb of GPU buffers\n",
+			snapshot_frozen_objsize / 1024);
+
+}
+
+/*
+ * adreno_snapshot_cp_merciu - Dump CP MERCIU data in snapshot
+ * @device: Device being snapshotted
+ * @remain: Bytes remaining in snapshot memory
+ * @priv: Size of MERCIU data in Dwords
+ */
+size_t adreno_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = *((int *)priv);
+
+	/* The MERCIU data is two dwords per entry */
+	size = size << 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MERCIU;
+	header->size = size;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_MERCIU_ADDR, 0x0);
+
+	for (i = 0; i < size; i++) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MERCIU_DATA,
+			&data[(i * 2)]);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MERCIU_DATA2,
+			&data[(i * 2) + 1]);
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_roq - Dump ROQ data in snapshot
+ * @device: Device being snapshotted
+ * @remain: Bytes remaining in snapshot memory
+ * @priv: Size of ROQ data in Dwords
+ */
+size_t adreno_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = *((int *)priv);
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_ROQ;
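+	/*
+	 * Like the other CP debug sections in this file, the ROQ contents are
+	 * read back indirectly: reset the read address to 0 and then stream
+	 * the data register out 'size' times into the snapshot buffer.
+	 */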
+	header->size = size;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ROQ_ADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ROQ_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_pm4_ram() - Dump PM4 data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Unused
+ */
+size_t adreno_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i;
+	size_t size = adreno_dev->pm4_fw_size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware. We do
+	 * use the cached version of the size, however, instead of trying to
+	 * maintain always changing hardcoded constants
+	 */
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_RADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_pfp_ram() - Dump the PFP data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Unused
+ */
+size_t adreno_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = adreno_dev->pfp_fw_size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware.
We do + * use the cached version of the size, however, instead of trying to + * maintain always changing hardcoded constants + */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x0); + for (i = 0; i < size; i++) + adreno_readreg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + &data[i]); + + return DEBUG_SECTION_SZ(size); +} + +/* + * adreno_snapshot_vpc_memory() - Save VPC data in snapshot + * @device: Device being snapshotted + * @buf: Snapshot memory + * @remain: Number of bytes left in snapshot memory + * @priv: Private data for VPC if any + */ +size_t adreno_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int vpc_mem_size = *((int *)priv); + size_t size = VPC_MEMORY_BANKS * vpc_mem_size; + int bank, addr, i = 0; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_VPC_MEMORY; + header->size = size; + + for (bank = 0; bank < VPC_MEMORY_BANKS; bank++) { + for (addr = 0; addr < vpc_mem_size; addr++) { + unsigned int val = bank | (addr << 4); + adreno_writereg(adreno_dev, + ADRENO_REG_VPC_DEBUG_RAM_SEL, val); + adreno_readreg(adreno_dev, + ADRENO_REG_VPC_DEBUG_RAM_READ, &data[i++]); + } + } + + return DEBUG_SECTION_SZ(size); +} + +/* + * adreno_snapshot_cp_meq() - Save CP MEQ data in snapshot + * @device: Device being snapshotted + * @buf: Snapshot memory + * @remain: Number of bytes left in snapshot memory + * @priv: Contains the size of MEQ data + */ +size_t adreno_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int i; + int cp_meq_sz = *((int *)priv); + + if (remain < DEBUG_SECTION_SZ(cp_meq_sz)) { + SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MEQ; + header->size = cp_meq_sz; + + adreno_writereg(adreno_dev, ADRENO_REG_CP_MEQ_ADDR, 0x0); + for (i = 0; i < cp_meq_sz; i++) + adreno_readreg(adreno_dev, ADRENO_REG_CP_MEQ_DATA, &data[i]); + + return DEBUG_SECTION_SZ(cp_meq_sz); +} + +static const struct adreno_vbif_snapshot_registers *vbif_registers( + struct adreno_device *adreno_dev, + const struct adreno_vbif_snapshot_registers *list, + unsigned int count) +{ + unsigned int version; + unsigned int i; + + adreno_readreg(adreno_dev, ADRENO_REG_VBIF_VERSION, &version); + + for (i = 0; i < count; i++) { + if (list[i].version == version) + return &list[i]; + } + + KGSL_CORE_ERR( + "snapshot: Registers for VBIF version %X register were not dumped\n", + version); + + return NULL; +} + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count) +{ + struct kgsl_snapshot_registers r; + + r.regs = regs; + r.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + kgsl_snapshot_dump_registers, &r); +} + +void adreno_snapshot_vbif_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const struct adreno_vbif_snapshot_registers *list, + unsigned int count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_registers 
regs; + const struct adreno_vbif_snapshot_registers *vbif; + + vbif = vbif_registers(adreno_dev, list, count); + + if (vbif != NULL) { + regs.regs = vbif->registers; + regs.count = vbif->count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, kgsl_snapshot_dump_registers, ®s); + } +} diff --git a/drivers/gpu/msm/adreno_snapshot.h b/drivers/gpu/msm/adreno_snapshot.h new file mode 100644 index 000000000000..6af050762402 --- /dev/null +++ b/drivers/gpu/msm/adreno_snapshot.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __ADRENO_SNAPSHOT_H +#define __ADRENO_SNAPSHOT_H + +#include "kgsl_snapshot.h" + +#define CP_CRASH_DUMPER_TIMEOUT 1000 + +#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_debug)) + +#define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_shader)) + +/* Section sizes for A320 */ +#define A320_SNAPSHOT_CP_STATE_SECTION_SIZE 0x2e +#define A320_SNAPSHOT_ROQ_SECTION_SIZE 512 +#define A320_SNAPSHOT_CP_MERCIU_SECTION_SIZE 32 + +/* Macro to make it super easy to dump registers */ +#define SNAPSHOT_REGISTERS(_d, _s, _r) \ + adreno_snapshot_registers((_d), (_s), \ + (unsigned int *) _r, ARRAY_SIZE(_r) / 2) + +size_t adreno_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count); + +void adreno_snapshot_vbif_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const struct adreno_vbif_snapshot_registers *list, + unsigned int count); + +#endif /*__ADRENO_SNAPSHOT_H */ diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c new file mode 100644 index 000000000000..4c082fc2850e --- /dev/null +++ b/drivers/gpu/msm/adreno_sysfs.c @@ -0,0 +1,475 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/sysfs.h> +#include <linux/device.h> + +#include "kgsl_device.h" +#include "adreno.h" + +struct adreno_sysfs_attribute { + struct device_attribute attr; + unsigned int (*show)(struct adreno_device *adreno_dev); + int (*store)(struct adreno_device *adreno_dev, unsigned int val); +}; + +#define _ADRENO_SYSFS_ATTR(_name, __show, __store) \ +struct adreno_sysfs_attribute adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0644, __show, __store), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + +#define ADRENO_SYSFS_ATTR(_a) \ + container_of((_a), struct adreno_sysfs_attribute, attr) + +static struct adreno_device *_get_adreno_dev(struct device *dev) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + return device ? ADRENO_DEVICE(device) : NULL; +} + +static int _ft_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + adreno_dev->ft_policy = val & KGSL_FT_POLICY_MASK; + return 0; +} + +static unsigned int _ft_policy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ft_policy; +} + +static int _ft_pagefault_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + int ret = 0; + + mutex_lock(&device->mutex); + val &= KGSL_FT_PAGEFAULT_MASK; + + if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + ret = kgsl_mmu_set_pagefault_policy(&device->mmu, + (unsigned long) val); + + if (ret == 0) + adreno_dev->ft_pf_policy = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _ft_pagefault_policy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ft_pf_policy; +} + +static int _ft_fast_hang_detect_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return 0; + + mutex_lock(&device->mutex); + + if (val) { + if (!kgsl_active_count_get(device)) { + adreno_fault_detect_start(adreno_dev); + kgsl_active_count_put(device); + } + } else + adreno_fault_detect_stop(adreno_dev); + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _ft_fast_hang_detect_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->fast_hang_detect; +} + +static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + adreno_dev->long_ib_detect = val; + return 0; +} + +static unsigned int _ft_long_ib_detect_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect; +} + +static int _ft_hang_intr_status_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + int ret = 0; + + if (val == test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv)) + return 0; + + mutex_lock(&device->mutex); + change_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + + if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + adreno_irqctrl(adreno_dev, 1); + } else if (device->state == KGSL_STATE_INIT) { + ret = -EACCES; + change_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + } + + mutex_unlock(&device->mutex); + return ret; +} + +static unsigned int _ft_hang_intr_status_show(struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); +} + +static int _pwrctrl_store(struct adreno_device *adreno_dev, + unsigned int val, unsigned int flag) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (val == 
test_bit(flag, &adreno_dev->pwrctrl_flag)) + return 0; + + mutex_lock(&device->mutex); + + /* Power down the GPU before changing the state */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + change_bit(flag, &adreno_dev->pwrctrl_flag); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + + return 0; +} + +static int _preemption_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv) == val) + return 0; + + mutex_lock(&device->mutex); + + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _preemption_show(struct adreno_device *adreno_dev) +{ + return adreno_is_preemption_enabled(adreno_dev); +} + +static int _sptp_pc_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + return _pwrctrl_store(adreno_dev, val, ADRENO_SPTP_PC_CTRL); +} + +static unsigned int _sptp_pc_show(struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag); +} + +static int _lm_store(struct adreno_device *adreno_dev, unsigned int val) +{ + return _pwrctrl_store(adreno_dev, val, ADRENO_LM_CTRL); +} + +static unsigned int _lm_show(struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag); +} + +static ssize_t _sysfs_store_u32(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + int ret; + + if (adreno_dev == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + + if (!ret && _attr->store) + ret = _attr->store(adreno_dev, val); + + return (ssize_t) ret < 0 ? ret : count; +} + +static ssize_t _sysfs_show_u32(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + + if (adreno_dev == NULL) + return 0; + + if (_attr->show) + val = _attr->show(adreno_dev); + + return snprintf(buf, PAGE_SIZE, "0x%X\n", val); +} + +static ssize_t _sysfs_store_bool(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + int ret; + + if (adreno_dev == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + + if (!ret && _attr->store) + ret = _attr->store(adreno_dev, val ? 1 : 0); + + return (ssize_t) ret < 0 ? 
ret : count; +} + +static ssize_t _sysfs_show_bool(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + + if (adreno_dev == NULL) + return 0; + + if (_attr->show) + val = _attr->show(adreno_dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", val); +} + +#define ADRENO_SYSFS_BOOL(_name) \ + _ADRENO_SYSFS_ATTR(_name, _sysfs_show_bool, _sysfs_store_bool) + +#define ADRENO_SYSFS_U32(_name) \ + _ADRENO_SYSFS_ATTR(_name, _sysfs_show_u32, _sysfs_store_u32) + +static ADRENO_SYSFS_U32(ft_policy); +static ADRENO_SYSFS_U32(ft_pagefault_policy); +static ADRENO_SYSFS_BOOL(ft_fast_hang_detect); +static ADRENO_SYSFS_BOOL(ft_long_ib_detect); +static ADRENO_SYSFS_BOOL(ft_hang_intr_status); + +static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice); +static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout); + +static ADRENO_SYSFS_BOOL(sptp_pc); +static ADRENO_SYSFS_BOOL(lm); +static ADRENO_SYSFS_BOOL(preemption); + +static const struct device_attribute *_attr_list[] = { + &adreno_attr_ft_policy.attr, + &adreno_attr_ft_pagefault_policy.attr, + &adreno_attr_ft_fast_hang_detect.attr, + &adreno_attr_ft_long_ib_detect.attr, + &adreno_attr_ft_hang_intr_status.attr, + &dev_attr_wake_nice.attr, + &dev_attr_wake_timeout.attr, + &adreno_attr_sptp_pc.attr, + &adreno_attr_lm.attr, + &adreno_attr_preemption.attr, + NULL, +}; + +/* Add a ppd directory for controlling different knobs from sysfs */ +struct adreno_ppd_attribute { + struct attribute attr; + ssize_t (*show)(struct kgsl_device *device, char *buf); + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +#define PPD_ATTR(_name, _mode, _show, _store) \ +struct adreno_ppd_attribute attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +#define to_ppd_attr(a) \ +container_of((a), struct adreno_ppd_attribute, attr) + +#define kobj_to_device(a) \ +container_of((a), struct kgsl_device, ppd_kobj) + +static ssize_t ppd_enable_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int ppd_on = 0; + int ret; + + if (!adreno_is_a430v2(adreno_dev) || + !ADRENO_FEATURE(adreno_dev, ADRENO_PPD)) + return count; + + ret = kgsl_sysfs_store(buf, &ppd_on); + if (ret < 0) + return ret; + + ppd_on = (ppd_on) ? 
1 : 0; + + if (ppd_on == test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag)) + return count; + + mutex_lock(&device->mutex); + + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + change_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t ppd_enable_show(struct kgsl_device *device, + char *buf) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + return snprintf(buf, PAGE_SIZE, "%u\n", + test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag)); +} +/* Add individual ppd attributes here */ +static PPD_ATTR(enable, 0644, ppd_enable_show, ppd_enable_store); + +static ssize_t ppd_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct adreno_ppd_attribute *pattr = to_ppd_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret = -EIO; + + if (device != NULL && pattr->show != NULL) + ret = pattr->show(device, buf); + + return ret; +} + +static ssize_t ppd_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct adreno_ppd_attribute *pattr = to_ppd_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret = -EIO; + + if (device != NULL && pattr->store != NULL) + ret = pattr->store(device, buf, count); + + return ret; +} + +static const struct sysfs_ops ppd_sysfs_ops = { + .show = ppd_sysfs_show, + .store = ppd_sysfs_store, +}; + +static struct kobj_type ktype_ppd = { + .sysfs_ops = &ppd_sysfs_ops, +}; + +static void ppd_sysfs_close(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD)) + return; + + sysfs_remove_file(&device->ppd_kobj, &attr_enable.attr); + kobject_put(&device->ppd_kobj); +} + +static int ppd_sysfs_init(struct kgsl_device *device) +{ + int ret; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD)) + return -ENODEV; + + ret = kobject_init_and_add(&device->ppd_kobj, &ktype_ppd, + &device->dev->kobj, "ppd"); + + if (ret == 0) + ret = sysfs_create_file(&device->ppd_kobj, &attr_enable.attr); + + return ret; +} + +/** + * adreno_sysfs_close() - Take down the adreno sysfs files + * @device: Pointer to the KGSL device + * + * Take down the sysfs files on when the device goes away + */ +void adreno_sysfs_close(struct kgsl_device *device) +{ + ppd_sysfs_close(device); + kgsl_remove_device_sysfs_files(device->dev, _attr_list); +} + +/** + * adreno_sysfs_init() - Initialize adreno sysfs files + * @device: Pointer to the KGSL device + * + * Initialize many of the adreno specific sysfs files especially for fault + * tolerance and power control + */ +int adreno_sysfs_init(struct kgsl_device *device) +{ + int ret = kgsl_create_device_sysfs_files(device->dev, _attr_list); + if (ret != 0) + return ret; + + /* Add the PPD directory and files */ + ppd_sysfs_init(device); + + return 0; +} + diff --git a/drivers/gpu/msm/adreno_trace.c b/drivers/gpu/msm/adreno_trace.c new file mode 100644 index 000000000000..20a7210afc1b --- /dev/null +++ b/drivers/gpu/msm/adreno_trace.c @@ -0,0 +1,21 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "adreno.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "a3xx_reg.h" +#include "a4xx_reg.h" +#include "a5xx_reg.h" +#include "adreno_trace.h" diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h new file mode 100644 index 000000000000..c0926cbb6a85 --- /dev/null +++ b/drivers/gpu/msm/adreno_trace.h @@ -0,0 +1,687 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _ADRENO_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE adreno_trace + +#include <linux/tracepoint.h> + +TRACE_EVENT(adreno_cmdbatch_queued, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int queued), + TP_ARGS(cmdbatch, queued), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, queued) + __field(unsigned int, flags) + __field(unsigned int, prio) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->queued = queued; + __entry->flags = cmdbatch->flags; + __entry->prio = cmdbatch->context->priority; + ), + TP_printk( + "ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s", + __entry->id, __entry->prio, + __entry->timestamp, __entry->queued, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "none" + ) +); + +TRACE_EVENT(adreno_cmdbatch_submitted, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight, uint64_t ticks, + unsigned long secs, unsigned long usecs, + struct adreno_ringbuffer *rb), + TP_ARGS(cmdbatch, inflight, ticks, secs, usecs, rb), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, flags) + __field(uint64_t, ticks) + __field(unsigned long, secs) + __field(unsigned long, usecs) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->inflight = inflight; + __entry->flags = cmdbatch->flags; + __entry->ticks = ticks; + __entry->secs = secs; + __entry->usecs = usecs; + __entry->prio = cmdbatch->context->priority; + __entry->rb_id = rb->id; + __entry->rptr = rb->rptr; + __entry->wptr = rb->wptr; + __entry->q_inflight = rb->dispatch_q.inflight; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d flags=%s ticks=%lld time=%lu.%0lu rb_id=%d r/w=%x/%x, q_inflight=%d", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "none", + __entry->ticks, __entry->secs, __entry->usecs, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight + ) +); + +TRACE_EVENT(adreno_cmdbatch_retired, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight, + uint64_t start, uint64_t retire, + struct adreno_ringbuffer *rb), + TP_ARGS(cmdbatch, inflight, start, retire, rb), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, recovery) + __field(unsigned int, flags) + __field(uint64_t, start) + __field(uint64_t, retire) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->inflight = inflight; + __entry->recovery = cmdbatch->fault_recovery; + __entry->flags = cmdbatch->flags; + __entry->start = start; + __entry->retire = retire; + __entry->prio = cmdbatch->context->priority; + __entry->rb_id = rb->id; + __entry->rptr = rb->rptr; + __entry->wptr = rb->wptr; + __entry->q_inflight = rb->dispatch_q.inflight; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d recovery=%s flags=%s start=%lld retire=%lld rb_id=%d, r/w=%x/%x, q_inflight=%d", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->recovery ? + __print_flags(__entry->recovery, "|", + ADRENO_FT_TYPES) : "none", + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "none", + __entry->start, + __entry->retire, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight + ) +); + +TRACE_EVENT(adreno_cmdbatch_fault, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int fault), + TP_ARGS(cmdbatch, fault), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, fault) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->fault = fault; + ), + TP_printk( + "ctx=%u ts=%u type=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->fault, + { 0, "none" }, + { ADRENO_SOFT_FAULT, "soft" }, + { ADRENO_HARD_FAULT, "hard" }, + { ADRENO_TIMEOUT_FAULT, "timeout" }) + ) +); + +TRACE_EVENT(adreno_cmdbatch_recovery, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int action), + TP_ARGS(cmdbatch, action), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, action) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->action = action; + ), + TP_printk( + "ctx=%u ts=%u action=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->action, ADRENO_FT_TYPES) + ) +); + +DECLARE_EVENT_CLASS(adreno_drawctxt_template, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, priority) + ), + TP_fast_assign( + __entry->id = drawctxt->base.id; + __entry->priority = drawctxt->base.priority; + ), + TP_printk("ctx=%u priority=%u", __entry->id, __entry->priority) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + 
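+/*
+ * dispatch_queue_context and adreno_drawctxt_invalidate below reuse the
+ * drawctxt event class above, so they record the same ctx id and priority
+ * pair as the sleep/wake events.
+ */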
+DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +TRACE_EVENT(adreno_drawctxt_wait_start, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, unsigned int ts), + TP_ARGS(rb_id, ctx_id, ts), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + ), + TP_printk( + "rb=%u ctx=%u ts=%u", + __entry->rb_id, __entry->ctx_id, __entry->ts + ) +); + +TRACE_EVENT(adreno_drawctxt_wait_done, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, + unsigned int ts, int status), + TP_ARGS(rb_id, ctx_id, ts, status), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + __field(int, status) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + __entry->status = status; + ), + TP_printk( + "rb=%u ctx=%u ts=%u status=%d", + __entry->rb_id, __entry->ctx_id, __entry->ts, __entry->status + ) +); + +TRACE_EVENT(adreno_drawctxt_switch, + TP_PROTO(struct adreno_ringbuffer *rb, + struct adreno_context *newctx, + unsigned int flags), + TP_ARGS(rb, newctx, flags), + TP_STRUCT__entry( + __field(int, rb_level) + __field(unsigned int, oldctx) + __field(unsigned int, newctx) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->rb_level = rb->id; + __entry->oldctx = rb->drawctxt_active ? + rb->drawctxt_active->base.id : 0; + __entry->newctx = newctx ? newctx->base.id : 0; + ), + TP_printk( + "rb level=%d oldctx=%u newctx=%u flags=%X", + __entry->rb_level, __entry->oldctx, __entry->newctx, flags + ) +); + +TRACE_EVENT(adreno_gpu_fault, + TP_PROTO(unsigned int ctx, unsigned int ts, + unsigned int status, unsigned int rptr, unsigned int wptr, + unsigned int ib1base, unsigned int ib1size, + unsigned int ib2base, unsigned int ib2size, int rb_id), + TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base, + ib2size, rb_id), + TP_STRUCT__entry( + __field(unsigned int, ctx) + __field(unsigned int, ts) + __field(unsigned int, status) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(unsigned int, ib1base) + __field(unsigned int, ib1size) + __field(unsigned int, ib2base) + __field(unsigned int, ib2size) + __field(int, rb_id) + ), + TP_fast_assign( + __entry->ctx = ctx; + __entry->ts = ts; + __entry->status = status; + __entry->rptr = rptr; + __entry->wptr = wptr; + __entry->ib1base = ib1base; + __entry->ib1size = ib1size; + __entry->ib2base = ib2base; + __entry->ib2size = ib2size; + __entry->rb_id = rb_id; + ), + TP_printk("ctx=%d ts=%d rb_id=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X", + __entry->ctx, __entry->ts, __entry->rb_id, __entry->status, + __entry->wptr, __entry->rptr, __entry->ib1base, + __entry->ib1size, __entry->ib2base, __entry->ib2size) +); + +TRACE_EVENT(adreno_sp_tp, + + TP_PROTO(unsigned long ip), + + TP_ARGS(ip), + + TP_STRUCT__entry( + __field(unsigned long, ip) + ), + + TP_fast_assign( + __entry->ip = ip; + ), + + TP_printk( + "func=%pf", (void *) __entry->ip + ) +); + +/* + * Tracepoint for a3xx irq. 
Includes status info + */ +TRACE_EVENT(kgsl_a3xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { 1 << A3XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" }, + { 1 << A3XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" }, + { 1 << A3XX_INT_RBBM_REG_TIMEOUT, "RBBM_REG_TIMEOUT" }, + { 1 << A3XX_INT_RBBM_ME_MS_TIMEOUT, + "RBBM_ME_MS_TIMEOUT" }, + { 1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT, + "RBBM_PFP_MS_TIMEOUT" }, + { 1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW, + "RBBM_ATB_BUS_OVERFLOW" }, + { 1 << A3XX_INT_VFD_ERROR, "RBBM_VFD_ERROR" }, + { 1 << A3XX_INT_CP_SW_INT, "CP_SW" }, + { 1 << A3XX_INT_CP_T0_PACKET_IN_IB, + "CP_T0_PACKET_IN_IB" }, + { 1 << A3XX_INT_CP_OPCODE_ERROR, "CP_OPCODE_ERROR" }, + { 1 << A3XX_INT_CP_RESERVED_BIT_ERROR, + "CP_RESERVED_BIT_ERROR" }, + { 1 << A3XX_INT_CP_HW_FAULT, "CP_HW_FAULT" }, + { 1 << A3XX_INT_CP_DMA, "CP_DMA" }, + { 1 << A3XX_INT_CP_IB2_INT, "CP_IB2_INT" }, + { 1 << A3XX_INT_CP_IB1_INT, "CP_IB1_INT" }, + { 1 << A3XX_INT_CP_RB_INT, "CP_RB_INT" }, + { 1 << A3XX_INT_CP_REG_PROTECT_FAULT, + "CP_REG_PROTECT_FAULT" }, + { 1 << A3XX_INT_CP_RB_DONE_TS, "CP_RB_DONE_TS" }, + { 1 << A3XX_INT_CP_VS_DONE_TS, "CP_VS_DONE_TS" }, + { 1 << A3XX_INT_CP_PS_DONE_TS, "CP_PS_DONE_TS" }, + { 1 << A3XX_INT_CACHE_FLUSH_TS, "CACHE_FLUSH_TS" }, + { 1 << A3XX_INT_CP_AHB_ERROR_HALT, + "CP_AHB_ERROR_HALT" }, + { 1 << A3XX_INT_MISC_HANG_DETECT, "MISC_HANG_DETECT" }, + { 1 << A3XX_INT_UCHE_OOB_ACCESS, "UCHE_OOB_ACCESS" }) + : "None" + ) +); + +/* + * Tracepoint for a4xx irq. Includes status info + */ +TRACE_EVENT(kgsl_a4xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? 
__print_flags(__entry->status, "|", + { 1 << A4XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" }, + { 1 << A4XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" }, + { 1 << A4XX_INT_RBBM_REG_TIMEOUT, "RBBM_REG_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_ME_MS_TIMEOUT, + "RBBM_ME_MS_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_PFP_MS_TIMEOUT, + "RBBM_PFP_MS_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_ETS_MS_TIMEOUT, + "RBBM_ETS_MS_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_ASYNC_OVERFLOW, + "RBBM_ASYNC_OVERFLOW" }, + { 1 << A4XX_INT_RBBM_GPC_ERR, + "RBBM_GPC_ERR" }, + { 1 << A4XX_INT_CP_SW, "CP_SW" }, + { 1 << A4XX_INT_CP_OPCODE_ERROR, "CP_OPCODE_ERROR" }, + { 1 << A4XX_INT_CP_RESERVED_BIT_ERROR, + "CP_RESERVED_BIT_ERROR" }, + { 1 << A4XX_INT_CP_HW_FAULT, "CP_HW_FAULT" }, + { 1 << A4XX_INT_CP_DMA, "CP_DMA" }, + { 1 << A4XX_INT_CP_IB2_INT, "CP_IB2_INT" }, + { 1 << A4XX_INT_CP_IB1_INT, "CP_IB1_INT" }, + { 1 << A4XX_INT_CP_RB_INT, "CP_RB_INT" }, + { 1 << A4XX_INT_CP_REG_PROTECT_FAULT, + "CP_REG_PROTECT_FAULT" }, + { 1 << A4XX_INT_CP_RB_DONE_TS, "CP_RB_DONE_TS" }, + { 1 << A4XX_INT_CP_VS_DONE_TS, "CP_VS_DONE_TS" }, + { 1 << A4XX_INT_CP_PS_DONE_TS, "CP_PS_DONE_TS" }, + { 1 << A4XX_INT_CACHE_FLUSH_TS, "CACHE_FLUSH_TS" }, + { 1 << A4XX_INT_CP_AHB_ERROR_HALT, + "CP_AHB_ERROR_HALT" }, + { 1 << A4XX_INT_RBBM_ATB_BUS_OVERFLOW, + "RBBM_ATB_BUS_OVERFLOW" }, + { 1 << A4XX_INT_MISC_HANG_DETECT, "MISC_HANG_DETECT" }, + { 1 << A4XX_INT_UCHE_OOB_ACCESS, "UCHE_OOB_ACCESS" }, + { 1 << A4XX_INT_RBBM_DPM_CALC_ERR, + "RBBM_DPM_CALC_ERR" }, + { 1 << A4XX_INT_RBBM_DPM_EPOCH_ERR, + "RBBM_DPM_CALC_ERR" }, + { 1 << A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR, + "RBBM_DPM_THERMAL_YELLOW_ERR" }, + { 1 << A4XX_INT_RBBM_DPM_THERMAL_RED_ERR, + "RBBM_DPM_THERMAL_RED_ERR" }) + : "None" + ) +); + +DECLARE_EVENT_CLASS(adreno_hw_preempt_template, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rb->rptr; + __entry->new_rptr = new_rb->rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc.gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_clear_to_trig, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp_int, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb) +); + +TRACE_EVENT(adreno_hw_preempt_comp_to_clear, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb), + TP_STRUCT__entry(__field(int, cur_level) + 
__field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rb->rptr; + __entry->new_rptr = new_rb->rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr_end = new_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc.gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x prev_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr_end, __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_hw_preempt_token_submit, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, cur_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rb->rptr; + __entry->new_rptr = new_rb->rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->cur_wptr_end = cur_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc.gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr_end, __entry->cur_wptr, + __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_rb_starve, + TP_PROTO(struct adreno_ringbuffer *rb), + TP_ARGS(rb), + TP_STRUCT__entry(__field(int, id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + ), + TP_fast_assign(__entry->id = rb->id; + __entry->rptr = rb->rptr; + __entry->wptr = rb->wptr; + ), + TP_printk( + "rb %d r/w %x/%x starved", __entry->id, __entry->rptr, + __entry->wptr + ) +); + +/* + * Tracepoint for a5xx irq. Includes status info + */ +TRACE_EVENT(kgsl_a5xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? 
__print_flags(__entry->status, "|", + { 1 << A5XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" }, + { 1 << A5XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" }, + { 1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT, + "RBBM_TRANSFER_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_ME_MS_TIMEOUT, + "RBBM_ME_MS_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT, + "RBBM_PFP_MS_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT, + "RBBM_ETS_MS_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW, + "RBBM_ATB_ASYNC_OVERFLOW" }, + { 1 << A5XX_INT_RBBM_GPC_ERROR, + "RBBM_GPC_ERR" }, + { 1 << A5XX_INT_CP_SW, "CP_SW" }, + { 1 << A5XX_INT_CP_HW_ERROR, "CP_OPCODE_ERROR" }, + { 1 << A5XX_INT_CP_CCU_FLUSH_DEPTH_TS, + "CP_CCU_FLUSH_DEPTH_TS" }, + { 1 << A5XX_INT_CP_CCU_FLUSH_COLOR_TS, + "CP_CCU_FLUSH_COLOR_TS" }, + { 1 << A5XX_INT_CP_CCU_RESOLVE_TS, + "CP_CCU_RESOLVE_TS" }, + { 1 << A5XX_INT_CP_IB2, "CP_IB2_INT" }, + { 1 << A5XX_INT_CP_IB1, "CP_IB1_INT" }, + { 1 << A5XX_INT_CP_RB, "CP_RB_INT" }, + { 1 << A5XX_INT_CP_UNUSED_1, "CP_UNUSED_1" }, + { 1 << A5XX_INT_CP_RB_DONE_TS, "CP_RB_DONE_TS" }, + { 1 << A5XX_INT_CP_WT_DONE_TS, "CP_WT_DONE_TS" }, + { 1 << A5XX_INT_UNKNOWN_1, "UNKNOWN_1" }, + { 1 << A5XX_INT_CP_CACHE_FLUSH_TS, + "CP_CACHE_FLUSH_TS" }, + { 1 << A5XX_INT_UNUSED_2, + "UNUSED_2" }, + { 1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW, + "RBBM_ATB_BUS_OVERFLOW" }, + { 1 << A5XX_INT_MISC_HANG_DETECT, + "MISC_HANG_DETECT" }, + { 1 << A5XX_INT_UCHE_OOB_ACCESS, + "UCHE_OOB_ACCESS" }, + { 1 << A5XX_INT_UCHE_TRAP_INTR, + "UCHE_TRAP_INTR" }, + { 1 << A5XX_INT_DEBBUS_INTR_0, + "DEBBUS_INTR_0" }, + { 1 << A5XX_INT_DEBBUS_INTR_1, + "DEBBUS_INTR_1" }, + { 1 << A5XX_INT_GPMU_VOLTAGE_DROOP, + "GPMU_VOLTAGE_DROOP" }, + { 1 << A5XX_INT_GPMU_FIRMWARE, + "GPMU_FIRMWARE" }, + { 1 << A5XX_INT_ISDB_CPU_IRQ, + "ISDB_CPU_IRQ" }, + { 1 << A5XX_INT_ISDB_UNDER_DEBUG, + "ISDB_UNDER_DEBUG" }) + : "None" + ) +); + +#endif /* _ADRENO_TRACE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c new file mode 100644 index 000000000000..2f28a6f604ba --- /dev/null +++ b/drivers/gpu/msm/kgsl.c @@ -0,0 +1,4113 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/module.h> +#include <linux/fb.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fdtable.h> +#include <linux/list.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/dma-buf.h> +#include <linux/pm_runtime.h> +#include <linux/rbtree.h> +#include <linux/major.h> +#include <linux/io.h> +#include <linux/mman.h> +#include <linux/sort.h> +#include <linux/security.h> +#include <linux/compat.h> +#include <linux/ctype.h> + +#include "kgsl.h" +#include "kgsl_debugfs.h" +#include "kgsl_cffdump.h" +#include "kgsl_log.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cmdbatch.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_sync.h" +#include "kgsl_compat.h" + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "kgsl." 
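A note on the parameter plumbing above: because MODULE_PARAM_PREFIX is redefined to "kgsl.", the mmutype parameter declared just below is exposed under the prefixed name when the driver is built into the kernel. A minimal sketch of typical usage (not taken from this patch; the exact boot arguments depend on the target) is a kernel command-line entry such as:

    kgsl.mmutype=iommu    /* or kgsl.mmutype=nommu, per the parameter description below */

When KGSL is built as a module instead, the prefix is empty and the parameter is passed as plain mmutype= at load time.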
+ +#ifndef arch_mmap_check +#define arch_mmap_check(addr, len, flags) (0) +#endif + +#ifndef pgprot_writebackcache +#define pgprot_writebackcache(_prot) (_prot) +#endif + +#ifndef pgprot_writethroughcache +#define pgprot_writethroughcache(_prot) (_prot) +#endif + +#ifdef CONFIG_ARM_LPAE +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(64) +#else +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + +static char *ksgl_mmu_type; +module_param_named(mmutype, ksgl_mmu_type, charp, 0); +MODULE_PARM_DESC(ksgl_mmu_type, +"Type of MMU to be used for graphics. Valid values are 'iommu' or 'nommu'"); + +/* Mutex used for the IOMMU sync quirk */ +DEFINE_MUTEX(kgsl_mmu_sync); +EXPORT_SYMBOL(kgsl_mmu_sync); + +struct kgsl_dma_buf_meta { + struct dma_buf_attachment *attach; + struct dma_buf *dmabuf; + struct sg_table *table; +}; + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry); + +static const struct file_operations kgsl_fops; + +/* + * The memfree list contains the last N blocks of memory that have been freed. + * On a GPU fault we walk the list to see if the faulting address had been + * recently freed and print out a message to that effect + */ + +#define MEMFREE_ENTRIES 512 + +static DEFINE_SPINLOCK(memfree_lock); + +struct memfree_entry { + pid_t ptname; + uint64_t gpuaddr; + uint64_t size; + pid_t pid; + uint64_t flags; +}; + +static struct { + struct memfree_entry *list; + int head; + int tail; +} memfree; + +static int kgsl_memfree_init(void) +{ + memfree.list = kzalloc(MEMFREE_ENTRIES * sizeof(struct memfree_entry), + GFP_KERNEL); + + return (memfree.list) ? 0 : -ENOMEM; +} + +static void kgsl_memfree_exit(void) +{ + kfree(memfree.list); + memset(&memfree, 0, sizeof(memfree)); +} + +static inline bool match_memfree_addr(struct memfree_entry *entry, + pid_t ptname, uint64_t gpuaddr) +{ + return ((entry->ptname == ptname) && + (entry->size > 0) && + (gpuaddr >= entry->gpuaddr && + gpuaddr < (entry->gpuaddr + entry->size))); +} +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid) +{ + int ptr; + + if (memfree.list == NULL) + return 0; + + spin_lock(&memfree_lock); + + ptr = memfree.head - 1; + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + + /* Walk backwards through the list looking for the last match */ + while (ptr != memfree.tail) { + struct memfree_entry *entry = &memfree.list[ptr]; + + if (match_memfree_addr(entry, ptname, *gpuaddr)) { + *gpuaddr = entry->gpuaddr; + *flags = entry->flags; + *size = entry->size; + *pid = entry->pid; + + spin_unlock(&memfree_lock); + return 1; + } + + ptr = ptr - 1; + + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + } + + spin_unlock(&memfree_lock); + return 0; +} + +static void kgsl_memfree_purge(pid_t ptname, uint64_t gpuaddr, + uint64_t size) +{ + int i; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + for (i = 0; i < MEMFREE_ENTRIES; i++) { + struct memfree_entry *entry = &memfree.list[i]; + + if (entry->ptname != ptname || entry->size == 0) + continue; + + if (gpuaddr > entry->gpuaddr && + gpuaddr < entry->gpuaddr + entry->size) { + /* truncate the end of the entry */ + entry->size = entry->gpuaddr - gpuaddr; + } else if (gpuaddr <= entry->gpuaddr && + gpuaddr + size < entry->gpuaddr + entry->size) + /* Truncate the beginning of the entry */ + entry->gpuaddr = gpuaddr + size; + else if (gpuaddr + size >= entry->gpuaddr + entry->size) { + /* Remove the entire entry */ + entry->size = 0; + } + } + spin_unlock(&memfree_lock); +} + +static void kgsl_memfree_add(pid_t 
pid, pid_t ptname, uint64_t gpuaddr, + uint64_t size, uint64_t flags) + +{ + struct memfree_entry *entry; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + entry = &memfree.list[memfree.head]; + + entry->pid = pid; + entry->ptname = ptname; + entry->gpuaddr = gpuaddr; + entry->size = size; + entry->flags = flags; + + memfree.head = (memfree.head + 1) % MEMFREE_ENTRIES; + + if (memfree.head == memfree.tail) + memfree.tail = (memfree.tail + 1) % MEMFREE_ENTRIES; + + spin_unlock(&memfree_lock); +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp) +{ + return device->ftbl->readtimestamp(device, priv, type, timestamp); +} +EXPORT_SYMBOL(kgsl_readtimestamp); + +/* Scheduled by kgsl_mem_entry_put_deferred() */ +static void _deferred_put(struct work_struct *work) +{ + struct kgsl_mem_entry *entry = + container_of(work, struct kgsl_mem_entry, work); + + kgsl_mem_entry_put(entry); +} + +static inline struct kgsl_mem_entry * +kgsl_mem_entry_create(void) +{ + struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (entry != NULL) { + kref_init(&entry->refcount); + INIT_WORK(&entry->work, _deferred_put); + } + + return entry; +} +#ifdef CONFIG_DMA_SHARED_BUFFER +static void kgsl_destroy_ion(struct kgsl_dma_buf_meta *meta) +{ + if (meta != NULL) { + dma_buf_unmap_attachment(meta->attach, meta->table, + DMA_FROM_DEVICE); + dma_buf_detach(meta->dmabuf, meta->attach); + dma_buf_put(meta->dmabuf); + kfree(meta); + } +} +#else +static void kgsl_destroy_ion(struct kgsl_dma_buf_meta *meta) +{ + +} +#endif + +void +kgsl_mem_entry_destroy(struct kref *kref) +{ + struct kgsl_mem_entry *entry = container_of(kref, + struct kgsl_mem_entry, + refcount); + unsigned int memtype; + + if (entry == NULL) + return; + + /* pull out the memtype before the flags get cleared */ + memtype = kgsl_memdesc_usermem_type(&entry->memdesc); + + /* Detach from process list */ + kgsl_mem_entry_detach_process(entry); + + if (memtype != KGSL_MEM_ENTRY_KERNEL) + atomic_long_sub(entry->memdesc.size, + &kgsl_driver.stats.mapped); + + /* + * Ion takes care of freeing the sg_table for us so + * clear the sg table before freeing the sharedmem + * so kgsl_sharedmem_free doesn't try to free it again + */ + if (memtype == KGSL_MEM_ENTRY_ION) + entry->memdesc.sgt = NULL; + + if ((memtype == KGSL_MEM_ENTRY_USER) + && !(entry->memdesc.flags & KGSL_MEMFLAGS_GPUREADONLY)) { + int i = 0, j; + struct scatterlist *sg; + struct page *page; + /* + * Mark all of pages in the scatterlist as dirty since they + * were writable by the GPU. + */ + for_each_sg(entry->memdesc.sgt->sgl, sg, + entry->memdesc.sgt->nents, i) { + page = sg_page(sg); + for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) + set_page_dirty(nth_page(page, j)); + } + } + + kgsl_sharedmem_free(&entry->memdesc); + + switch (memtype) { + case KGSL_MEM_ENTRY_ION: + kgsl_destroy_ion(entry->priv_data); + break; + default: + break; + } + + kfree(entry); +} +EXPORT_SYMBOL(kgsl_mem_entry_destroy); + +/** + * kgsl_mem_entry_track_gpuaddr - Insert a mem_entry in the address tree and + * assign it with a gpu address space before insertion + * @process: the process that owns the memory + * @entry: the memory entry + * + * @returns - 0 on succcess else error code + * + * Insert the kgsl_mem_entry in to the rb_tree for searching by GPU address. 
+ * The assignment of gpu address and insertion into list needs to + * happen with the memory lock held to avoid race conditions between + * gpu address being selected and some other thread looking through the + * rb list in search of memory based on gpuaddr + * This function should be called with processes memory spinlock held + */ +static int +kgsl_mem_entry_track_gpuaddr(struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_pagetable *pagetable = process->pagetable; + + /* + * If cpu=gpu map is used then caller needs to set the + * gpu address + */ + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + if (!entry->memdesc.gpuaddr) + return 0; + } else if (entry->memdesc.gpuaddr) { + WARN_ONCE(1, "gpuaddr assigned w/o holding memory lock\n"); + return -EINVAL; + } + if (kgsl_memdesc_is_secured(&entry->memdesc)) + pagetable = pagetable->mmu->securepagetable; + + return kgsl_mmu_get_gpuaddr(pagetable, &entry->memdesc); +} + +/** + * kgsl_mem_entry_untrack_gpuaddr() - Untrack memory that is previously tracked + * process - Pointer to process private to which memory belongs + * entry - Memory entry to untrack + * + * Function just does the opposite of kgsl_mem_entry_track_gpuaddr. Needs to be + * called with processes spin lock held + */ +static void +kgsl_mem_entry_untrack_gpuaddr(struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_pagetable *pagetable = entry->memdesc.pagetable; + + if (entry->memdesc.gpuaddr) + kgsl_mmu_put_gpuaddr(pagetable, &entry->memdesc); +} + +/** + * kgsl_mem_entry_attach_process - Attach a mem_entry to its owner process + * @entry: the memory entry + * @process: the owner process + * + * Attach a newly created mem_entry to its owner process so that + * it can be found later. The mem_entry will be added to mem_idr and have + * its 'id' field assigned. + * + * @returns - 0 on success or error code on failure. + */ +int +kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, + struct kgsl_device_private *dev_priv) +{ + int id; + int ret; + struct kgsl_process_private *process = dev_priv->process_priv; + struct kgsl_pagetable *pagetable = NULL; + + ret = kgsl_process_private_get(process); + if (!ret) + return -EBADF; + idr_preload(GFP_KERNEL); + spin_lock(&process->mem_lock); + id = idr_alloc(&process->mem_idr, entry, 1, 0, GFP_NOWAIT); + spin_unlock(&process->mem_lock); + idr_preload_end(); + + if (id < 0) { + ret = id; + goto err_put_proc_priv; + } + + entry->id = id; + entry->priv = process; + + ret = kgsl_mem_entry_track_gpuaddr(process, entry); + if (ret) { + spin_lock(&process->mem_lock); + idr_remove(&process->mem_idr, entry->id); + spin_unlock(&process->mem_lock); + goto err_put_proc_priv; + } + + /* map the memory after unlocking if gpuaddr has been assigned */ + if (entry->memdesc.gpuaddr) { + /* if a secured buffer map it to secure global pagetable */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) + pagetable = process->pagetable->mmu->securepagetable; + else + pagetable = process->pagetable; + + entry->memdesc.pagetable = pagetable; + ret = kgsl_mmu_map(pagetable, &entry->memdesc); + if (ret) + kgsl_mem_entry_detach_process(entry); + } + + kgsl_memfree_purge(pagetable ? 
pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size); + + return ret; + +err_put_proc_priv: + kgsl_process_private_put(process); + return ret; +} + +/* Detach a memory entry from a process and unmap it from the MMU */ + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + unsigned int type; + if (entry == NULL) + return; + + /* Unmap here so that below we can call kgsl_mmu_put_gpuaddr */ + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + + kgsl_mem_entry_untrack_gpuaddr(entry->priv, entry); + + spin_lock(&entry->priv->mem_lock); + if (entry->id != 0) + idr_remove(&entry->priv->mem_idr, entry->id); + entry->id = 0; + + type = kgsl_memdesc_usermem_type(&entry->memdesc); + entry->priv->stats[type].cur -= entry->memdesc.size; + spin_unlock(&entry->priv->mem_lock); + kgsl_process_private_put(entry->priv); + + entry->priv = NULL; +} + +/** + * kgsl_context_dump() - dump information about a draw context + * @device: KGSL device that owns the context + * @context: KGSL context to dump information about + * + * Dump specific information about the context to the kernel log. Used for + * fence timeout callbacks + */ +void kgsl_context_dump(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (_kgsl_context_get(context) == 0) + return; + + device = context->device; + + if (kgsl_context_detached(context)) { + dev_err(device->dev, " context[%d]: context detached\n", + context->id); + } else if (device->ftbl->drawctxt_dump != NULL) + device->ftbl->drawctxt_dump(device, context); + + kgsl_context_put(context); +} +EXPORT_SYMBOL(kgsl_context_dump); + +/* Allocate a new context ID */ +static int _kgsl_get_context_id(struct kgsl_device *device, + struct kgsl_context *context) +{ + int id; + + idr_preload(GFP_KERNEL); + write_lock(&device->context_lock); + id = idr_alloc(&device->context_idr, context, 1, + KGSL_MEMSTORE_MAX, GFP_NOWAIT); + write_unlock(&device->context_lock); + idr_preload_end(); + + if (id > 0) + context->id = id; + + return id; +} + +/** + * kgsl_context_init() - helper to initialize kgsl_context members + * @dev_priv: the owner of the context + * @context: the newly created context struct, should be allocated by + * the device specific drawctxt_create function. + * + * This is a helper function for the device specific drawctxt_create + * function to initialize the common members of its context struct. + * If this function succeeds, reference counting is active in the context + * struct and the caller should kgsl_context_put() it on error. + * If it fails, the caller should just free the context structure + * it passed in. + */ +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context) +{ + struct kgsl_device *device = dev_priv->device; + char name[64]; + int ret = 0, id; + + id = _kgsl_get_context_id(device, context); + if (id == -ENOSPC) { + /* + * Before declaring that there are no contexts left try + * flushing the event workqueue just in case there are + * detached contexts waiting to finish + */ + + flush_workqueue(device->events_wq); + id = _kgsl_get_context_id(device, context); + } + + if (id < 0) { + if (id == -ENOSPC) + KGSL_DRV_INFO(device, + "cannot have more than %zu contexts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + + return id; + } + + kref_init(&context->refcount); + /* + * Get a refernce to the process private so its not destroyed, until + * the context is destroyed. 
This will also prevent the pagetable + * from being destroyed + */ + if (!kgsl_process_private_get(dev_priv->process_priv)) { + ret = -EBADF; + goto out; + } + context->device = dev_priv->device; + context->dev_priv = dev_priv; + context->proc_priv = dev_priv->process_priv; + context->tid = task_pid_nr(current); + + ret = kgsl_sync_timeline_create(context); + if (ret) + goto out; + + snprintf(name, sizeof(name), "context-%d", id); + kgsl_add_event_group(&context->events, context, name, + kgsl_readtimestamp, context); + +out: + if (ret) { + write_lock(&device->context_lock); + idr_remove(&dev_priv->device->context_idr, id); + write_unlock(&device->context_lock); + } + + return ret; +} +EXPORT_SYMBOL(kgsl_context_init); + +/** + * kgsl_context_detach() - Release the "master" context reference + * @context: The context that will be detached + * + * This is called when a context becomes unusable, because userspace + * has requested for it to be destroyed. The context itself may + * exist a bit longer until its reference count goes to zero. + * Other code referencing the context can detect that it has been + * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in + * context->priv. + */ +static void kgsl_context_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (context == NULL) + return; + + /* + * Mark the context as detached to keep others from using + * the context before it gets fully removed, and to make sure + * we don't try to detach twice. + */ + if (test_and_set_bit(KGSL_CONTEXT_PRIV_DETACHED, &context->priv)) + return; + + device = context->device; + + trace_kgsl_context_detach(device, context); + + context->device->ftbl->drawctxt_detach(context); + + /* + * Cancel all pending events after the device-specific context is + * detached, to avoid possibly freeing memory while it is still + * in use by the GPU. 
+ */ + kgsl_cancel_events(device, &context->events); + + /* Remove the event group from the list */ + kgsl_del_event_group(&context->events); + + kgsl_context_put(context); +} + +void +kgsl_context_destroy(struct kref *kref) +{ + struct kgsl_context *context = container_of(kref, struct kgsl_context, + refcount); + struct kgsl_device *device = context->device; + + trace_kgsl_context_destroy(device, context); + + BUG_ON(!kgsl_context_detached(context)); + + write_lock(&device->context_lock); + if (context->id != KGSL_CONTEXT_INVALID) { + + /* Clear the timestamps in the memstore during destroy */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), 0); + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), 0); + + /* clear device power constraint */ + if (context->id == device->pwrctrl.constraint.owner_id) { + trace_kgsl_constraint(device, + device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, + 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + idr_remove(&device->context_idr, context->id); + context->id = KGSL_CONTEXT_INVALID; + } + write_unlock(&device->context_lock); + kgsl_sync_timeline_destroy(context); + kgsl_process_private_put(context->proc_priv); + + device->ftbl->drawctxt_destroy(context); +} + +struct kgsl_device *kgsl_get_device(int dev_idx) +{ + int i; + struct kgsl_device *ret = NULL; + + mutex_lock(&kgsl_driver.devlock); + + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->id == dev_idx) { + ret = kgsl_driver.devp[i]; + break; + } + } + + mutex_unlock(&kgsl_driver.devlock); + return ret; +} +EXPORT_SYMBOL(kgsl_get_device); + +static struct kgsl_device *kgsl_get_minor(int minor) +{ + struct kgsl_device *ret = NULL; + + if (minor < 0 || minor >= KGSL_DEVICE_MAX) + return NULL; + + mutex_lock(&kgsl_driver.devlock); + ret = kgsl_driver.devp[minor]; + mutex_unlock(&kgsl_driver.devlock); + + return ret; +} + +/** + * kgsl_check_timestamp() - return true if the specified timestamp is retired + * @device: Pointer to the KGSL device to check + * @context: Pointer to the context for the timestamp + * @timestamp: The timestamp to compare + */ +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + unsigned int ts_processed; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &ts_processed); + + return (timestamp_cmp(ts_processed, timestamp) >= 0); +} +EXPORT_SYMBOL(kgsl_check_timestamp); + +static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) +{ + int status = -EINVAL; + + if (!device) + return -EINVAL; + + KGSL_PWR_WARN(device, "suspend start\n"); + + mutex_lock(&device->mutex); + status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + mutex_unlock(&device->mutex); + + KGSL_PWR_WARN(device, "suspend end\n"); + return status; +} + +static int kgsl_resume_device(struct kgsl_device *device) +{ + if (!device) + return -EINVAL; + + KGSL_PWR_WARN(device, "resume start\n"); + mutex_lock(&device->mutex); + if (device->state == KGSL_STATE_SUSPEND) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + } else if (device->state != KGSL_STATE_INIT) { + /* + * This is an error situation,so wait for the device + * to idle and then put the device to SLUMBER state. + * This will put the device to the right state when + * we resume. 
+ */ + if (device->state == KGSL_STATE_ACTIVE) + device->ftbl->idle(device); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + KGSL_PWR_ERR(device, + "resume invoked without a suspend\n"); + } + + mutex_unlock(&device->mutex); + KGSL_PWR_WARN(device, "resume end\n"); + return 0; +} + +static int kgsl_suspend(struct device *dev) +{ + + pm_message_t arg = {0}; + struct kgsl_device *device = dev_get_drvdata(dev); + return kgsl_suspend_device(device, arg); +} + +static int kgsl_resume(struct device *dev) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + return kgsl_resume_device(device); +} + +static int kgsl_runtime_suspend(struct device *dev) +{ + return 0; +} + +static int kgsl_runtime_resume(struct device *dev) +{ + return 0; +} + +const struct dev_pm_ops kgsl_pm_ops = { + .suspend = kgsl_suspend, + .resume = kgsl_resume, + .runtime_suspend = kgsl_runtime_suspend, + .runtime_resume = kgsl_runtime_resume, +}; +EXPORT_SYMBOL(kgsl_pm_ops); + +int kgsl_suspend_driver(struct platform_device *pdev, + pm_message_t state) +{ + struct kgsl_device *device = dev_get_drvdata(&pdev->dev); + return kgsl_suspend_device(device, state); +} +EXPORT_SYMBOL(kgsl_suspend_driver); + +int kgsl_resume_driver(struct platform_device *pdev) +{ + struct kgsl_device *device = dev_get_drvdata(&pdev->dev); + return kgsl_resume_device(device); +} +EXPORT_SYMBOL(kgsl_resume_driver); + +/** + * kgsl_destroy_process_private() - Cleanup function to free process private + * @kref: - Pointer to object being destroyed's kref struct + * Free struct object and all other resources attached to it. + * Since the function can be used when not all resources inside process + * private have been allocated, there is a check to (before each resource + * cleanup) see if the struct member being cleaned is in fact allocated or not. + * If the value is not NULL, resource is freed. + */ +static void kgsl_destroy_process_private(struct kref *kref) +{ + struct kgsl_process_private *private = container_of(kref, + struct kgsl_process_private, refcount); + + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + + /* When using global pagetables, do not detach global pagetable */ + if (kgsl_mmu_enabled() && + private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_putpagetable(private->pagetable); + + kfree(private); + return; +} + +void +kgsl_process_private_put(struct kgsl_process_private *private) +{ + if (private) + kref_put(&private->refcount, kgsl_destroy_process_private); +} + +/** + * kgsl_process_private_find() - Find the process associated with the specified + * name + * @name: pid_t of the process to search for + * Return the process struct for the given ID. 
+ */ +struct kgsl_process_private *kgsl_process_private_find(pid_t pid) +{ + struct kgsl_process_private *p, *private = NULL; + + mutex_lock(&kgsl_driver.process_mutex); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + if (p->pid == pid) { + if (kgsl_process_private_get(p)) + private = p; + break; + } + } + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} + +static struct kgsl_process_private *kgsl_process_private_new( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + pid_t tgid = task_tgid_nr(current); + + /* Search in the process list */ + list_for_each_entry(private, &kgsl_driver.process_list, list) { + if (private->pid == tgid) { + if (!kgsl_process_private_get(private)) + private = ERR_PTR(-EINVAL); + return private; + } + } + + /* Create a new object */ + private = kzalloc(sizeof(struct kgsl_process_private), GFP_KERNEL); + if (private == NULL) + return ERR_PTR(-ENOMEM); + + kref_init(&private->refcount); + + private->pid = tgid; + get_task_comm(private->comm, current->group_leader); + + spin_lock_init(&private->mem_lock); + spin_lock_init(&private->syncsource_lock); + + idr_init(&private->mem_idr); + idr_init(&private->syncsource_idr); + + /* Allocate a pagetable for the new process object */ + if (kgsl_mmu_enabled()) { + private->pagetable = kgsl_mmu_getpagetable(&device->mmu, tgid); + if (IS_ERR(private->pagetable)) { + int err = PTR_ERR(private->pagetable); + + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + + kfree(private); + private = ERR_PTR(err); + } + } + + return private; +} + +static void process_release_memory(struct kgsl_process_private *private) +{ + struct kgsl_mem_entry *entry; + int next = 0; + + while (1) { + spin_lock(&private->mem_lock); + entry = idr_get_next(&private->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&private->mem_lock); + break; + } + /* + * If the free pending flag is not set it means that user space + * did not free it's reference to this entry, in that case + * free a reference to this entry, other references are from + * within kgsl so they will be freed eventually by kgsl + */ + if (!entry->pending_free) { + entry->pending_free = 1; + spin_unlock(&private->mem_lock); + kgsl_mem_entry_put(entry); + } else { + spin_unlock(&private->mem_lock); + } + next = next + 1; + } +} + +static void process_release_sync_sources(struct kgsl_process_private *private) +{ + struct kgsl_syncsource *syncsource; + int next = 0; + + while (1) { + spin_lock(&private->syncsource_lock); + syncsource = idr_get_next(&private->syncsource_idr, &next); + spin_unlock(&private->syncsource_lock); + + if (syncsource == NULL) + break; + + kgsl_syncsource_put(syncsource); + next = next + 1; + } +} + +static void kgsl_process_private_close(struct kgsl_device_private *dev_priv, + struct kgsl_process_private *private) +{ + mutex_lock(&kgsl_driver.process_mutex); + + if (--private->fd_count > 0) { + mutex_unlock(&kgsl_driver.process_mutex); + kgsl_process_private_put(private); + return; + } + + /* + * If this is the last file on the process take down the debug + * directories and garbage collect any outstanding resources + */ + + kgsl_process_uninit_sysfs(private); + debugfs_remove_recursive(private->debug_root); + + process_release_sync_sources(private); + + /* When using global pagetables, do not detach global pagetable */ + if (kgsl_mmu_enabled() && + private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_detach_pagetable(private->pagetable); + + /* Remove the process struct from the master 
list */ + list_del(&private->list); + + /* + * Unlock the mutex before releasing the memory - this prevents a + * deadlock with the IOMMU mutex if a page fault occurs + */ + mutex_unlock(&kgsl_driver.process_mutex); + + process_release_memory(private); + + kgsl_process_private_put(private); +} + + +static struct kgsl_process_private *kgsl_process_private_open( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + + mutex_lock(&kgsl_driver.process_mutex); + private = kgsl_process_private_new(device); + + if (IS_ERR(private)) + goto done; + + /* + * If this is a new process create the debug directories and add it to + * the process list + */ + + if (private->fd_count++ == 0) { + kgsl_process_init_sysfs(device, private); + kgsl_process_init_debugfs(private); + + list_add(&private->list, &kgsl_driver.process_list); + } + +done: + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} + +static int kgsl_close_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + device->open_count--; + if (device->open_count == 0) { + + /* Wait for the active count to go to 0 */ + kgsl_active_count_wait(device, 0); + + /* Fail if the wait times out */ + BUG_ON(atomic_read(&device->active_cnt) > 0); + + result = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + } + mutex_unlock(&device->mutex); + return result; + +} + +static void device_release_contexts(struct kgsl_device_private *dev_priv) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + int next = 0; + + while (1) { + read_lock(&device->context_lock); + context = idr_get_next(&device->context_idr, &next); + read_unlock(&device->context_lock); + + if (context == NULL) + break; + + if (context->dev_priv == dev_priv) { + /* + * Hold a reference to the context in case somebody + * tries to put it while we are detaching + */ + + if (_kgsl_context_get(context)) { + kgsl_context_detach(context); + kgsl_context_put(context); + } + } + + next = next + 1; + } +} + +static int kgsl_release(struct inode *inodep, struct file *filep) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + int result; + + filep->private_data = NULL; + + /* Release the contexts for the file */ + device_release_contexts(dev_priv); + + /* Close down the process wide resources for the file */ + kgsl_process_private_close(dev_priv, dev_priv->process_priv); + + kfree(dev_priv); + + result = kgsl_close_device(device); + pm_runtime_put(&device->pdev->dev); + + return result; +} + +static int kgsl_open_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + if (device->open_count == 0) { + /* + * active_cnt special case: we are starting up for the first + * time, so use this sequence instead of the kgsl_pwrctrl_wake() + * which will be called by kgsl_active_count_get(). + */ + atomic_inc(&device->active_cnt); + kgsl_sharedmem_set(device, &device->memstore, 0, 0, + device->memstore.size); + + result = device->ftbl->init(device); + if (result) + goto err; + + result = device->ftbl->start(device, 0); + if (result) + goto err; + /* + * Make sure the gates are open, so they don't block until + * we start suspend or FT. 
+ */ + complete_all(&device->hwaccess_gate); + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + kgsl_active_count_put(device); + } + device->open_count++; +err: + if (result) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + atomic_dec(&device->active_cnt); + } + + mutex_unlock(&device->mutex); + return result; +} + +static int kgsl_open(struct inode *inodep, struct file *filep) +{ + int result; + struct kgsl_device_private *dev_priv; + struct kgsl_device *device; + unsigned int minor = iminor(inodep); + + device = kgsl_get_minor(minor); + BUG_ON(device == NULL); + + result = pm_runtime_get_sync(&device->pdev->dev); + if (result < 0) { + KGSL_DRV_ERR(device, + "Runtime PM: Unable to wake up the device, rc = %d\n", + result); + return result; + } + result = 0; + + dev_priv = kzalloc(sizeof(struct kgsl_device_private), GFP_KERNEL); + if (dev_priv == NULL) { + result = -ENOMEM; + goto err; + } + + dev_priv->device = device; + filep->private_data = dev_priv; + + result = kgsl_open_device(device); + if (result) + goto err; + + /* + * Get file (per process) private struct. This must be done + * after the first start so that the global pagetable mappings + * are set up before we create the per-process pagetable. + */ + dev_priv->process_priv = kgsl_process_private_open(device); + if (IS_ERR(dev_priv->process_priv)) { + result = PTR_ERR(dev_priv->process_priv); + kgsl_close_device(device); + goto err; + } + +err: + if (result) { + filep->private_data = NULL; + kfree(dev_priv); + pm_runtime_put(&device->pdev->dev); + } + return result; +} + +#define GPUADDR_IN_MEMDESC(_val, _memdesc) \ + (((_val) >= (_memdesc)->gpuaddr) && \ + ((_val) < ((_memdesc)->gpuaddr + (_memdesc)->size))) + +/** + * kgsl_sharedmem_find() - Find a gpu memory allocation + * + * @private: private data for the process to check. + * @gpuaddr: start address of the region + * + * Find a gpu allocation. Caller must kgsl_mem_entry_put() + * the returned entry when finished using it. + */ +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr) +{ + int ret = 0, id; + struct kgsl_mem_entry *entry = NULL; + + if (!private) + return NULL; + + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr)) + return NULL; + + spin_lock(&private->mem_lock); + idr_for_each_entry(&private->mem_idr, entry, id) { + if (entry == NULL) + continue; + + if (GPUADDR_IN_MEMDESC(gpuaddr, &entry->memdesc)) { + ret = kgsl_mem_entry_get(entry); + break; + } + } + spin_unlock(&private->mem_lock); + + return (ret == 0) ? NULL : entry; +} +EXPORT_SYMBOL(kgsl_sharedmem_find); + +/** + * kgsl_sharedmem_find_id() - find a memory entry by id + * @process: the owning process + * @id: id to find + * + * @returns - the mem_entry or NULL + * + * Caller must kgsl_mem_entry_put() the returned entry, when finished using + * it. 
+ */ +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id) +{ + int result = 0; + struct kgsl_mem_entry *entry; + + drain_workqueue(kgsl_driver.mem_workqueue); + + spin_lock(&process->mem_lock); + entry = idr_find(&process->mem_idr, id); + if (entry) + result = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + if (!result) + return NULL; + return entry; +} + +/** + * kgsl_mem_entry_unset_pend() - Unset the pending free flag of an entry + * @entry - The memory entry + */ +static inline void kgsl_mem_entry_unset_pend(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + spin_lock(&entry->priv->mem_lock); + entry->pending_free = 0; + spin_unlock(&entry->priv->mem_lock); +} + +/** + * kgsl_mem_entry_set_pend() - Set the pending free flag of a memory entry + * @entry - The memory entry + * + * @returns - true if pending flag was 0 else false + * + * This function will set the pending free flag if it is previously unset. Used + * to prevent race condition between ioctls calling free/freememontimestamp + * on the same entry. Whichever thread set's the flag first will do the free. + */ +static inline bool kgsl_mem_entry_set_pend(struct kgsl_mem_entry *entry) +{ + bool ret = false; + + if (entry == NULL) + return false; + + spin_lock(&entry->priv->mem_lock); + if (!entry->pending_free) { + entry->pending_free = 1; + ret = true; + } + spin_unlock(&entry->priv->mem_lock); + return ret; +} + +/*call all ioctl sub functions with driver locked*/ +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_device_getproperty *param = data; + + switch (param->type) { + case KGSL_PROP_VERSION: + { + struct kgsl_version version; + if (param->sizebytes != sizeof(version)) { + result = -EINVAL; + break; + } + + version.drv_major = KGSL_VERSION_MAJOR; + version.drv_minor = KGSL_VERSION_MINOR; + version.dev_major = dev_priv->device->ver_major; + version.dev_minor = dev_priv->device->ver_minor; + + if (copy_to_user(param->value, &version, sizeof(version))) + result = -EFAULT; + + break; + } + case KGSL_PROP_GPU_RESET_STAT: + { + /* Return reset status of given context and clear it */ + uint32_t id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&id, param->value, + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + context = kgsl_context_get_owner(dev_priv, id); + if (!context) { + result = -EINVAL; + break; + } + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + if (copy_to_user(param->value, &(context->reset_status), + sizeof(unsigned int))) + result = -EFAULT; + else { + /* Clear reset status once its been queried */ + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + } + + kgsl_context_put(context); + break; + } + default: + if (is_compat_task()) + result = dev_priv->device->ftbl->getproperty_compat( + dev_priv->device, param->type, + param->value, param->sizebytes); + else + result = dev_priv->device->ftbl->getproperty( + dev_priv->device, param->type, + param->value, param->sizebytes); + } + + + return result; +} + +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = 
data; + + /* Reroute to compat version if coming from compat_ioctl */ + if (is_compat_task()) + result = dev_priv->device->ftbl->setproperty_compat( + dev_priv, param->type, param->value, + param->sizebytes); + else if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv, param->type, param->value, + param->sizebytes); + + return result; +} + +long kgsl_ioctl_device_waittimestamp_ctxtid( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + long result = -EINVAL; + unsigned int temp_cur_ts = 0; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return result; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + + trace_kgsl_waittimestamp_entry(device, context->id, temp_cur_ts, + param->timestamp, param->timeout); + + result = device->ftbl->waittimestamp(device, context, param->timestamp, + param->timeout); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + trace_kgsl_waittimestamp_exit(device, temp_cur_ts, result); + + kgsl_context_put(context); + + return result; +} + +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_ringbuffer_issueibcmds *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch = NULL; + long result = -EINVAL; + + /* The legacy functions don't support synchronization commands */ + if ((param->flags & (KGSL_CMDBATCH_SYNC | KGSL_CMDBATCH_MARKER))) + return -EINVAL; + + /* Get the context */ + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + /* Create a command batch */ + cmdbatch = kgsl_cmdbatch_create(device, context, param->flags); + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + if (param->flags & KGSL_CMDBATCH_SUBMIT_IB_LIST) { + /* Sanity check the number of IBs */ + if (param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS) { + result = -EINVAL; + goto done; + } + result = kgsl_cmdbatch_add_ibdesc_list(device, cmdbatch, + (void __user *) param->ibdesc_addr, + param->numibs); + } else { + struct kgsl_ibdesc ibdesc; + /* Ultra legacy path */ + + ibdesc.gpuaddr = param->ibdesc_addr; + ibdesc.sizedwords = param->numibs; + ibdesc.ctrl = 0; + + result = kgsl_cmdbatch_add_ibdesc(device, cmdbatch, &ibdesc); + } + + if (result) + goto done; + + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_cmdbatch_destroy(cmdbatch); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_submit_commands *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch = NULL; + long result = -EINVAL; + + /* + * The SYNC bit is supposed to identify a dummy sync object so warn the + * user if they specified any IBs with it. A MARKER command can either + * have IBs or not but if the command has 0 IBs it is automatically + * assumed to be a marker. 
If none of the above make sure that the user + * specified a sane number of IBs + */ + + if ((param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds) + KGSL_DEV_ERR_ONCE(device, + "Commands specified with the SYNC flag. They will be ignored\n"); + else if (param->numcmds > KGSL_MAX_NUMIBS) + return -EINVAL; + else if (!(param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds == 0) + param->flags |= KGSL_CMDBATCH_MARKER; + + /* Make sure that we don't have too many syncpoints */ + if (param->numsyncs > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + /* Create a command batch */ + cmdbatch = kgsl_cmdbatch_create(device, context, param->flags); + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + result = kgsl_cmdbatch_add_ibdesc_list(device, cmdbatch, + param->cmdlist, param->numcmds); + if (result) + goto done; + + result = kgsl_cmdbatch_add_syncpoints(device, cmdbatch, + param->synclist, param->numsyncs); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdbatch->profiling_buf_entry == NULL) + cmdbatch->flags &= ~KGSL_CMDBATCH_PROFILING; + + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_cmdbatch_destroy(cmdbatch); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpu_command *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch = NULL; + + long result = -EINVAL; + + /* + * The SYNC bit is supposed to identify a dummy sync object so warn the + * user if they specified any IBs with it. A MARKER command can either + * have IBs or not but if the command has 0 IBs it is automatically + * assumed to be a marker. If none of the above make sure that the user + * specified a sane number of IBs + */ + if ((param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds) + KGSL_DEV_ERR_ONCE(device, + "Commands specified with the SYNC flag. 
They will be ignored\n"); + else if (!(param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds == 0) + param->flags |= KGSL_CMDBATCH_MARKER; + + /* Make sure that the memobj and syncpoint count isn't too big */ + if (param->numcmds > KGSL_MAX_NUMIBS || + param->numobjs > KGSL_MAX_NUMIBS || + param->numsyncs > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + cmdbatch = kgsl_cmdbatch_create(device, context, param->flags); + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + result = kgsl_cmdbatch_add_cmdlist(device, cmdbatch, + to_user_ptr(param->cmdlist), + param->cmdsize, param->numcmds); + if (result) + goto done; + + result = kgsl_cmdbatch_add_memlist(device, cmdbatch, + to_user_ptr(param->objlist), + param->objsize, param->numobjs); + if (result) + goto done; + + result = kgsl_cmdbatch_add_synclist(device, cmdbatch, + to_user_ptr(param->synclist), + param->syncsize, param->numsyncs); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdbatch->profiling_buf_entry == NULL) + cmdbatch->flags &= ~KGSL_CMDBATCH_PROFILING; + + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_cmdbatch_destroy(cmdbatch); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + long result = -EINVAL; + + mutex_lock(&device->mutex); + context = kgsl_context_get_owner(dev_priv, param->context_id); + + if (context) { + result = kgsl_readtimestamp(device, context, + param->type, ¶m->timestamp); + + trace_kgsl_readtimestamp(device, context->id, + param->type, param->timestamp); + } + + kgsl_context_put(context); + mutex_unlock(&device->mutex); + return result; +} + +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_drawctxt_create *param = data; + struct kgsl_context *context = NULL; + struct kgsl_device *device = dev_priv->device; + + context = device->ftbl->drawctxt_create(dev_priv, ¶m->flags); + if (IS_ERR(context)) { + result = PTR_ERR(context); + goto done; + } + trace_kgsl_context_create(dev_priv->device, context, param->flags); + param->drawctxt_id = context->id; +done: + return result; +} + +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_drawctxt_destroy *param = data; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + kgsl_context_detach(context); + kgsl_context_put(context); + + return 0; +} + +static long gpumem_free_entry(struct kgsl_mem_entry *entry) +{ + pid_t ptname = 0; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + trace_kgsl_mem_free(entry); + + if (entry->memdesc.pagetable != NULL) + ptname = entry->memdesc.pagetable->name; + + kgsl_memfree_add(entry->priv->pid, ptname, entry->memdesc.gpuaddr, + entry->memdesc.size, entry->memdesc.flags); + + kgsl_mem_entry_put(entry); + + return 0; +} + +static void 
gpumem_free_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int ret) +{ + struct kgsl_context *context = group->context; + struct kgsl_mem_entry *entry = priv; + unsigned int timestamp; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, ×tamp); + + /* Free the memory for all event types */ + trace_kgsl_mem_timestamp_free(device, entry, KGSL_CONTEXT_ID(context), + timestamp, 0); + kgsl_mem_entry_put(entry); +} + +static long gpumem_free_entry_on_timestamp(struct kgsl_device *device, + struct kgsl_mem_entry *entry, + struct kgsl_context *context, unsigned int timestamp) +{ + int ret; + unsigned int temp; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &temp); + trace_kgsl_mem_timestamp_queue(device, entry, context->id, temp, + timestamp); + ret = kgsl_add_event(device, &context->events, + timestamp, gpumem_free_func, entry); + + if (ret) + kgsl_mem_entry_unset_pend(entry); + + return ret; +} + +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "Invalid GPU address 0x%016llx\n", + (uint64_t) param->gpuaddr); + return -EINVAL; + } + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_free_id *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "Invalid GPU memory object ID %d\n", param->id); + return -EINVAL; + } + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +static long gpuobj_free_on_timestamp(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_gpu_event_timestamp event; + struct kgsl_context *context; + long ret; + + memset(&event, 0, sizeof(event)); + + ret = _copy_from_user(&event, to_user_ptr(param->priv), + sizeof(event), param->len); + if (ret) + return ret; + + if (event.context_id == 0) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, event.context_id); + if (context == NULL) + return -EINVAL; + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, context, + event.timestamp); + + kgsl_context_put(context); + return ret; +} + +static void gpuobj_free_fence_func(void *priv) +{ + kgsl_mem_entry_put_deferred((struct kgsl_mem_entry *) priv); +} + +static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_sync_fence_waiter *handle; + struct kgsl_gpu_event_fence event; + long ret; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + memset(&event, 0, sizeof(event)); + + ret = _copy_from_user(&event, to_user_ptr(param->priv), + sizeof(event), param->len); + if (ret) { + kgsl_mem_entry_unset_pend(entry); + return ret; + } + + if (event.fd < 0) { + kgsl_mem_entry_unset_pend(entry); + return -EINVAL; + } + + handle = kgsl_sync_fence_async_wait(event.fd, + gpuobj_free_fence_func, 
entry); + + /* if handle is NULL the fence has already signaled */ + if (handle == NULL) + return gpumem_free_entry(entry); + + if (IS_ERR(handle)) { + kgsl_mem_entry_unset_pend(entry); + return PTR_ERR(handle); + } + + return 0; +} + +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) { + KGSL_MEM_ERR(dev_priv->device, + "Invalid GPU memory object ID %d\n", param->id); + return -EINVAL; + } + + /* If no event is specified then free immediately */ + if (!(param->flags & KGSL_GPUOBJ_FREE_ON_EVENT)) + ret = gpumem_free_entry(entry); + else if (param->type == KGSL_GPU_EVENT_TIMESTAMP) + ret = gpuobj_free_on_timestamp(dev_priv, entry, param); + else if (param->type == KGSL_GPU_EVENT_FENCE) + ret = gpuobj_free_on_fence(dev_priv, entry, param); + else + ret = -EINVAL; + + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context = NULL; + struct kgsl_mem_entry *entry; + long ret = -EINVAL; + + if (param->type != KGSL_TIMESTAMP_RETIRED) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + entry = kgsl_sharedmem_find(dev_priv->process_priv, + (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_ERR(dev_priv->device, + "Invalid GPU address 0x%016llx\n", + (uint64_t) param->gpuaddr); + goto out; + } + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, + context, param->timestamp); + + kgsl_mem_entry_put(entry); +out: + kgsl_context_put(context); + + return ret; +} + +static inline int _check_region(unsigned long start, unsigned long size, + uint64_t len) +{ + uint64_t end = ((uint64_t) start) + size; + return (end > len); +} + +static int check_vma(struct vm_area_struct *vma, struct file *vmfile, + struct kgsl_memdesc *memdesc) +{ + if (vma == NULL || vma->vm_file != vmfile) + return -EINVAL; + + /* userspace may not know the size, in which case use the whole vma */ + if (memdesc->size == 0) + memdesc->size = vma->vm_end - vma->vm_start; + /* range checking */ + if (vma->vm_start != memdesc->useraddr || + (memdesc->useraddr + memdesc->size) != vma->vm_end) + return -EINVAL; + return 0; +} + +static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, struct file *vmfile) +{ + int ret = 0; + long npages = 0, i; + size_t sglen = (size_t) (memdesc->size / PAGE_SIZE); + struct page **pages = NULL; + int write = (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) != 0; + + if (sglen == 0 || sglen >= LONG_MAX) + return -EINVAL; + + pages = kgsl_malloc(sglen * sizeof(struct page *)); + if (pages == NULL) + return -ENOMEM; + + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) { + ret = -ENOMEM; + goto out; + } + + down_read(&current->mm->mmap_sem); + /* If we have vmfile, make sure we map the correct vma and map it all */ + if (vmfile != NULL) + ret = check_vma(find_vma(current->mm, memdesc->useraddr), + vmfile, memdesc); + + if (ret == 0) { + npages = get_user_pages(current, current->mm, memdesc->useraddr, + sglen, write, 0, pages, NULL); + ret = (npages < 0) ? 
(int)npages : 0; + } + up_read(&current->mm->mmap_sem); + + if (ret) + goto out; + + if ((unsigned long) npages != sglen) { + ret = -EINVAL; + goto out; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages, + 0, memdesc->size, GFP_KERNEL); +out: + if (ret) { + for (i = 0; i < npages; i++) + put_page(pages[i]); + + kfree(memdesc->sgt); + memdesc->sgt = NULL; + } + kgsl_free(pages); + return ret; +} + +static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr, + size_t offset, size_t size) +{ + /* Map an anonymous memory chunk */ + + if (size == 0 || offset != 0 || + !IS_ALIGNED(size, PAGE_SIZE)) + return -EINVAL; + + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = (uint64_t) size; + entry->memdesc.mmapsize = (uint64_t) size; + entry->memdesc.useraddr = hostptr; + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) + entry->memdesc.gpuaddr = (uint64_t) entry->memdesc.useraddr; + entry->memdesc.flags |= KGSL_MEMFLAGS_USERMEM_ADDR; + + return memdesc_sg_virt(&entry->memdesc, NULL); +} + +static int match_file(const void *p, struct file *file, unsigned int fd) +{ + /* + * We must return fd + 1 because iterate_fd stops searching on + * non-zero return, but 0 is a valid fd. + */ + return (p == file) ? (fd + 1) : 0; +} + +static void _setup_cache_mode(struct kgsl_mem_entry *entry, + struct vm_area_struct *vma) +{ + unsigned int mode; + pgprot_t pgprot = vma->vm_page_prot; + + if (pgprot == pgprot_noncached(pgprot)) + mode = KGSL_CACHEMODE_UNCACHED; + else if (pgprot == pgprot_writecombine(pgprot)) + mode = KGSL_CACHEMODE_WRITECOMBINE; + else + mode = KGSL_CACHEMODE_WRITEBACK; + + entry->memdesc.flags |= (mode << KGSL_CACHEMODE_SHIFT); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf); + +static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr) +{ + struct vm_area_struct *vma; + struct dma_buf *dmabuf = NULL; + int ret; + + /* + * Find the VMA containing this pointer and figure out if it + * is a dma-buf. 
+ */ + down_read(&current->mm->mmap_sem); + vma = find_vma(current->mm, hostptr); + + if (vma && vma->vm_file) { + int fd; + + /* + * Check to see that this isn't our own memory that we have + * already mapped + */ + if (vma->vm_file->f_op == &kgsl_fops) { + up_read(&current->mm->mmap_sem); + return -EFAULT; + } + + /* Look for the fd that matches the vma file */ + fd = iterate_fd(current->files, 0, match_file, vma->vm_file); + if (fd != 0) + dmabuf = dma_buf_get(fd - 1); + } + up_read(&current->mm->mmap_sem); + + if (dmabuf == NULL) + return -ENODEV; + + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) { + dma_buf_put(dmabuf); + return ret; + } + + /* Setup the user addr/cache mode for cache operations */ + entry->memdesc.useraddr = hostptr; + _setup_cache_mode(entry, vma); + + return 0; +} +#else +static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr) +{ + return -ENODEV; +} +#endif + +static int kgsl_setup_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned long hostptr, size_t offset, size_t size) +{ + int ret; + + if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE)) + return -EINVAL; + + /* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */ + ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr); + if (ret != -ENODEV) + return ret; + + /* Okay - let's go legacy */ + return kgsl_setup_anon_useraddr(pagetable, entry, + hostptr, offset, size); +} + +static long _gpuobj_map_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param) +{ + struct kgsl_gpuobj_import_useraddr useraddr; + int ret; + + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMTYPE_MASK + | KGSL_MEMFLAGS_FORCE_32BIT; + + /* Specifying SECURE is an explicit error */ + if (param->flags & KGSL_MEMFLAGS_SECURE) + return -ENOTSUPP; + + ret = _copy_from_user(&useraddr, + to_user_ptr(param->priv), sizeof(useraddr), + param->priv_len); + if (ret) + return ret; + + /* Verify that the virtaddr and len are within bounds */ + if (useraddr.virtaddr > ULONG_MAX) + return -EINVAL; + + return kgsl_setup_useraddr(device, pagetable, entry, + (unsigned long) useraddr.virtaddr, 0, 0); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static long _gpuobj_map_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param, + int *fd) +{ + struct kgsl_gpuobj_import_dma_buf buf; + struct dma_buf *dmabuf; + int ret; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (!kgsl_mmu_is_secured(&device->mmu)) { + dev_WARN_ONCE(device->dev, 1, + "Secure buffer not supported"); + return -ENOTSUPP; + } + + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + } + + ret = _copy_from_user(&buf, to_user_ptr(param->priv), + sizeof(buf), param->priv_len); + if (ret) + return ret; + + if (buf.fd == 0) + return -EINVAL; + + *fd = buf.fd; + dmabuf = dma_buf_get(buf.fd); + + if (IS_ERR_OR_NULL(dmabuf)) + return (dmabuf == NULL) ? 
-EINVAL : PTR_ERR(dmabuf); + + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) + dma_buf_put(dmabuf); + + return ret; +} +#else +static long _gpuobj_map_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param, + int *fd) +{ + return -EINVAL; +} +#endif + +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_import *param = data; + struct kgsl_mem_entry *entry; + int ret, fd = -1; + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return -ENOMEM; + + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_FORCE_32BIT; + + entry->memdesc.flags = param->flags; + + if (param->type == KGSL_USER_MEM_TYPE_ADDR) + ret = _gpuobj_map_useraddr(dev_priv->device, private->pagetable, + entry, param); + else if (param->type == KGSL_USER_MEM_TYPE_DMABUF) + ret = _gpuobj_map_dma_buf(dev_priv->device, private->pagetable, + entry, param, &fd); + else + ret = -ENOTSUPP; + + if (ret) + goto out; + + if (entry->memdesc.size >= SZ_1M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M)); + else if (entry->memdesc.size >= SZ_64K) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K)); + + param->flags = entry->memdesc.flags; + + ret = kgsl_mem_entry_attach_process(entry, dev_priv); + if (ret) + goto unmap; + + param->id = entry->id; + + KGSL_STATS_ADD(entry->memdesc.size, &kgsl_driver.stats.mapped, + &kgsl_driver.stats.mapped_max); + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + + trace_kgsl_mem_map(entry, fd); + + return 0; + +unmap: + if (param->type == KGSL_USER_MEM_TYPE_DMABUF) { + kgsl_destroy_ion(entry->priv_data); + entry->memdesc.sgt = NULL; + } + + kgsl_sharedmem_free(&entry->memdesc); + +out: + kfree(entry); + return ret; +} + +static long _map_usermem_addr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, + unsigned long hostptr, size_t offset, size_t size) +{ + if (!kgsl_mmu_enabled()) { + KGSL_DRV_ERR(device, + "Cannot map paged memory with the MMU disabled\n"); + return -EINVAL; + } + + /* No CPU mapped buffer could ever be secure */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) + return -EINVAL; + + return kgsl_setup_useraddr(device, pagetable, entry, hostptr, + offset, size); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int _map_usermem_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned int fd) +{ + int ret; + struct dma_buf *dmabuf; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (!kgsl_mmu_is_secured(&device->mmu)) { + dev_WARN_ONCE(device->dev, 1, + "Secure buffer not supported"); + return -EINVAL; + } + + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + } + + dmabuf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dmabuf)) { + ret = PTR_ERR(dmabuf); + return ret ? 
ret : -EINVAL; + } + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) + dma_buf_put(dmabuf); + return ret; +} +#else +static int _map_usermem_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned int fd) +{ + return -EINVAL; +} +#endif + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf) +{ + int ret = 0; + struct scatterlist *s; + struct sg_table *sg_table; + struct dma_buf_attachment *attach = NULL; + struct kgsl_dma_buf_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + attach = dma_buf_attach(dmabuf, device->dev); + if (IS_ERR_OR_NULL(attach)) { + ret = attach ? PTR_ERR(attach) : -EINVAL; + goto out; + } + + meta->dmabuf = dmabuf; + meta->attach = attach; + + entry->priv_data = meta; + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = 0; + entry->memdesc.mmapsize = 0; + /* USE_CPU_MAP is not impemented for ION. */ + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + entry->memdesc.flags |= KGSL_MEMFLAGS_USERMEM_ION; + + sg_table = dma_buf_map_attachment(attach, DMA_TO_DEVICE); + + if (IS_ERR_OR_NULL(sg_table)) { + ret = PTR_ERR(sg_table); + goto out; + } + + meta->table = sg_table; + entry->priv_data = meta; + entry->memdesc.sgt = sg_table; + + /* Calculate the size of the memdesc from the sglist */ + for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s)) { + int priv = (entry->memdesc.priv & KGSL_MEMDESC_SECURE) ? 1 : 0; + + /* + * Check that each chunk of of the sg table matches the secure + * flag. + */ + + if (PagePrivate(sg_page(s)) != priv) { + ret = -EPERM; + goto out; + } + + entry->memdesc.size += (uint64_t) s->length; + } + + entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size); + entry->memdesc.mmapsize = PAGE_ALIGN(entry->memdesc.size); + +out: + if (ret) { + if (!IS_ERR_OR_NULL(attach)) + dma_buf_detach(dmabuf, attach); + + + kfree(meta); + } + + return ret; +} +#endif + +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = -EINVAL; + struct kgsl_map_user_mem *param = data; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mmu *mmu = &dev_priv->device->mmu; + unsigned int memtype; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + + if (param->flags & KGSL_MEMFLAGS_SECURE) { + /* Log message and return if context protection isn't enabled */ + if (!kgsl_mmu_is_secured(mmu)) { + dev_WARN_ONCE(dev_priv->device->dev, 1, + "Secure buffer not supported"); + return -EOPNOTSUPP; + } + + /* Can't use CPU map with secure buffers */ + if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP) + return -EINVAL; + } + + entry = kgsl_mem_entry_create(); + + if (entry == NULL) + return -ENOMEM; + + /* + * Convert from enum value to KGSL_MEM_ENTRY value, so that + * we can use the latter consistently everywhere. + */ + memtype = param->memtype + 1; + + /* + * Mask off unknown flags from userspace. This way the caller can + * check if a flag is supported by looking at the returned flags. + * Note: CACHEMODE is ignored for this call. Caching should be + * determined by type of allocation being mapped. 
+ */ + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE; + entry->memdesc.flags = ((uint64_t) param->flags) + | KGSL_MEMFLAGS_FORCE_32BIT; + + if (!kgsl_mmu_use_cpu_map(&dev_priv->device->mmu)) + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE; + + if (param->flags & KGSL_MEMFLAGS_SECURE) + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + + switch (memtype) { + case KGSL_MEM_ENTRY_USER: + result = _map_usermem_addr(dev_priv->device, private->pagetable, + entry, param->hostptr, param->offset, param->len); + break; + case KGSL_MEM_ENTRY_ION: + if (param->offset != 0) + result = -EINVAL; + else + result = _map_usermem_dma_buf(dev_priv->device, + private->pagetable, entry, param->fd); + break; + default: + KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); + result = -EOPNOTSUPP; + break; + } + + if (result) + goto error; + + if ((param->flags & KGSL_MEMFLAGS_SECURE) && + (entry->memdesc.size & mmu->secure_align_mask)) { + KGSL_DRV_ERR(dev_priv->device, + "Secure buffer size %lld not aligned to %x alignment", + entry->memdesc.size, + mmu->secure_align_mask + 1); + result = -EINVAL; + goto error_attach; + } + + if (entry->memdesc.size >= SZ_2M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_2M)); + else if (entry->memdesc.size >= SZ_1M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M)); + else if (entry->memdesc.size >= SZ_64K) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K)); + + /* echo back flags */ + param->flags = (unsigned int) entry->memdesc.flags; + + result = kgsl_mem_entry_attach_process(entry, dev_priv); + if (result) + goto error_attach; + + /* Adjust the returned value for a non 4k aligned offset */ + param->gpuaddr = (unsigned long) + entry->memdesc.gpuaddr + (param->offset & PAGE_MASK); + + KGSL_STATS_ADD(param->len, &kgsl_driver.stats.mapped, + &kgsl_driver.stats.mapped_max); + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), param->len); + + trace_kgsl_mem_map(entry, param->fd); + + return result; + +error_attach: + switch (memtype) { + case KGSL_MEM_ENTRY_ION: + kgsl_destroy_ion(entry->priv_data); + entry->memdesc.sgt = NULL; + break; + default: + break; + } + kgsl_sharedmem_free(&entry->memdesc); +error: + /* Clear gpuaddr here so userspace doesn't get any wrong ideas */ + param->gpuaddr = 0; + + kfree(entry); + return result; +} + +static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, + uint64_t offset, uint64_t length, unsigned int op) +{ + int ret = 0; + int cacheop; + int mode; + + /* + * Flush is defined as (clean | invalidate). 
If both bits are set, then + * do a flush, otherwise check for the individual bits and clean or inv + * as requested + */ + + if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH) + cacheop = KGSL_CACHE_OP_FLUSH; + else if (op & KGSL_GPUMEM_CACHE_CLEAN) + cacheop = KGSL_CACHE_OP_CLEAN; + else if (op & KGSL_GPUMEM_CACHE_INV) + cacheop = KGSL_CACHE_OP_INV; + else { + ret = -EINVAL; + goto done; + } + + if (!(op & KGSL_GPUMEM_CACHE_RANGE)) { + offset = 0; + length = entry->memdesc.size; + } + + mode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if (mode != KGSL_CACHEMODE_UNCACHED + && mode != KGSL_CACHEMODE_WRITECOMBINE) { + trace_kgsl_mem_sync_cache(entry, offset, length, op); + ret = kgsl_cache_range_op(&entry->memdesc, offset, + length, cacheop); + } + +done: + return ret; +} + +/* New cache sync function - supports both directions (clean and invalidate) */ + +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + if (param->id != 0) { + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n", + param->id); + return -EINVAL; + } + } else if (param->gpuaddr != 0) { + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "can't find gpuaddr 0x%08lX\n", + param->gpuaddr); + return -EINVAL; + } + } else { + return -EINVAL; + } + + ret = _kgsl_gpumem_sync_cache(entry, (uint64_t) param->offset, + (uint64_t) param->length, param->op); + kgsl_mem_entry_put(entry); + return ret; +} + +static int mem_id_cmp(const void *_a, const void *_b) +{ + const unsigned int *a = _a, *b = _b; + if (*a == *b) + return 0; + return (*a > *b) ? 
1 : -1; +} + +#ifdef CONFIG_ARM64 +/* Do not support full flush on ARM64 targets */ +static inline bool check_full_flush(size_t size, int op) +{ + return false; +} +#else +/* Support full flush if the size is bigger than the threshold */ +static inline bool check_full_flush(size_t size, int op) +{ + /* If we exceed the breakeven point, flush the entire cache */ + return (kgsl_driver.full_cache_threshold != 0) && + (size >= kgsl_driver.full_cache_threshold) && + (op == KGSL_GPUMEM_CACHE_FLUSH); +} +#endif + +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int i; + struct kgsl_gpumem_sync_cache_bulk *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned int id, last_id = 0, *id_list = NULL, actual_count = 0; + struct kgsl_mem_entry **entries = NULL; + long ret = 0; + uint64_t op_size = 0; + bool full_flush = false; + + if (param->id_list == NULL || param->count == 0 + || param->count > (PAGE_SIZE / sizeof(unsigned int))) + return -EINVAL; + + id_list = kzalloc(param->count * sizeof(unsigned int), GFP_KERNEL); + if (id_list == NULL) + return -ENOMEM; + + entries = kzalloc(param->count * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto end; + } + + if (copy_from_user(id_list, param->id_list, + param->count * sizeof(unsigned int))) { + ret = -EFAULT; + goto end; + } + /* sort the ids so we can weed out duplicates */ + sort(id_list, param->count, sizeof(*id_list), mem_id_cmp, NULL); + + for (i = 0; i < param->count; i++) { + unsigned int cachemode; + struct kgsl_mem_entry *entry = NULL; + + id = id_list[i]; + /* skip 0 ids or duplicates */ + if (id == last_id) + continue; + + entry = kgsl_sharedmem_find_id(private, id); + if (entry == NULL) + continue; + + /* skip uncached memory */ + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if (cachemode != KGSL_CACHEMODE_WRITETHROUGH && + cachemode != KGSL_CACHEMODE_WRITEBACK) { + kgsl_mem_entry_put(entry); + continue; + } + + op_size += entry->memdesc.size; + entries[actual_count++] = entry; + + full_flush = check_full_flush(op_size, param->op); + if (full_flush) + break; + + last_id = id; + } + if (full_flush) { + trace_kgsl_mem_sync_full_cache(actual_count, op_size); + flush_cache_all(); + } + + param->op &= ~KGSL_GPUMEM_CACHE_RANGE; + + for (i = 0; i < actual_count; i++) { + if (!full_flush) + _kgsl_gpumem_sync_cache(entries[i], 0, + entries[i]->memdesc.size, + param->op); + kgsl_mem_entry_put(entries[i]); + } +end: + kfree(entries); + kfree(id_list); + return ret; +} + +/* Legacy cache function, does a flush (clean + invalidate) */ + +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "can't find gpuaddr 0x%08lX\n", + param->gpuaddr); + return -EINVAL; + } + + ret = _kgsl_gpumem_sync_cache(entry, 0, entry->memdesc.size, + KGSL_GPUMEM_CACHE_FLUSH); + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_sync *param = data; + struct kgsl_gpuobj_sync_obj *objs; + struct kgsl_mem_entry 
**entries; + long ret = 0; + bool full_flush = false; + uint64_t size = 0; + int i, count = 0; + void __user *ptr; + + if (param->count == 0 || param->count > 128) + return -EINVAL; + + objs = kzalloc(param->count * sizeof(*objs), GFP_KERNEL); + if (objs == NULL) + return -ENOMEM; + + entries = kzalloc(param->count * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto out; + } + + ptr = to_user_ptr(param->objs); + + for (i = 0; i < param->count; i++) { + ret = _copy_from_user(&objs[i], ptr, sizeof(*objs), + param->obj_len); + if (ret) + goto out; + + entries[i] = kgsl_sharedmem_find_id(private, objs[i].id); + + /* Not finding the ID is not a fatal failure - just skip it */ + if (entries[i] == NULL) + continue; + + count++; + + if (!(objs[i].op & KGSL_GPUMEM_CACHE_RANGE)) + size += entries[i]->memdesc.size; + else if (objs[i].offset < entries[i]->memdesc.size) + size += (entries[i]->memdesc.size - objs[i].offset); + + full_flush = check_full_flush(size, objs[i].op); + if (full_flush) + break; + + ptr += sizeof(*objs); + } + + if (full_flush) { + trace_kgsl_mem_sync_full_cache(count, size); + flush_cache_all(); + } else { + for (i = 0; !ret && i < param->count; i++) + if (entries[i]) + ret = _kgsl_gpumem_sync_cache(entries[i], + objs[i].offset, objs[i].length, + objs[i].op); + } + + for (i = 0; i < param->count; i++) + if (entries[i]) + kgsl_mem_entry_put(entries[i]); + +out: + kfree(entries); + kfree(objs); + + return ret; +} + +#ifdef CONFIG_ARM64 +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + /* + * WRITETHROUGH is not supported in arm64, so we tell the user that we + * use WRITEBACK which is the default caching policy. + */ + if ((flags & KGSL_CACHEMODE_MASK) >> KGSL_CACHEMODE_SHIFT == + KGSL_CACHEMODE_WRITETHROUGH) { + flags &= ~((uint64_t) KGSL_CACHEMODE_MASK); + flags |= (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT) & + KGSL_CACHEMODE_MASK; + } + return flags; +} +#else +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + return flags; +} +#endif + +/* The largest allowable alignment for a GPU object is 32MB */ +#define KGSL_MAX_ALIGN (32 * SZ_1M) + +static struct kgsl_mem_entry *gpumem_alloc_entry( + struct kgsl_device_private *dev_priv, + uint64_t size, uint64_t mmapsize, uint64_t flags) +{ + int ret; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + unsigned int align; + + flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_FORCE_32BIT; + + /* Turn off SVM if the system doesn't support it */ + if (!kgsl_mmu_use_cpu_map(&dev_priv->device->mmu)) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Return not supported error if secure memory isn't enabled */ + if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) && + (flags & KGSL_MEMFLAGS_SECURE)) { + dev_WARN_ONCE(dev_priv->device->dev, 1, + "Secure memory not supported"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* Secure memory disables advanced addressing modes */ + if (flags & KGSL_MEMFLAGS_SECURE) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Cap the alignment bits to the highest number we can handle */ + align = MEMFLAGS(flags, KGSL_MEMALIGN_MASK, KGSL_MEMALIGN_SHIFT); + if (align >= ilog2(KGSL_MAX_ALIGN)) { + KGSL_CORE_ERR("Alignment too large; restricting to %dK\n", + KGSL_MAX_ALIGN >> 10); + + flags &= ~((uint64_t) KGSL_MEMALIGN_MASK); + flags |= (ilog2(KGSL_MAX_ALIGN) << 
KGSL_MEMALIGN_SHIFT) & + KGSL_MEMALIGN_MASK; + } + + if (mmapsize < size) + mmapsize = size; + + /* For now only allow allocations up to 4G */ + if (size > UINT_MAX) + return ERR_PTR(-EINVAL); + + /* Only allow a mmap size that we can actually mmap */ + if (mmapsize > UINT_MAX) + return ERR_PTR(-EINVAL); + + flags = kgsl_filter_cachemode(flags); + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return ERR_PTR(-ENOMEM); + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE; + + if (flags & KGSL_MEMFLAGS_SECURE) + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + + ret = kgsl_allocate_user(dev_priv->device, &entry->memdesc, + private->pagetable, size, mmapsize, flags); + if (ret != 0) + goto err; + + ret = kgsl_mem_entry_attach_process(entry, dev_priv); + if (ret != 0) { + kgsl_sharedmem_free(&entry->memdesc); + goto err; + } + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + trace_kgsl_mem_alloc(entry); + + return entry; +err: + kfree(entry); + return ERR_PTR(ret); +} + +static void copy_metadata(struct kgsl_mem_entry *entry, uint64_t metadata, + unsigned int len) +{ + unsigned int i, size; + + if (len == 0) + return; + + size = min_t(unsigned int, len, sizeof(entry->metadata) - 1); + + if (copy_from_user(entry->metadata, to_user_ptr(metadata), size)) { + memset(entry->metadata, 0, sizeof(entry->metadata)); + return; + } + + /* Clean up non printable characters in the string */ + for (i = 0; i < size && entry->metadata[i] != 0; i++) { + if (!isprint(entry->metadata[i])) + entry->metadata[i] = '?'; + } +} + +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_alloc *param = data; + struct kgsl_mem_entry *entry; + + entry = gpumem_alloc_entry(dev_priv, param->size, + param->va_len, param->flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + copy_metadata(entry, param->metadata, param->metadata_len); + + param->size = entry->memdesc.size; + param->flags = entry->memdesc.flags; + param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc); + param->id = entry->id; + + return 0; +} + +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_alloc *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + /* Legacy functions doesn't support these advanced features */ + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, + (uint64_t) param->size, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->size = (size_t) entry->memdesc.size; + param->flags = (unsigned int) entry->memdesc.flags; + + return 0; +} + +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_alloc_id *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, + (uint64_t) param->mmapsize, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) + kgsl_memdesc_mmapsize(&entry->memdesc); + param->gpuaddr = (unsigned long) 
entry->memdesc.gpuaddr; + + return 0; +} + +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpumem_get_info *param = data; + struct kgsl_mem_entry *entry = NULL; + int result = 0; + + if (param->id != 0) { + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + } else if (param->gpuaddr != 0) { + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + } else + return -EINVAL; + + /* + * If any of the 64 bit address / sizes would end up being + * truncated, return -ERANGE. That will signal the user that they + * should use a more modern API + */ + if (entry->memdesc.gpuaddr > ULONG_MAX) + result = -ERANGE; + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) kgsl_memdesc_mmapsize(&entry->memdesc); + param->useraddr = entry->memdesc.useraddr; + + kgsl_mem_entry_put(entry); + return result; +} + +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_info *param = data; + struct kgsl_mem_entry *entry; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + param->id = entry->id; + param->gpuaddr = entry->memdesc.gpuaddr; + param->flags = entry->memdesc.flags; + param->size = entry->memdesc.size; + param->va_len = kgsl_memdesc_mmapsize(&entry->memdesc); + param->va_addr = (uint64_t) entry->memdesc.useraddr; + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_set_info *param = data; + struct kgsl_mem_entry *entry; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + if (param->flags & KGSL_GPUOBJ_SET_INFO_METADATA) + copy_metadata(entry, param->metadata, param->metadata_len); + + if (param->flags & KGSL_GPUOBJ_SET_INFO_TYPE) { + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMTYPE_MASK); + entry->memdesc.flags |= param->type << KGSL_MEMTYPE_SHIFT; + } + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cff_syncmem *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + uint64_t offset, len; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + + /* + * Calculate the offset between the requested GPU address and the start + * of the object + */ + + offset = ((uint64_t) param->gpuaddr) - entry->memdesc.gpuaddr; + + if ((offset + param->len) > entry->memdesc.size) + len = entry->memdesc.size - offset; + else + len = param->len; + + kgsl_cffdump_syncmem(dev_priv->device, entry, offset, len, true); + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_cff_sync_gpuobj(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cff_sync_gpuobj *param = data; + struct 
kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + kgsl_cffdump_syncmem(dev_priv->device, entry, param->offset, + param->length, true); + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_cff_user_event *param = data; + + kgsl_cffdump_user_event(dev_priv->device, param->cff_opcode, + param->op1, param->op2, + param->op3, param->op4, param->op5); + + return result; +} + +/** + * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace + * @dev_priv - pointer to the private device structure + * @cmd - the ioctl cmd passed from kgsl_ioctl + * @data - the user data buffer from kgsl_ioctl + * @returns 0 on success or error code on failure + */ + +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event *param = data; + int ret; + + switch (param->type) { + case KGSL_TIMESTAMP_EVENT_FENCE: + ret = kgsl_add_fence_event(dev_priv->device, + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int +kgsl_mmap_memstore(struct kgsl_device *device, struct vm_area_struct *vma) +{ + struct kgsl_memdesc *memdesc = &device->memstore; + int result; + unsigned int vma_size = vma->vm_end - vma->vm_start; + + /* The memstore can only be mapped as read only */ + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + if (memdesc->size != vma_size) { + KGSL_MEM_ERR(device, "memstore bad size: %d should be %llu\n", + vma_size, memdesc->size); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + result = remap_pfn_range(vma, vma->vm_start, + device->memstore.physaddr >> PAGE_SHIFT, + vma_size, vma->vm_page_prot); + if (result != 0) + KGSL_MEM_ERR(device, "remap_pfn_range failed: %d\n", + result); + + return result; +} + +/* + * kgsl_gpumem_vm_open is called whenever a vma region is copied or split. 
+ * Increase the refcount to make sure that the accounting stays correct + */ + +static void kgsl_gpumem_vm_open(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + if (!kgsl_mem_entry_get(entry)) + vma->vm_private_data = NULL; +} + +static int +kgsl_gpumem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!entry) + return VM_FAULT_SIGBUS; + if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault) + return VM_FAULT_SIGBUS; + + return entry->memdesc.ops->vmfault(&entry->memdesc, vma, vmf); +} + +static void +kgsl_gpumem_vm_close(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!entry) + return; + + entry->memdesc.useraddr = 0; + kgsl_mem_entry_put(entry); +} + +static struct vm_operations_struct kgsl_gpumem_vm_ops = { + .open = kgsl_gpumem_vm_open, + .fault = kgsl_gpumem_vm_fault, + .close = kgsl_gpumem_vm_close, +}; + +static int +get_mmap_entry(struct kgsl_process_private *private, + struct kgsl_mem_entry **out_entry, unsigned long pgoff, + unsigned long len) +{ + int ret = 0; + struct kgsl_mem_entry *entry; + + entry = kgsl_sharedmem_find_id(private, pgoff); + if (entry == NULL) + entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT); + + if (!entry) + return -EINVAL; + + if (!entry->memdesc.ops || + !entry->memdesc.ops->vmflags || + !entry->memdesc.ops->vmfault) { + ret = -EINVAL; + goto err_put; + } + + if (entry->memdesc.useraddr != 0) { + ret = -EBUSY; + goto err_put; + } + + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + if (len != kgsl_memdesc_mmapsize(&entry->memdesc)) { + ret = -ERANGE; + goto err_put; + } + } else if (len != kgsl_memdesc_mmapsize(&entry->memdesc) && + len != entry->memdesc.size) { + /* + * If cpu_map != gpumap then user can map either the + * mmapsize or the entry size + */ + ret = -ERANGE; + goto err_put; + } + + *out_entry = entry; + return 0; +err_put: + kgsl_mem_entry_put(entry); + return ret; +} + +static unsigned long _gpu_set_svm_region(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, unsigned long addr, + unsigned long size) +{ + int ret; + + ret = kgsl_mmu_set_svm_region(private->pagetable, (uint64_t) addr, + (uint64_t) size); + + if (ret != 0) + return ret; + + entry->memdesc.gpuaddr = (uint64_t) addr; + + ret = kgsl_mmu_map(private->pagetable, &entry->memdesc); + if (ret) { + kgsl_mmu_put_gpuaddr(private->pagetable, + &entry->memdesc); + return ret; + } + + kgsl_memfree_purge(private->pagetable ? 
private->pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size); + + return addr; +} + +static unsigned long _gpu_find_svm(struct kgsl_process_private *private, + unsigned long start, unsigned long end, unsigned long len, + unsigned int align) +{ + uint64_t addr = kgsl_mmu_find_svm_region(private->pagetable, + (uint64_t) start, (uint64_t)end, (uint64_t) len, align); + + BUG_ON(addr > ULONG_MAX); + + return (unsigned long) addr; +} + +/* Search top down in the CPU VM region for a free address */ +static unsigned long _cpu_get_unmapped_area(unsigned long bottom, + unsigned long top, unsigned long len, unsigned long align) +{ + struct vm_unmapped_area_info info; + unsigned long addr, err; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.low_limit = bottom; + info.high_limit = top; + info.length = len; + info.align_offset = 0; + info.align_mask = align - 1; + + addr = vm_unmapped_area(&info); + + if (IS_ERR_VALUE(addr)) + return addr; + + err = security_mmap_addr(addr); + return err ? err : addr; +} + +static unsigned long _search_range(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, + unsigned long start, unsigned long end, + unsigned long len, uint64_t align) +{ + unsigned long cpu, gpu = end, result = -ENOMEM; + + while (gpu > start) { + /* find a new empty spot on the CPU below the last one */ + cpu = _cpu_get_unmapped_area(start, gpu, len, + (unsigned long) align); + if (IS_ERR_VALUE(cpu)) { + result = cpu; + break; + } + /* try to map it on the GPU */ + result = _gpu_set_svm_region(private, entry, cpu, len); + if (!IS_ERR_VALUE(result)) + break; + + trace_kgsl_mem_unmapped_area_collision(entry, cpu, len); + + if (cpu <= start) { + result = -ENOMEM; + break; + } + + /* move downward to the next empty spot on the GPU */ + gpu = _gpu_find_svm(private, start, cpu, len, align); + if (IS_ERR_VALUE(gpu)) { + result = gpu; + break; + } + + /* Check that_gpu_find_svm doesn't put us in a loop */ + BUG_ON(gpu >= cpu); + + /* Break if the recommended GPU address is out of range */ + if (gpu < start) { + result = -ENOMEM; + break; + } + + /* + * Add the length of the chunk to the GPU address to yield the + * upper bound for the CPU search + */ + gpu += len; + } + return result; +} + +static unsigned long _get_svm_area(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, unsigned long hint, + unsigned long len, unsigned long flags) +{ + uint64_t start, end; + int align_shift = kgsl_memdesc_get_align(&entry->memdesc); + uint64_t align; + unsigned long result; + unsigned long addr; + + if (align_shift >= ilog2(SZ_2M)) + align = SZ_2M; + else if (align_shift >= ilog2(SZ_1M)) + align = SZ_1M; + else if (align_shift >= ilog2(SZ_64K)) + align = SZ_64K; + else + align = SZ_4K; + + /* get the GPU pagetable's SVM range */ + if (kgsl_mmu_svm_range(private->pagetable, &start, &end, + entry->memdesc.flags)) + return -ERANGE; + + /* now clamp the range based on the CPU's requirements */ + start = max_t(uint64_t, start, mmap_min_addr); + end = min_t(uint64_t, end, current->mm->mmap_base); + if (start >= end) + return -ERANGE; + + if (flags & MAP_FIXED) { + /* we must use addr 'hint' or fail */ + return _gpu_set_svm_region(private, entry, hint, len); + } else if (hint != 0) { + struct vm_area_struct *vma; + + /* + * See if the hint is usable, if not we will use + * it as the start point for searching. 
+ */ + addr = clamp_t(unsigned long, hint & ~(align - 1), + start, (end - len) & ~(align - 1)); + + vma = find_vma(current->mm, addr); + + if (vma == NULL || ((addr + len) <= vma->vm_start)) { + result = _gpu_set_svm_region(private, entry, addr, len); + + /* On failure drop down to keep searching */ + if (!IS_ERR_VALUE(result)) + return result; + } + } else { + /* no hint, start search at the top and work down */ + addr = end & ~(align - 1); + } + + /* + * Search downwards from the hint first. If that fails we + * must try to search above it. + */ + result = _search_range(private, entry, start, addr, len, align); + if (IS_ERR_VALUE(result) && hint != 0) + result = _search_range(private, entry, addr, end, len, align); + + return result; +} + +static unsigned long +kgsl_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + unsigned long val; + unsigned long vma_offset = pgoff << PAGE_SHIFT; + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + struct kgsl_mem_entry *entry = NULL; + + if (vma_offset == (unsigned long) device->memstore.gpuaddr) + return get_unmapped_area(NULL, addr, len, pgoff, flags); + + val = get_mmap_entry(private, &entry, pgoff, len); + if (val) + return val; + + /* Do not allow CPU mappings for secure buffers */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) { + val = -EPERM; + goto put; + } + + if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + val = get_unmapped_area(NULL, addr, len, 0, flags); + if (IS_ERR_VALUE(val)) + KGSL_MEM_ERR(device, + "get_unmapped_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n", + private->pid, addr, pgoff, len, (int) val); + } else { + val = _get_svm_area(private, entry, addr, len, flags); + if (IS_ERR_VALUE(val)) + KGSL_MEM_ERR(device, + "_get_svm_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n", + private->pid, addr, pgoff, len, (int) val); + } + +put: + kgsl_mem_entry_put(entry); + return val; +} + +static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned int ret, cache; + unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT; + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_device *device = dev_priv->device; + + /* Handle leagacy behavior for memstore */ + + if (vma_offset == (unsigned long) device->memstore.gpuaddr) + return kgsl_mmap_memstore(device, vma); + + /* + * The reference count on the entry that we get from + * get_mmap_entry() will be held until kgsl_gpumem_vm_close(). 
+ */ + ret = get_mmap_entry(private, &entry, vma->vm_pgoff, + vma->vm_end - vma->vm_start); + if (ret) + return ret; + + vma->vm_flags |= entry->memdesc.ops->vmflags; + + vma->vm_private_data = entry; + + /* Determine user-side caching policy */ + + cache = kgsl_memdesc_get_cachemode(&entry->memdesc); + + switch (cache) { + case KGSL_CACHEMODE_UNCACHED: + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + break; + case KGSL_CACHEMODE_WRITETHROUGH: + vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot); + if (vma->vm_page_prot == + pgprot_writebackcache(vma->vm_page_prot)) + WARN_ONCE(1, "WRITETHROUGH is deprecated for arm64"); + break; + case KGSL_CACHEMODE_WRITEBACK: + vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot); + break; + case KGSL_CACHEMODE_WRITECOMBINE: + default: + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + break; + } + + vma->vm_ops = &kgsl_gpumem_vm_ops; + + if (cache == KGSL_CACHEMODE_WRITEBACK + || cache == KGSL_CACHEMODE_WRITETHROUGH) { + struct scatterlist *s; + int i; + unsigned long addr = vma->vm_start; + + for_each_sg(entry->memdesc.sgt->sgl, s, + entry->memdesc.sgt->nents, i) { + int j; + for (j = 0; j < (s->length >> PAGE_SHIFT); j++) { + struct page *page = sg_page(s); + page = nth_page(page, j); + vm_insert_page(vma, addr, page); + addr += PAGE_SIZE; + } + } + } + + vma->vm_file = file; + + entry->memdesc.useraddr = vma->vm_start; + + trace_kgsl_mem_mmap(entry); + return 0; +} + +static irqreturn_t kgsl_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + + return device->ftbl->irq_handler(device); + +} + +#define KGSL_READ_MESSAGE "OH HAI GPU\n" + +static ssize_t kgsl_read(struct file *filep, char __user *buf, size_t count, + loff_t *pos) +{ + return simple_read_from_buffer(buf, count, pos, + KGSL_READ_MESSAGE, strlen(KGSL_READ_MESSAGE) + 1); +} + +static const struct file_operations kgsl_fops = { + .owner = THIS_MODULE, + .release = kgsl_release, + .open = kgsl_open, + .mmap = kgsl_mmap, + .read = kgsl_read, + .get_unmapped_area = kgsl_get_unmapped_area, + .unlocked_ioctl = kgsl_ioctl, + .compat_ioctl = kgsl_compat_ioctl, +}; + +struct kgsl_driver kgsl_driver = { + .process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex), + .ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock), + .devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock), + /* + * Full cache flushes are faster than line by line on at least + * 8064 and 8974 once the region to be flushed is > 16mb. 
+ */ + .full_cache_threshold = SZ_16M, + + .stats.vmalloc = ATOMIC_LONG_INIT(0), + .stats.vmalloc_max = ATOMIC_LONG_INIT(0), + .stats.page_alloc = ATOMIC_LONG_INIT(0), + .stats.page_alloc_max = ATOMIC_LONG_INIT(0), + .stats.coherent = ATOMIC_LONG_INIT(0), + .stats.coherent_max = ATOMIC_LONG_INIT(0), + .stats.secure = ATOMIC_LONG_INIT(0), + .stats.secure_max = ATOMIC_LONG_INIT(0), + .stats.mapped = ATOMIC_LONG_INIT(0), + .stats.mapped_max = ATOMIC_LONG_INIT(0), +}; +EXPORT_SYMBOL(kgsl_driver); + +static void _unregister_device(struct kgsl_device *device) +{ + int minor; + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) { + if (device == kgsl_driver.devp[minor]) + break; + } + if (minor != KGSL_DEVICE_MAX) { + device_destroy(kgsl_driver.class, + MKDEV(MAJOR(kgsl_driver.major), minor)); + kgsl_driver.devp[minor] = NULL; + } + mutex_unlock(&kgsl_driver.devlock); +} + +static int _register_device(struct kgsl_device *device) +{ + int minor, ret; + dev_t dev; + + /* Find a minor for the device */ + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) { + if (kgsl_driver.devp[minor] == NULL) { + kgsl_driver.devp[minor] = device; + break; + } + } + mutex_unlock(&kgsl_driver.devlock); + + if (minor == KGSL_DEVICE_MAX) { + KGSL_CORE_ERR("minor devices exhausted\n"); + return -ENODEV; + } + + /* Create the device */ + dev = MKDEV(MAJOR(kgsl_driver.major), minor); + device->dev = device_create(kgsl_driver.class, + &device->pdev->dev, + dev, device, + device->name); + + if (IS_ERR(device->dev)) { + mutex_lock(&kgsl_driver.devlock); + kgsl_driver.devp[minor] = NULL; + mutex_unlock(&kgsl_driver.devlock); + ret = PTR_ERR(device->dev); + KGSL_CORE_ERR("device_create(%s): %d\n", device->name, ret); + return ret; + } + + dev_set_drvdata(&device->pdev->dev, device); + return 0; +} + +int kgsl_device_platform_probe(struct kgsl_device *device) +{ + int status = -EINVAL; + struct resource *res; + + status = _register_device(device); + if (status) + return status; + + /* Initialize logging first, so that failures below actually print. 
*/ + kgsl_device_debugfs_init(device); + + status = kgsl_pwrctrl_init(device); + if (status) + goto error; + + /* Get starting physical address of device registers */ + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + device->iomemname); + if (res == NULL) { + KGSL_DRV_ERR(device, "platform_get_resource_byname failed\n"); + status = -EINVAL; + goto error_pwrctrl_close; + } + if (res->start == 0 || resource_size(res) == 0) { + KGSL_DRV_ERR(device, "dev %d invalid register region\n", + device->id); + status = -EINVAL; + goto error_pwrctrl_close; + } + + device->reg_phys = res->start; + device->reg_len = resource_size(res); + + /* + * Check if a shadermemname is defined, and then get shader memory + * details including shader memory starting physical address + * and shader memory length + */ + if (device->shadermemname != NULL) { + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + device->shadermemname); + + if (res == NULL) { + KGSL_DRV_WARN(device, + "Shader memory: platform_get_resource_byname failed\n"); + } + + else { + device->shader_mem_phys = res->start; + device->shader_mem_len = resource_size(res); + } + + if (!devm_request_mem_region(device->dev, + device->shader_mem_phys, + device->shader_mem_len, + device->name)) { + KGSL_DRV_WARN(device, "request_mem_region_failed\n"); + } + } + + if (!devm_request_mem_region(device->dev, device->reg_phys, + device->reg_len, device->name)) { + KGSL_DRV_ERR(device, "request_mem_region failed\n"); + status = -ENODEV; + goto error_pwrctrl_close; + } + + device->reg_virt = devm_ioremap(device->dev, device->reg_phys, + device->reg_len); + + if (device->reg_virt == NULL) { + KGSL_DRV_ERR(device, "ioremap failed\n"); + status = -ENODEV; + goto error_pwrctrl_close; + } + /*acquire interrupt */ + device->pwrctrl.interrupt_num = + platform_get_irq_byname(device->pdev, device->pwrctrl.irq_name); + + if (device->pwrctrl.interrupt_num <= 0) { + KGSL_DRV_ERR(device, "platform_get_irq_byname failed: %d\n", + device->pwrctrl.interrupt_num); + status = -EINVAL; + goto error_pwrctrl_close; + } + + status = devm_request_irq(device->dev, device->pwrctrl.interrupt_num, + kgsl_irq_handler, IRQF_TRIGGER_HIGH, + device->name, device); + if (status) { + KGSL_DRV_ERR(device, "request_irq(%d) failed: %d\n", + device->pwrctrl.interrupt_num, status); + goto error_pwrctrl_close; + } + disable_irq(device->pwrctrl.interrupt_num); + + KGSL_DRV_INFO(device, + "dev_id %d regs phys 0x%08lx size 0x%08x virt %p\n", + device->id, device->reg_phys, device->reg_len, + device->reg_virt); + + rwlock_init(&device->context_lock); + + setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device); + + status = kgsl_mmu_init(device, ksgl_mmu_type); + if (status != 0) { + KGSL_DRV_ERR(device, "kgsl_mmu_init failed %d\n", status); + goto error_pwrctrl_close; + } + + /* Check to see if our device can perform DMA correctly */ + status = dma_set_coherent_mask(&device->pdev->dev, KGSL_DMA_BIT_MASK); + if (status) + goto error_close_mmu; + + status = kgsl_allocate_global(device, &device->memstore, + KGSL_MEMSTORE_SIZE, 0, 0); + + if (status != 0) { + KGSL_DRV_ERR(device, "kgsl_allocate_global failed %d\n", + status); + goto error_close_mmu; + } + + /* + * The default request type PM_QOS_REQ_ALL_CORES is + * applicable to all CPU cores that are online and + * would have a power impact when there are more + * number of CPUs. PM_QOS_REQ_AFFINE_IRQ request + * type shall update/apply the vote only to that CPU to + * which IRQ's affinity is set to. 
+ */ +#ifdef CONFIG_SMP + + device->pwrctrl.pm_qos_req_dma.type = PM_QOS_REQ_AFFINE_IRQ; + device->pwrctrl.pm_qos_req_dma.irq = device->pwrctrl.interrupt_num; + +#endif + pm_qos_add_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); + + + device->events_wq = create_workqueue("kgsl-events"); + + /* Initalize the snapshot engine */ + kgsl_device_snapshot_init(device); + + /* Initialize common sysfs entries */ + kgsl_pwrctrl_init_sysfs(device); + + dev_info(device->dev, "Initialized %s: mmu=%s\n", device->name, + kgsl_mmu_enabled() ? "on" : "off"); + + return 0; + +error_close_mmu: + kgsl_mmu_close(device); +error_pwrctrl_close: + kgsl_pwrctrl_close(device); +error: + _unregister_device(device); + return status; +} +EXPORT_SYMBOL(kgsl_device_platform_probe); + +void kgsl_device_platform_remove(struct kgsl_device *device) +{ + destroy_workqueue(device->events_wq); + + kgsl_device_snapshot_close(device); + + kgsl_pwrctrl_uninit_sysfs(device); + + pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma); + + idr_destroy(&device->context_idr); + + kgsl_free_global(&device->memstore); + + kgsl_mmu_close(device); + + kgsl_pwrctrl_close(device); + + _unregister_device(device); +} +EXPORT_SYMBOL(kgsl_device_platform_remove); + +static void kgsl_core_exit(void) +{ + kgsl_events_exit(); + kgsl_cffdump_destroy(); + kgsl_core_debugfs_close(); + + /* + * We call kgsl_sharedmem_uninit_sysfs() and device_unregister() + * only if kgsl_driver.virtdev has been populated. + * We check at least one member of kgsl_driver.virtdev to + * see if it is not NULL (and thus, has been populated). + */ + if (kgsl_driver.virtdev.class) { + kgsl_sharedmem_uninit_sysfs(); + device_unregister(&kgsl_driver.virtdev); + } + + if (kgsl_driver.class) { + class_destroy(kgsl_driver.class); + kgsl_driver.class = NULL; + } + + kgsl_cmdbatch_exit(); + + kgsl_memfree_exit(); + unregister_chrdev_region(kgsl_driver.major, KGSL_DEVICE_MAX); +} + +static int __init kgsl_core_init(void) +{ + int result = 0; + /* alloc major and minor device numbers */ + result = alloc_chrdev_region(&kgsl_driver.major, 0, KGSL_DEVICE_MAX, + "kgsl"); + + if (result < 0) { + + KGSL_CORE_ERR("alloc_chrdev_region failed err = %d\n", result); + goto err; + } + + cdev_init(&kgsl_driver.cdev, &kgsl_fops); + kgsl_driver.cdev.owner = THIS_MODULE; + kgsl_driver.cdev.ops = &kgsl_fops; + result = cdev_add(&kgsl_driver.cdev, MKDEV(MAJOR(kgsl_driver.major), 0), + KGSL_DEVICE_MAX); + + if (result) { + KGSL_CORE_ERR("kgsl: cdev_add() failed, dev_num= %d," + " result= %d\n", kgsl_driver.major, result); + goto err; + } + + kgsl_driver.class = class_create(THIS_MODULE, "kgsl"); + + if (IS_ERR(kgsl_driver.class)) { + result = PTR_ERR(kgsl_driver.class); + KGSL_CORE_ERR("failed to create class for kgsl"); + goto err; + } + + /* Make a virtual device for managing core related things + in sysfs */ + kgsl_driver.virtdev.class = kgsl_driver.class; + dev_set_name(&kgsl_driver.virtdev, "kgsl"); + result = device_register(&kgsl_driver.virtdev); + if (result) { + KGSL_CORE_ERR("driver_register failed\n"); + goto err; + } + + /* Make kobjects in the virtual device for storing statistics */ + + kgsl_driver.ptkobj = + kobject_create_and_add("pagetables", + &kgsl_driver.virtdev.kobj); + + kgsl_driver.prockobj = + kobject_create_and_add("proc", + &kgsl_driver.virtdev.kobj); + + kgsl_core_debugfs_init(); + + kgsl_sharedmem_init_sysfs(); + kgsl_cffdump_init(); + + INIT_LIST_HEAD(&kgsl_driver.process_list); + + INIT_LIST_HEAD(&kgsl_driver.pagetable_list); + 
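As a quick illustration of the character device interface registered above (this sketch is not part of the patch): a minimal userspace program that opens the KGSL node and calls read(), which kgsl_read() answers with the fixed KGSL_READ_MESSAGE string via simple_read_from_buffer(). The /dev/kgsl-3d0 node name is an assumption based on the usual Adreno device name; it is not defined in this hunk.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32] = {0};
	int fd = open("/dev/kgsl-3d0", O_RDWR);	/* assumed node name */

	if (fd < 0) {
		perror("open kgsl");
		return 1;
	}

	/* kgsl_read() copies out KGSL_READ_MESSAGE ("OH HAI GPU\n") */
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("%s", buf);

	close(fd);
	return 0;
}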
+ kgsl_driver.workqueue = create_singlethread_workqueue("kgsl-workqueue"); + kgsl_driver.mem_workqueue = + create_singlethread_workqueue("kgsl-mementry"); + + kgsl_events_init(); + + result = kgsl_cmdbatch_init(); + if (result) + goto err; + + kgsl_memfree_init(); + + return 0; + +err: + kgsl_core_exit(); + return result; +} + +module_init(kgsl_core_init); +module_exit(kgsl_core_exit); + +MODULE_AUTHOR("Qualcomm Innovation Center, Inc."); +MODULE_DESCRIPTION("MSM GPU driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h new file mode 100644 index 000000000000..757c07e6da86 --- /dev/null +++ b/drivers/gpu/msm/kgsl.h @@ -0,0 +1,528 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_H +#define __KGSL_H + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/msm_kgsl.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/cdev.h> +#include <linux/regulator/consumer.h> +#include <linux/mm.h> +#include <linux/dma-attrs.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> + +/* The number of memstore arrays limits the number of contexts allowed. + * If more contexts are needed, update multiple for MEMSTORE_SIZE + */ +#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 2)) +#define KGSL_MEMSTORE_GLOBAL (0) +#define KGSL_PRIORITY_MAX_RB_LEVELS 4 +#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \ + sizeof(struct kgsl_devmemstore) - 1 - KGSL_PRIORITY_MAX_RB_LEVELS) + +/* Timestamp window used to detect rollovers (half of integer range) */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + +/* A macro for memory statistics - add the new size to the stat and if + the statisic is greater then _max, set _max +*/ + +static inline void KGSL_STATS_ADD(uint64_t size, atomic_long_t *stat, + atomic_long_t *max) +{ + uint64_t ret = atomic_long_add_return(size, stat); + + if (ret > atomic_long_read(max)) + atomic_long_set(max, ret); +} + +#define KGSL_MAX_NUMIBS 100000 +#define KGSL_MAX_SYNCPOINTS 32 + +struct kgsl_device; +struct kgsl_context; + +/** + * struct kgsl_driver - main container for global KGSL things + * @cdev: Character device struct + * @major: Major ID for the KGSL device + * @class: Pointer to the class struct for the core KGSL sysfs entries + * @virtdev: Virtual device for managing the core + * @ptkobj: kobject for storing the pagetable statistics + * @prockobj: kobject for storing the process statistics + * @devp: Array of pointers to the individual KGSL device structs + * @process_list: List of open processes + * @pagetable_list: LIst of open pagetables + * @ptlock: Lock for accessing the pagetable list + * @process_mutex: Mutex for accessing the process list + * @devlock: Mutex protecting the device list + * @stats: Struct containing atomic memory statistics + * @full_cache_threshold: the threshold that triggers a full cache flush + * @workqueue: Pointer to a single threaded workqueue + * @mem_workqueue: Pointer to a workqueue for 
deferring memory entries + */ +struct kgsl_driver { + struct cdev cdev; + dev_t major; + struct class *class; + struct device virtdev; + struct kobject *ptkobj; + struct kobject *prockobj; + struct kgsl_device *devp[KGSL_DEVICE_MAX]; + struct list_head process_list; + struct list_head pagetable_list; + spinlock_t ptlock; + struct mutex process_mutex; + struct mutex devlock; + struct { + atomic_long_t vmalloc; + atomic_long_t vmalloc_max; + atomic_long_t page_alloc; + atomic_long_t page_alloc_max; + atomic_long_t coherent; + atomic_long_t coherent_max; + atomic_long_t secure; + atomic_long_t secure_max; + atomic_long_t mapped; + atomic_long_t mapped_max; + } stats; + unsigned int full_cache_threshold; + struct workqueue_struct *workqueue; + struct workqueue_struct *mem_workqueue; +}; + +extern struct kgsl_driver kgsl_driver; +extern struct mutex kgsl_mmu_sync; + +struct kgsl_pagetable; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + unsigned int vmflags; + int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, + struct vm_fault *); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel)(struct kgsl_memdesc *); + void (*unmap_kernel)(struct kgsl_memdesc *); +}; + +/* Internal definitions for memdesc->priv */ +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) +/* Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMDESC_GLOBAL BIT(1) +/* The memdesc is frozen during a snapshot */ +#define KGSL_MEMDESC_FROZEN BIT(2) +/* The memdesc is mapped into a pagetable */ +#define KGSL_MEMDESC_MAPPED BIT(3) +/* The memdesc is secured for content protection */ +#define KGSL_MEMDESC_SECURE BIT(4) +/* Memory is accessible in privileged mode */ +#define KGSL_MEMDESC_PRIVILEGED BIT(6) +/* The memdesc is TZ locked content protection */ +#define KGSL_MEMDESC_TZ_LOCKED BIT(7) + +/** + * struct kgsl_memdesc - GPU memory object descriptor + * @pagetable: Pointer to the pagetable that the object is mapped in + * @hostptr: Kernel virtual address + * @hostptr_count: Number of threads using hostptr + * @useraddr: User virtual address (if applicable) + * @gpuaddr: GPU virtual address + * @physaddr: Physical address of the memory object + * @size: Size of the memory object + * @mmapsize: Total size of the object in VM (including guard) + * @priv: Internal flags and settings + * @sgt: Scatter gather table for allocated pages + * @ops: Function hooks for the memdesc memory type + * @flags: Flags set from userspace + * @dev: Pointer to the struct device that owns this memory + * @memmap: bitmap of pages for mmapsize + * @memmap_len: Number of bits for memmap + */ +struct kgsl_memdesc { + struct kgsl_pagetable *pagetable; + void *hostptr; + unsigned int hostptr_count; + unsigned long useraddr; + uint64_t gpuaddr; + phys_addr_t physaddr; + uint64_t size; + uint64_t mmapsize; + unsigned int priv; + struct sg_table *sgt; + struct kgsl_memdesc_ops *ops; + uint64_t flags; + struct device *dev; + struct dma_attrs attrs; +}; + +/* + * List of different memory entry types. The usermem enum + * starts at 0, which we use for allocated memory, so 1 is + * added to the enum values. 
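+ * For example, KGSL_MEM_ENTRY_KERNEL stays 0 while the usermem and ION
+ * entries map to their KGSL_USER_MEM_TYPE_* value plus one.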
+ */ +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_USER (KGSL_USER_MEM_TYPE_ADDR + 1) +#define KGSL_MEM_ENTRY_ION (KGSL_USER_MEM_TYPE_ION + 1) +#define KGSL_MEM_ENTRY_MAX (KGSL_USER_MEM_TYPE_MAX + 1) + +/* symbolic table for trace and debugfs */ +#define KGSL_MEM_TYPES \ + { KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \ + { KGSL_MEM_ENTRY_USER, "usermem" }, \ + { KGSL_MEM_ENTRY_ION, "ion" } + +/* + * struct kgsl_mem_entry - a userspace memory allocation + * @refcount: reference count. Currently userspace can only + * hold a single reference count, but the kernel may hold more. + * @memdesc: description of the memory + * @priv_data: type-specific data, such as the dma-buf attachment pointer. + * @node: rb_node for the gpu address lookup rb tree + * @id: idr index for this entry, can be used to find memory that does not have + * a valid GPU address. + * @priv: back pointer to the process that owns this memory + * @pending_free: if !0, userspace requested that his memory be freed, but there + * are still references to it. + * @dev_priv: back pointer to the device file that created this entry. + * @metadata: String containing user specified metadata for the entry + * @work: Work struct used to schedule a kgsl_mem_entry_put in atomic contexts + */ +struct kgsl_mem_entry { + struct kref refcount; + struct kgsl_memdesc memdesc; + void *priv_data; + struct rb_node node; + unsigned int id; + struct kgsl_process_private *priv; + int pending_free; + char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1]; + struct work_struct work; +}; + +struct kgsl_device_private; +struct kgsl_event_group; + +typedef void (*kgsl_event_func)(struct kgsl_device *, struct kgsl_event_group *, + void *, int); + +/** + * struct kgsl_event - KGSL GPU timestamp event + * @device: Pointer to the KGSL device that owns the event + * @context: Pointer to the context that owns the event + * @timestamp: Timestamp for the event to expire + * @func: Callback function for for the event when it expires + * @priv: Private data passed to the callback function + * @node: List node for the kgsl_event_group list + * @created: Jiffies when the event was created + * @work: Work struct for dispatching the callback + * @result: KGSL event result type to pass to the callback + * group: The event group this event belongs to + */ +struct kgsl_event { + struct kgsl_device *device; + struct kgsl_context *context; + unsigned int timestamp; + kgsl_event_func func; + void *priv; + struct list_head node; + unsigned int created; + struct work_struct work; + int result; + struct kgsl_event_group *group; +}; + +typedef int (*readtimestamp_func)(struct kgsl_device *, void *, + enum kgsl_timestamp_type, unsigned int *); + +/** + * struct event_group - A list of GPU events + * @context: Pointer to the active context for the events + * @lock: Spinlock for protecting the list + * @events: List of active GPU events + * @group: Node for the master group list + * @processed: Last processed timestamp + * @name: String name for the group (for the debugfs file) + * @readtimestamp: Function pointer to read a timestamp + * @priv: Priv member to pass to the readtimestamp function + */ +struct kgsl_event_group { + struct kgsl_context *context; + spinlock_t lock; + struct list_head events; + struct list_head group; + unsigned int processed; + char name[64]; + readtimestamp_func readtimestamp; + void *priv; +}; + +/** + * struct kgsl_protected_registers - Protected register range + * @base: Offset of the range to be protected + * @range: Range (# of registers = 2 ** range) 
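+ * For example, a range value of 4 protects 16 registers starting
+ * at the given base offset.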
+ */ +struct kgsl_protected_registers { + unsigned int base; + int range; +}; + +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data); +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cff_sync_gpuobj(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_mem_entry_destroy(struct kref *kref); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id); + +void kgsl_get_memory_usage(char *str, size_t len, uint64_t memflags); + +extern const struct dev_pm_ops kgsl_pm_ops; + +int 
kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state); +int kgsl_resume_driver(struct platform_device *pdev); + +static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, + uint64_t gpuaddr, uint64_t size) +{ + /* set a minimum size to search for */ + if (!size) + size = 1; + + /* don't overflow */ + if (size > U64_MAX - gpuaddr) + return 0; + + if (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) { + return 1; + } + return 0; +} + +static inline void *kgsl_memdesc_map(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->map_kernel) + memdesc->ops->map_kernel(memdesc); + + return memdesc->hostptr; +} + +static inline void kgsl_memdesc_unmap(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->unmap_kernel) + memdesc->ops->unmap_kernel(memdesc); +} + +static inline void *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + uint64_t gpuaddr) +{ + void *hostptr = NULL; + + if ((gpuaddr >= memdesc->gpuaddr) && + (gpuaddr < (memdesc->gpuaddr + memdesc->size))) + hostptr = kgsl_memdesc_map(memdesc); + + return hostptr != NULL ? hostptr + (gpuaddr - memdesc->gpuaddr) : NULL; +} + +static inline int timestamp_cmp(unsigned int a, unsigned int b) +{ + /* check for equal */ + if (a == b) + return 0; + + /* check for greater-than for non-rollover case */ + if ((a > b) && (a - b < KGSL_TIMESTAMP_WINDOW)) + return 1; + + /* check for greater-than for rollover case + * note that <= is required to ensure that consistent + * results are returned for values whose difference is + * equal to the window size + */ + a += KGSL_TIMESTAMP_WINDOW; + b += KGSL_TIMESTAMP_WINDOW; + return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 1 : -1; +} + +/** + * kgsl_schedule_work() - Schedule a work item on the KGSL workqueue + * @work: work item to schedule + */ +static inline void kgsl_schedule_work(struct work_struct *work) +{ + queue_work(kgsl_driver.workqueue, work); +} + +static inline int +kgsl_mem_entry_get(struct kgsl_mem_entry *entry) +{ + return kref_get_unless_zero(&entry->refcount); +} + +static inline void +kgsl_mem_entry_put(struct kgsl_mem_entry *entry) +{ + kref_put(&entry->refcount, kgsl_mem_entry_destroy); +} + +/** + * kgsl_mem_entry_put_deferred() - Schedule a task to put the memory entry + * @entry: Mem entry to put + * + * This function is for atomic contexts where a normal kgsl_mem_entry_put() + * would result in the memory entry getting destroyed and possibly taking + * mutexes along the way. Schedule the work to happen outside of the atomic + * context. 
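+ * The deferred work is queued on kgsl_driver.mem_workqueue.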
+ */ +static inline void kgsl_mem_entry_put_deferred(struct kgsl_mem_entry *entry) +{ + if (entry != NULL) + queue_work(kgsl_driver.mem_workqueue, &entry->work); +} + +/* + * kgsl_addr_range_overlap() - Checks if 2 ranges overlap + * @gpuaddr1: Start of first address range + * @size1: Size of first address range + * @gpuaddr2: Start of second address range + * @size2: Size of second address range + * + * Function returns true if the 2 given address ranges overlap + * else false + */ +static inline bool kgsl_addr_range_overlap(uint64_t gpuaddr1, + uint64_t size1, uint64_t gpuaddr2, uint64_t size2) +{ + if ((size1 > (U64_MAX - gpuaddr1)) || (size2 > (U64_MAX - gpuaddr2))) + return false; + return !(((gpuaddr1 + size1) <= gpuaddr2) || + (gpuaddr1 >= (gpuaddr2 + size2))); +} + +/** + * kgsl_malloc() - Use either kzalloc or vmalloc to allocate memory + * @size: Size of the desired allocation + * + * Allocate a block of memory for the driver - if it is small try to allocate it + * from kmalloc (fast!) otherwise we need to go with vmalloc (safe!) + */ +static inline void *kgsl_malloc(size_t size) +{ + if (size <= PAGE_SIZE) + return kzalloc(size, GFP_KERNEL); + + return vmalloc(size); +} + +/** + * kgsl_free() - Free memory allocated by kgsl_malloc() + * @ptr: Pointer to the memory to free + * + * Free the memory be it in vmalloc or kmalloc space + */ +static inline void kgsl_free(void *ptr) +{ + if (ptr != NULL && is_vmalloc_addr(ptr)) + return vfree(ptr); + + kfree(ptr); +} + +static inline int _copy_from_user(void *dest, void __user *src, + unsigned int ksize, unsigned int usize) +{ + unsigned int copy = ksize < usize ? ksize : usize; + + if (copy == 0) + return -EINVAL; + + return copy_from_user(dest, src, copy) ? -EFAULT : 0; +} + +static inline void __user *to_user_ptr(uint64_t address) +{ + return (void __user *)(uintptr_t)address; +} + +#endif /* __KGSL_H */ diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c new file mode 100644 index 000000000000..1f10a333adf7 --- /dev/null +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -0,0 +1,747 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +/* #define DEBUG */ +#define ALIGN_CPU + +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/relay.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/sched.h> + +#include "kgsl.h" +#include "kgsl_cffdump.h" +#include "kgsl_debugfs.h" +#include "kgsl_log.h" +#include "kgsl_sharedmem.h" +#include "adreno_pm4types.h" +#include "adreno.h" +#include "adreno_cp_parser.h" + +static struct rchan *chan; +static struct dentry *dir; +static int suspended; +static size_t dropped; +static size_t subbuf_size = 256*1024; +static size_t n_subbufs = 64; + +/* forward declarations */ +static void destroy_channel(void); +static struct rchan *create_channel(unsigned subbuf_size, unsigned n_subbufs); + +static spinlock_t cffdump_lock; +static ulong serial_nr; +static ulong total_bytes; +static ulong total_syncmem; +static long last_sec; + +/* Some simulators have start address of gmem at this offset */ +#define KGSL_CFF_GMEM_OFFSET 0x100000 + +#define MEMBUF_SIZE 64 + +#define CFF_OP_WRITE_REG 0x00000002 +struct cff_op_write_reg { + unsigned char op; + uint addr; + uint value; +} __packed; + +#define CFF_OP_POLL_REG 0x00000004 +struct cff_op_poll_reg { + unsigned char op; + uint addr; + uint value; + uint mask; +} __packed; + +#define CFF_OP_WAIT_IRQ 0x00000005 +struct cff_op_wait_irq { + unsigned char op; +} __packed; + +#define CFF_OP_RMW 0x0000000a + +struct cff_op_write_mem { + unsigned char op; + uint addr; + uint value; +} __packed; + +#define CFF_OP_WRITE_MEMBUF 0x0000000c +struct cff_op_write_membuf { + unsigned char op; + uint addr; + ushort count; + uint buffer[MEMBUF_SIZE]; +} __packed; + +#define CFF_OP_MEMORY_BASE 0x0000000d +struct cff_op_memory_base { + unsigned char op; + uint base; + uint size; + uint gmemsize; +} __packed; + +#define CFF_OP_HANG 0x0000000e +struct cff_op_hang { + unsigned char op; +} __packed; + +#define CFF_OP_EOF 0xffffffff +struct cff_op_eof { + unsigned char op; +} __packed; + +#define CFF_OP_VERIFY_MEM_FILE 0x00000007 +#define CFF_OP_WRITE_SURFACE_PARAMS 0x00000011 +struct cff_op_user_event { + unsigned char op; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; +} __packed; + + +static void b64_encodeblock(unsigned char in[3], unsigned char out[4], int len) +{ + static const char tob64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno" + "pqrstuvwxyz0123456789+/"; + + out[0] = tob64[in[0] >> 2]; + out[1] = tob64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)]; + out[2] = (unsigned char) (len > 1 ? tob64[((in[1] & 0x0f) << 2) + | ((in[2] & 0xc0) >> 6)] : '='); + out[3] = (unsigned char) (len > 2 ? tob64[in[2] & 0x3f] : '='); +} + +static void b64_encode(const unsigned char *in_buf, int in_size, + unsigned char *out_buf, int out_bufsize, int *out_size) +{ + unsigned char in[3], out[4]; + int i, len; + + *out_size = 0; + while (in_size > 0) { + len = 0; + for (i = 0; i < 3; ++i) { + if (in_size-- > 0) { + in[i] = *in_buf++; + ++len; + } else + in[i] = 0; + } + if (len) { + b64_encodeblock(in, out, len); + if (out_bufsize < 4) { + pr_warn("kgsl: cffdump: %s: out of buffer\n", + __func__); + return; + } + for (i = 0; i < 4; ++i) + *out_buf++ = out[i]; + *out_size += 4; + out_bufsize -= 4; + } + } +} + +#define KLOG_TMPBUF_SIZE (1024) +static void klog_printk(const char *fmt, ...) 
+{ + /* per-cpu klog formatting temporary buffer */ + static char klog_buf[NR_CPUS][KLOG_TMPBUF_SIZE]; + + va_list args; + int len; + char *cbuf; + unsigned long flags; + + local_irq_save(flags); + cbuf = klog_buf[smp_processor_id()]; + va_start(args, fmt); + len = vsnprintf(cbuf, KLOG_TMPBUF_SIZE, fmt, args); + total_bytes += len; + va_end(args); + relay_write(chan, cbuf, len); + local_irq_restore(flags); +} + +static struct cff_op_write_membuf cff_op_write_membuf; +static void cffdump_membuf(int id, unsigned char *out_buf, int out_bufsize) +{ + void *data; + int len, out_size; + struct cff_op_write_mem cff_op_write_mem; + + uint addr = cff_op_write_membuf.addr + - sizeof(uint)*cff_op_write_membuf.count; + + if (!cff_op_write_membuf.count) { + pr_warn("kgsl: cffdump: membuf: count == 0, skipping"); + return; + } + + if (cff_op_write_membuf.count != 1) { + cff_op_write_membuf.op = CFF_OP_WRITE_MEMBUF; + cff_op_write_membuf.addr = addr; + len = sizeof(cff_op_write_membuf) - + sizeof(uint)*(MEMBUF_SIZE - cff_op_write_membuf.count); + data = &cff_op_write_membuf; + } else { + cff_op_write_mem.op = CFF_OP_WRITE_MEM; + cff_op_write_mem.addr = addr; + cff_op_write_mem.value = cff_op_write_membuf.buffer[0]; + data = &cff_op_write_mem; + len = sizeof(cff_op_write_mem); + } + b64_encode(data, len, out_buf, out_bufsize, &out_size); + out_buf[out_size] = 0; + klog_printk("%ld:%d;%s\n", ++serial_nr, id, out_buf); + cff_op_write_membuf.count = 0; + cff_op_write_membuf.addr = 0; +} + +void kgsl_cffdump_printline(int id, uint opcode, uint op1, uint op2, + uint op3, uint op4, uint op5) +{ + struct cff_op_write_reg cff_op_write_reg; + struct cff_op_poll_reg cff_op_poll_reg; + struct cff_op_wait_irq cff_op_wait_irq; + struct cff_op_memory_base cff_op_memory_base; + struct cff_op_hang cff_op_hang; + struct cff_op_eof cff_op_eof; + struct cff_op_user_event cff_op_user_event; + unsigned char out_buf[sizeof(cff_op_write_membuf)/3*4 + 16]; + void *data; + int len = 0, out_size; + long cur_secs; + + spin_lock(&cffdump_lock); + if (opcode == CFF_OP_WRITE_MEM) { + if ((cff_op_write_membuf.addr != op1 && + cff_op_write_membuf.count) + || (cff_op_write_membuf.count == MEMBUF_SIZE)) + cffdump_membuf(id, out_buf, sizeof(out_buf)); + + cff_op_write_membuf.buffer[cff_op_write_membuf.count++] = op2; + cff_op_write_membuf.addr = op1 + sizeof(uint); + spin_unlock(&cffdump_lock); + return; + } else if (cff_op_write_membuf.count) + cffdump_membuf(id, out_buf, sizeof(out_buf)); + spin_unlock(&cffdump_lock); + + switch (opcode) { + case CFF_OP_WRITE_REG: + cff_op_write_reg.op = opcode; + cff_op_write_reg.addr = op1; + cff_op_write_reg.value = op2; + data = &cff_op_write_reg; + len = sizeof(cff_op_write_reg); + break; + + case CFF_OP_POLL_REG: + cff_op_poll_reg.op = opcode; + cff_op_poll_reg.addr = op1; + cff_op_poll_reg.value = op2; + cff_op_poll_reg.mask = op3; + data = &cff_op_poll_reg; + len = sizeof(cff_op_poll_reg); + break; + + case CFF_OP_WAIT_IRQ: + cff_op_wait_irq.op = opcode; + data = &cff_op_wait_irq; + len = sizeof(cff_op_wait_irq); + break; + + case CFF_OP_MEMORY_BASE: + cff_op_memory_base.op = opcode; + cff_op_memory_base.base = op1; + cff_op_memory_base.size = op2; + cff_op_memory_base.gmemsize = op3; + data = &cff_op_memory_base; + len = sizeof(cff_op_memory_base); + break; + + case CFF_OP_HANG: + cff_op_hang.op = opcode; + data = &cff_op_hang; + len = sizeof(cff_op_hang); + break; + + case CFF_OP_EOF: + cff_op_eof.op = opcode; + data = &cff_op_eof; + len = sizeof(cff_op_eof); + break; + + case 
CFF_OP_WRITE_SURFACE_PARAMS: + case CFF_OP_VERIFY_MEM_FILE: + cff_op_user_event.op = opcode; + cff_op_user_event.op1 = op1; + cff_op_user_event.op2 = op2; + cff_op_user_event.op3 = op3; + cff_op_user_event.op4 = op4; + cff_op_user_event.op5 = op5; + data = &cff_op_user_event; + len = sizeof(cff_op_user_event); + break; + } + + if (len) { + b64_encode(data, len, out_buf, sizeof(out_buf), &out_size); + out_buf[out_size] = 0; + klog_printk("%ld:%d;%s\n", ++serial_nr, id, out_buf); + } else + pr_warn("kgsl: cffdump: unhandled opcode: %d\n", opcode); + + cur_secs = get_seconds(); + if ((cur_secs - last_sec) > 10 || (last_sec - cur_secs) > 10) { + pr_info("kgsl: cffdump: total [bytes:%lu kB, syncmem:%lu kB], " + "seq#: %lu\n", total_bytes/1024, total_syncmem/1024, + serial_nr); + last_sec = cur_secs; + } +} +EXPORT_SYMBOL(kgsl_cffdump_printline); + +void kgsl_cffdump_init() +{ + struct dentry *debugfs_dir = kgsl_get_debugfs_dir(); + +#ifdef ALIGN_CPU + cpumask_t mask; + + cpumask_clear(&mask); + cpumask_set_cpu(0, &mask); + sched_setaffinity(0, &mask); +#endif + if (!debugfs_dir || IS_ERR(debugfs_dir)) { + KGSL_CORE_ERR("Debugfs directory is bad\n"); + return; + } + + spin_lock_init(&cffdump_lock); + + dir = debugfs_create_dir("cff", debugfs_dir); + if (!dir) { + KGSL_CORE_ERR("debugfs_create_dir failed\n"); + return; + } + + chan = create_channel(subbuf_size, n_subbufs); +} + +void kgsl_cffdump_destroy() +{ + if (chan) + relay_flush(chan); + destroy_channel(); + if (dir) + debugfs_remove(dir); +} + +void kgsl_cffdump_open(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + if (!device->cff_dump_enable) + return; + + /* Set the maximum possible address range */ + kgsl_cffdump_memory_base(device, + adreno_dev->gmem_size + KGSL_CFF_GMEM_OFFSET, + 0xFFFFFFFF - + (adreno_dev->gmem_size + KGSL_CFF_GMEM_OFFSET), + adreno_dev->gmem_size); +} + +void kgsl_cffdump_memory_base(struct kgsl_device *device, unsigned int base, + unsigned int range, unsigned gmemsize) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(device->id, CFF_OP_MEMORY_BASE, base, + range, gmemsize, 0, 0); +} + +void kgsl_cffdump_hang(struct kgsl_device *device) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(device->id, CFF_OP_HANG, 0, 0, 0, 0, 0); +} + +void kgsl_cffdump_close(struct kgsl_device *device) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(device->id, CFF_OP_EOF, 0, 0, 0, 0, 0); +} + +void kgsl_cffdump_user_event(struct kgsl_device *device, + unsigned int cff_opcode, unsigned int op1, + unsigned int op2, unsigned int op3, + unsigned int op4, unsigned int op5) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(-1, cff_opcode, op1, op2, op3, op4, op5); +} + + + +void kgsl_cffdump_memcpy(struct kgsl_device *device, + uint64_t gpuaddr, unsigned int *ptr, uint64_t sizebytes) +{ + int i; + + if (!device || !device->cff_dump_enable) + return; + + for (i = 0; i < ALIGN(sizebytes, 4) / 4; gpuaddr += 4, ptr++, i++) + kgsl_cffdump_write(device, gpuaddr, *ptr); +} + +void kgsl_cffdump_syncmem(struct kgsl_device *device, + struct kgsl_mem_entry *entry, uint64_t offset, + uint64_t sizebytes, bool clean_cache) +{ + void *src; + + if (!device || device->cff_dump_enable || !entry) + return; + + if (sizebytes == 0) + return; + + if ((offset >= entry->memdesc.size) || + (entry->memdesc.size - len) > offset) + return; + + total_syncmem += sizebytes; + + src = kgsl_memdesc_map(&entry->memdesc); + if (src == NULL) { + 
KGSL_CORE_ERR( + "cffdump: no kernel mapping for GPU address 0x%llX\n", + gpuaddr); + return; + } + + if (clean_cache) { + /* Makes sure that the region is freshly fetched */ + mb(); + + kgsl_cache_range_op(entry->memdesc, + offset, sizebytes, KGSL_CACHE_OP_INV); + } + + kgsl_cffdump_memcpy(device, entry->memdesc.gpuaddr + offset, + src + offset, sizebytes); + + kgsl_memdesc_unmap(&entry->memdesc); +} + +void kgsl_cffdump_memset(struct kgsl_device *device, + uint64_t gpuaddr, unsigned char ch, uint64_t sizebytes) +{ + int i; + + if (!device || !device->cff_dump_enable) + return; + + /* Expand the input char into a dword and output it */ + for (i = 0; i < ALIGN(sizebytes, 4) / 4; gpuaddr += 4, i++) + kgsl_cffdump_write(device, gpuaddr, + (ch << 24) | (ch << 16) | (ch << 8) | ch); +} + +void kgsl_cffdump_regwrite(struct kgsl_device *device, uint addr, + uint value) +{ + if (!device->cff_dump_enable) + return; + + kgsl_cffdump_printline(device->id, CFF_OP_WRITE_REG, addr, value, + 0, 0, 0); +} + +void kgsl_cffdump_regpoll(struct kgsl_device *device, uint addr, + uint value, uint mask) +{ + if (!device->cff_dump_enable) + return; + + kgsl_cffdump_printline(device->id, CFF_OP_POLL_REG, addr, value, + mask, 0, 0); +} + +void kgsl_cffdump_slavewrite(struct kgsl_device *device, uint addr, uint value) +{ + if (!device->cff_dump_enable) + return; + + kgsl_cffdump_printline(-1, CFF_OP_WRITE_REG, addr, value, 0, 0, 0); +} + +int kgsl_cffdump_waitirq(struct kgsl_device *device) +{ + if (!device->cff_dump_enable) + return 0; + + kgsl_cffdump_printline(-1, CFF_OP_WAIT_IRQ, 0, 0, 0, 0, 0); + + return 1; +} +EXPORT_SYMBOL(kgsl_cffdump_waitirq); + +static int subbuf_start_handler(struct rchan_buf *buf, + void *subbuf, void *prev_subbuf, size_t prev_padding) +{ + pr_debug("kgsl: cffdump: subbuf_start_handler(subbuf=%p, prev_subbuf" + "=%p, prev_padding=%08zx)\n", subbuf, prev_subbuf, + prev_padding); + + if (relay_buf_full(buf)) { + if (!suspended) { + suspended = 1; + pr_warn("kgsl: cffdump: relay: cpu %d buffer full!!!\n", + smp_processor_id()); + } + dropped++; + return 0; + } else if (suspended) { + suspended = 0; + pr_warn("kgsl: cffdump: relay: cpu %d buffer no longer full.\n", + smp_processor_id()); + } + + subbuf_start_reserve(buf, 0); + return 1; +} + +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, unsigned short mode, struct rchan_buf *buf, + int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +/* + * file_remove() default callback. Removes relay file in debugfs. 
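+ * Invoked by the relay core for each per-cpu buffer when the channel is
+ * torn down in destroy_channel().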
+ */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + pr_info("kgsl: cffdump: %s()\n", __func__); + debugfs_remove(dentry); + return 0; +} + +/* + * relay callbacks + */ +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_handler, + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +/** + * create_channel - creates channel /debug/klog/cpuXXX + * + * Creates channel along with associated produced/consumed control files + * + * Returns channel on success, NULL otherwise + */ +static struct rchan *create_channel(unsigned subbuf_size, unsigned n_subbufs) +{ + struct rchan *chan; + + pr_info("kgsl: cffdump: relay: create_channel: subbuf_size %u, " + "n_subbufs %u, dir 0x%p\n", subbuf_size, n_subbufs, dir); + + chan = relay_open("cpu", dir, subbuf_size, + n_subbufs, &relay_callbacks, NULL); + if (!chan) { + KGSL_CORE_ERR("relay_open failed\n"); + return NULL; + } + + suspended = 0; + dropped = 0; + + return chan; +} + +/** + * destroy_channel - destroys channel /debug/kgsl/cff/cpuXXX + * + * Destroys channel along with associated produced/consumed control files + */ +static void destroy_channel(void) +{ + pr_info("kgsl: cffdump: relay: destroy_channel\n"); + if (chan) { + relay_close(chan); + chan = NULL; + } +} + +int kgsl_cff_dump_enable_set(void *data, u64 val) +{ + int ret = 0; + struct kgsl_device *device = (struct kgsl_device *)data; + int i; + + mutex_lock(&kgsl_driver.devlock); + if (val) { + /* Check if CFF is on for some other device already */ + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i]) { + struct kgsl_device *device_temp = + kgsl_driver.devp[i]; + if (device_temp->cff_dump_enable && + device != device_temp) { + KGSL_CORE_ERR( + "CFF is on for another device %d\n", + device_temp->id); + ret = -EINVAL; + goto done; + } + } + } + if (!device->cff_dump_enable) { + device->cff_dump_enable = 1; + /* + * force device to slumber so that we ensure that the + * start opcode in CFF is present + */ + mutex_lock(&device->mutex); + ret = kgsl_pwrctrl_change_state(device, + KGSL_STATE_SUSPEND); + ret |= kgsl_pwrctrl_change_state(device, + KGSL_STATE_SLUMBER); + if (ret) + device->cff_dump_enable = 0; + mutex_unlock(&device->mutex); + } + } else if (device->cff_dump_enable && !val) { + device->cff_dump_enable = 0; + } +done: + mutex_unlock(&kgsl_driver.devlock); + return ret; +} +EXPORT_SYMBOL(kgsl_cff_dump_enable_set); + +int kgsl_cff_dump_enable_get(void *data, u64 *val) +{ + struct kgsl_device *device = (struct kgsl_device *)data; + *val = device->cff_dump_enable; + return 0; +} +EXPORT_SYMBOL(kgsl_cff_dump_enable_get); + +/* + * kgsl_cffdump_capture_adreno_ib_cff() - Capture CFF for an IB + * @device: Device for which CFF is to be captured + * @ptbase: The pagetable in which the IB is mapped + * @gpuaddr: Address of IB + * @dwords: Size of the IB + * + * Dumps the CFF format of the IB including all objects in it like, IB2, + * shaders, etc. 
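+ * Each object found by the adreno IB parser is written out with
+ * kgsl_cffdump_syncmem().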
+ * + * Returns 0 on success else error code + */ +static int kgsl_cffdump_capture_adreno_ib_cff(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + int ret; + struct adreno_ib_object_list *ib_obj_list; + struct adreno_ib_object *ib_obj; + int i; + + if (!device->cff_dump_enable) + return 0; + + ret = adreno_ib_create_object_list(device, process, gpuaddr, dwords, + &ib_obj_list); + + if (ret) { + KGSL_DRV_ERR(device, + "Fail to create object list for IB 0x%016llX, size(dwords) 0x%llX\n", + gpuaddr, dwords); + return ret; + } + + for (i = 0; i < ib_obj_list->num_objs; i++) { + ib_obj = &(ib_obj_list->obj_list[i]); + kgsl_cffdump_syncmem(device, ib_obj->entry, 0, ib_obj->size, + false); + } + adreno_ib_destroy_obj_list(ib_obj_list); + return 0; +} + +/* + * kgsl_cffdump_capture_ib_desc() - Capture CFF for a list of IB's + * @device: Device for which CFF is to be captured + * @context: The context under which the IB list executes on device + * @ibdesc: The IB list + * @numibs: Number of IB's in ibdesc + * + * Returns 0 on success else error code + */ +int kgsl_cffdump_capture_ib_desc(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch) +{ + int ret = 0; + struct kgsl_memobj_node *ib; + + if (!device->cff_dump_enable) + return 0; + /* Dump CFF for IB and all objects in it */ + list_for_each_entry(ib, &cmdbatch->cmdlist, node) { + ret = kgsl_cffdump_capture_adreno_ib_cff( + device, context->proc_priv, ib->gpuaddr, + ib->size >> 2); + if (ret) { + KGSL_DRV_ERR(device, + "Fail cff capture, IB 0x%016llX, size 0x%llX\n", + ib->gpuaddr, ib->size); + break; + } + } + return ret; +} +EXPORT_SYMBOL(kgsl_cffdump_capture_ib_desc); + +DEFINE_SIMPLE_ATTRIBUTE(kgsl_cff_dump_enable_fops, kgsl_cff_dump_enable_get, + kgsl_cff_dump_enable_set, "%llu\n"); + +void kgsl_cffdump_debugfs_create(struct kgsl_device *device) +{ + debugfs_create_file("cff_dump", 0644, device->d_debugfs, device, + &kgsl_cff_dump_enable_fops); +} diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h new file mode 100644 index 000000000000..5eb04e7ea500 --- /dev/null +++ b/drivers/gpu/msm/kgsl_cffdump.h @@ -0,0 +1,183 @@ +/* Copyright (c) 2010-2011,2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __KGSL_CFFDUMP_H +#define __KGSL_CFFDUMP_H + +#include <linux/types.h> +#include "kgsl_device.h" + +extern unsigned int kgsl_cff_dump_enable; + +static inline bool kgsl_cffdump_flags_no_memzero(void) { return true; } + +struct kgsl_device_private; + +#ifdef CONFIG_MSM_KGSL_CFF_DUMP + +#define CFF_OP_WRITE_MEM 0x0000000b + +void kgsl_cffdump_init(void); +void kgsl_cffdump_destroy(void); +void kgsl_cffdump_open(struct kgsl_device *device); +void kgsl_cffdump_close(struct kgsl_device *device); +void kgsl_cffdump_memcpy(struct kgsl_device *device, uint64_t gpuaddr, + unsigned int *ptr, uint64_t sizebytes); +void kgsl_cffdump_syncmem(struct kgsl_device *, struct kgsl_mem_entry *, + uint64_t offset, uint64_t sizebytes, bool clean_cache); +void kgsl_cffdump_memset(struct kgsl_device *device, uint64_t addr, + unsigned char value, size_t sizebytes); +void kgsl_cffdump_regwrite(struct kgsl_device *device, uint addr, + uint value); +void kgsl_cffdump_regpoll(struct kgsl_device *device, uint addr, + uint value, uint mask); +bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, + const struct kgsl_memdesc *memdesc, uint64_t gpuaddr, + uint64_t sizedwords, bool check_only); +void kgsl_cffdump_user_event(struct kgsl_device *device, + unsigned int cff_opcode, unsigned int op1, + unsigned int op2, unsigned int op3, + unsigned int op4, unsigned int op5); + +void kgsl_cffdump_memory_base(struct kgsl_device *device, unsigned int base, + unsigned int range, unsigned int gmemsize); + +void kgsl_cffdump_hang(struct kgsl_device *device); +void kgsl_cffdump_debugfs_create(struct kgsl_device *device); +int kgsl_cff_dump_enable_set(void *data, u64 val); +int kgsl_cff_dump_enable_get(void *data, u64 *val); +int kgsl_cffdump_capture_ib_desc(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cffdump_printline(int id, uint opcode, uint op1, uint op2, + uint op3, uint op4, uint op5); + +static inline void kgsl_cffdump_write(struct kgsl_device *device, + uint64_t gpuaddr, unsigned int value) +{ + if (!device || !device->cff_dump_enable) + return; + + kgsl_cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, value, 0, 0, 0); +} + +#else + +static inline void kgsl_cffdump_init(void) +{ + return; +} + +static inline void kgsl_cffdump_destroy(void) +{ + return; +} + +static inline void kgsl_cffdump_open(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_close(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_write(struct kgsl_device *device, + uint64_t gpuaddr, unsigned int value) +{ + return; +} + +static inline void kgsl_cffdump_memcpy(struct kgsl_device *device, + uint64_t gupaddr, unsigned int *ptr, uint64_t sizebytes) +{ + return; +} + +static inline void kgsl_cffdump_syncmem(struct kgsl_device *device, + struct kgsl_mem_entry *entry, uint64_t offset, + uint64_t sizebytes, bool clean_cache) +{ + return; +} + +static inline void kgsl_cffdump_memset(struct kgsl_device *device, + uint64_t addr, unsigned char ch, size_t sizebytes) +{ + return; +} + +static inline void kgsl_cffdump_regwrite(struct kgsl_device *device, uint addr, + uint value) +{ + return; +} + +static inline void kgsl_cffdump_regpoll(struct kgsl_device *device, uint addr, + uint value, uint mask) +{ + return; +} + +static inline bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, + const struct kgsl_memdesc *memdesc, uint64_t gpuaddr, + uint64_t sizedwords, bool check_only) +{ + return false; +} + +static 
inline void kgsl_cffdump_memory_base(struct kgsl_device *device, + unsigned int base, unsigned int range, unsigned int gmemsize) +{ + return; +} + +static inline void kgsl_cffdump_hang(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_debugfs_create(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_user_event(struct kgsl_device *device, + unsigned int cff_opcode, unsigned int op1, + unsigned int op2, unsigned int op3, + unsigned int op4, unsigned int op5) +{ + return; +} + +static inline int kgsl_cffdump_capture_ib_desc(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch) +{ + return 0; +} + +static inline int kgsl_cff_dump_enable_set(void *data, u64 val) +{ + return -ENODEV; +} + +static inline int kgsl_cff_dump_enable_get(void *data, u64 *val) +{ + return -ENODEV; +} + +#endif /* CONFIG_MSM_KGSL_CFF_DUMP */ +#endif /* __KGSL_CFFDUMP_H */ diff --git a/drivers/gpu/msm/kgsl_cmdbatch.c b/drivers/gpu/msm/kgsl_cmdbatch.c new file mode 100644 index 000000000000..2aac458f05eb --- /dev/null +++ b/drivers/gpu/msm/kgsl_cmdbatch.c @@ -0,0 +1,948 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * KGSL command batch management + * A command batch is a single submission from userland. The cmdbatch + * encapsulates everything about the submission : command buffers, flags and + * sync points. + * + * Sync points are events that need to expire before the + * cmdbatch can be queued to the hardware. All synpoints are contained in an + * array of kgsl_cmdbatch_sync_event structs in the command batch. There can be + * multiple types of events both internal ones (GPU events) and external + * triggers. As the events expire bits are cleared in a pending bitmap stored + * in the command batch. The GPU will submit the command as soon as the bitmap + * goes to zero indicating no more pending events. 
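+ *
+ * For example, a cmdbatch with one timestamp syncpoint and one fence
+ * syncpoint starts with bits 0 and 1 set in 'pending'; each expiring
+ * event clears its bit and the last one to clear schedules the context
+ * on the dispatcher.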
+ */ + +#include <linux/uaccess.h> +#include <linux/list.h> +#include <linux/compat.h> + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_cmdbatch.h" +#include "kgsl_sync.h" +#include "kgsl_trace.h" +#include "kgsl_compat.h" + +/* + * Define an kmem cache for the memobj structures since we allocate and free + * them so frequently + */ +static struct kmem_cache *memobjs_cache; + +/** + * kgsl_cmdbatch_put() - Decrement the refcount for a command batch object + * @cmdbatch: Pointer to the command batch object + */ +static inline void kgsl_cmdbatch_put(struct kgsl_cmdbatch *cmdbatch) +{ + if (cmdbatch) + kref_put(&cmdbatch->refcount, kgsl_cmdbatch_destroy_object); +} + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_cmdbatch_sync_event *event; + unsigned int i; + + for (i = 0; i < cmdbatch->numsyncs; i++) { + event = &cmdbatch->synclist[i]; + + if (!kgsl_cmdbatch_event_pending(cmdbatch, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + unsigned int retired; + + kgsl_readtimestamp(event->device, + event->context, KGSL_TIMESTAMP_RETIRED, + &retired); + + dev_err(device->dev, + " [timestamp] context %d timestamp %d (retired %d)\n", + event->context->id, event->timestamp, + retired); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + if (event->handle) + dev_err(device->dev, " fence: [%p] %s\n", + event->handle->fence, + event->handle->name); + else + dev_err(device->dev, " fence: invalid\n"); + break; + } + } +} + +static void _kgsl_cmdbatch_timer(unsigned long data) +{ + struct kgsl_device *device; + struct kgsl_cmdbatch *cmdbatch = (struct kgsl_cmdbatch *) data; + struct kgsl_cmdbatch_sync_event *event; + unsigned int i; + + if (cmdbatch == NULL || cmdbatch->context == NULL) + return; + + device = cmdbatch->context->device; + + dev_err(device->dev, + "kgsl: possible gpu syncpoint deadlock for context %d timestamp %d\n", + cmdbatch->context->id, cmdbatch->timestamp); + + set_bit(CMDBATCH_FLAG_FENCE_LOG, &cmdbatch->priv); + kgsl_context_dump(cmdbatch->context); + clear_bit(CMDBATCH_FLAG_FENCE_LOG, &cmdbatch->priv); + + dev_err(device->dev, " pending events:\n"); + + for (i = 0; i < cmdbatch->numsyncs; i++) { + event = &cmdbatch->synclist[i]; + + if (!kgsl_cmdbatch_event_pending(cmdbatch, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + dev_err(device->dev, " [%d] TIMESTAMP %d:%d\n", + i, event->context->id, event->timestamp); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + if (event->handle != NULL) { + dev_err(device->dev, " [%d] FENCE %s\n", + i, event->handle->fence ? + event->handle->fence->name : "NULL"); + kgsl_sync_fence_log(event->handle->fence); + } + break; + } + } + + dev_err(device->dev, "--gpu syncpoint deadlock print end--\n"); +} + +/** + * kgsl_cmdbatch_destroy_object() - Destroy a cmdbatch object + * @kref: Pointer to the kref structure for this object + * + * Actually destroy a command batch object. 
Called from kgsl_cmdbatch_put + */ +void kgsl_cmdbatch_destroy_object(struct kref *kref) +{ + struct kgsl_cmdbatch *cmdbatch = container_of(kref, + struct kgsl_cmdbatch, refcount); + + kgsl_context_put(cmdbatch->context); + + kfree(cmdbatch->synclist); + kfree(cmdbatch); +} +EXPORT_SYMBOL(kgsl_cmdbatch_destroy_object); + +/* + * a generic function to retire a pending sync event and (possibly) + * kick the dispatcher + */ +static void kgsl_cmdbatch_sync_expire(struct kgsl_device *device, + struct kgsl_cmdbatch_sync_event *event) +{ + /* + * Clear the event from the pending mask - if it is already clear, then + * leave without doing anything useful + */ + if (!test_and_clear_bit(event->id, &event->cmdbatch->pending)) + return; + + /* + * If no more pending events, delete the timer and schedule the command + * for dispatch + */ + if (!kgsl_cmdbatch_events_pending(event->cmdbatch)) { + del_timer_sync(&event->cmdbatch->timer); + + if (device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, + event->cmdbatch->context); + } +} + +/* + * This function is called by the GPU event when the sync event timestamp + * expires + */ +static void kgsl_cmdbatch_sync_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_cmdbatch_sync_event *event = priv; + + trace_syncpoint_timestamp_expire(event->cmdbatch, + event->context, event->timestamp); + + kgsl_cmdbatch_sync_expire(device, event); + kgsl_context_put(event->context); + kgsl_cmdbatch_put(event->cmdbatch); +} + +static inline void _free_memobj_list(struct list_head *list) +{ + struct kgsl_memobj_node *mem, *tmpmem; + + /* Free the cmd mem here */ + list_for_each_entry_safe(mem, tmpmem, list, node) { + list_del_init(&mem->node); + kmem_cache_free(memobjs_cache, mem); + } +} + +/** + * kgsl_cmdbatch_destroy() - Destroy a cmdbatch structure + * @cmdbatch: Pointer to the command batch object to destroy + * + * Start the process of destroying a command batch. Cancel any pending events + * and decrement the refcount. Asynchronous events can still signal after + * kgsl_cmdbatch_destroy has returned. 
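+ * The pending bitmap is copied and cleared up front so that any late
+ * callbacks become harmless no-ops.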
+ */ +void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch) +{ + unsigned int i; + unsigned long pending; + + if (IS_ERR_OR_NULL(cmdbatch)) + return; + + /* Zap the canary timer */ + del_timer_sync(&cmdbatch->timer); + + /* + * Copy off the pending list and clear all pending events - this will + * render any subsequent asynchronous callback harmless + */ + bitmap_copy(&pending, &cmdbatch->pending, KGSL_MAX_SYNCPOINTS); + bitmap_zero(&cmdbatch->pending, KGSL_MAX_SYNCPOINTS); + + /* + * Clear all pending events - this will render any subsequent async + * callbacks harmless + */ + + for (i = 0; i < cmdbatch->numsyncs; i++) { + struct kgsl_cmdbatch_sync_event *event = &cmdbatch->synclist[i]; + + /* Don't do anything if the event has already expired */ + if (!test_bit(i, &pending)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + kgsl_cancel_event(cmdbatch->device, + &event->context->events, event->timestamp, + kgsl_cmdbatch_sync_func, event); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + if (kgsl_sync_fence_async_cancel(event->handle)) + kgsl_cmdbatch_put(cmdbatch); + break; + } + } + + /* + * Release the the refcount on the mem entry associated with the + * cmdbatch profiling buffer + */ + if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING) + kgsl_mem_entry_put(cmdbatch->profiling_buf_entry); + + /* Destroy the cmdlist we created */ + _free_memobj_list(&cmdbatch->cmdlist); + + /* Destroy the memlist we created */ + _free_memobj_list(&cmdbatch->memlist); + + /* + * If we cancelled an event, there's a good chance that the context is + * on a dispatcher queue, so schedule to get it removed. + */ + if (!bitmap_empty(&pending, KGSL_MAX_SYNCPOINTS) && + cmdbatch->device->ftbl->drawctxt_sched) + cmdbatch->device->ftbl->drawctxt_sched(cmdbatch->device, + cmdbatch->context); + + kgsl_cmdbatch_put(cmdbatch); +} +EXPORT_SYMBOL(kgsl_cmdbatch_destroy); + +/* + * A callback that gets registered with kgsl_sync_fence_async_wait and is fired + * when a fence is expired + */ +static void kgsl_cmdbatch_sync_fence_func(void *priv) +{ + struct kgsl_cmdbatch_sync_event *event = priv; + + trace_syncpoint_fence_expire(event->cmdbatch, + event->handle ? event->handle->name : "unknown"); + + kgsl_cmdbatch_sync_expire(event->device, event); + + kgsl_cmdbatch_put(event->cmdbatch); +} + +/* kgsl_cmdbatch_add_sync_fence() - Add a new sync fence syncpoint + * @device: KGSL device + * @cmdbatch: KGSL cmdbatch to add the sync point to + * @priv: Private sructure passed by the user + * + * Add a new fence sync syncpoint to the cmdbatch. 
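+ * A reference on the cmdbatch is taken before the fence waiter is
+ * registered and dropped again if registration fails or when the fence
+ * callback fires.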
+ */ +static int kgsl_cmdbatch_add_sync_fence(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void *priv) +{ + struct kgsl_cmd_syncpoint_fence *sync = priv; + struct kgsl_cmdbatch_sync_event *event; + unsigned int id; + + kref_get(&cmdbatch->refcount); + + id = cmdbatch->numsyncs++; + + event = &cmdbatch->synclist[id]; + + event->id = id; + event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE; + event->cmdbatch = cmdbatch; + event->device = device; + event->context = NULL; + + set_bit(event->id, &cmdbatch->pending); + + event->handle = kgsl_sync_fence_async_wait(sync->fd, + kgsl_cmdbatch_sync_fence_func, event); + + if (IS_ERR_OR_NULL(event->handle)) { + int ret = PTR_ERR(event->handle); + + clear_bit(event->id, &cmdbatch->pending); + event->handle = NULL; + + kgsl_cmdbatch_put(cmdbatch); + + /* + * If ret == 0 the fence was already signaled - print a trace + * message so we can track that + */ + if (ret == 0) + trace_syncpoint_fence_expire(cmdbatch, "signaled"); + + return ret; + } + + trace_syncpoint_fence(cmdbatch, event->handle->name); + + return 0; +} + +/* kgsl_cmdbatch_add_sync_timestamp() - Add a new sync point for a cmdbatch + * @device: KGSL device + * @cmdbatch: KGSL cmdbatch to add the sync point to + * @priv: Private sructure passed by the user + * + * Add a new sync point timestamp event to the cmdbatch. + */ +static int kgsl_cmdbatch_add_sync_timestamp(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void *priv) +{ + struct kgsl_cmd_syncpoint_timestamp *sync = priv; + struct kgsl_context *context = kgsl_context_get(cmdbatch->device, + sync->context_id); + struct kgsl_cmdbatch_sync_event *event; + int ret = -EINVAL; + unsigned int id; + + if (context == NULL) + return -EINVAL; + + /* + * We allow somebody to create a sync point on their own context. + * This has the effect of delaying a command from submitting until the + * dependent command has cleared. That said we obviously can't let them + * create a sync point on a future timestamp. 
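+ * For example, if the context has queued up to timestamp 100, a
+ * syncpoint on timestamp 101 of the same context is rejected.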
+ */ + + if (context == cmdbatch->context) { + unsigned int queued; + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(sync->timestamp, queued) > 0) { + KGSL_DRV_ERR(device, + "Cannot create syncpoint for future timestamp %d (current %d)\n", + sync->timestamp, queued); + goto done; + } + } + + kref_get(&cmdbatch->refcount); + + id = cmdbatch->numsyncs++; + + event = &cmdbatch->synclist[id]; + event->id = id; + + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP; + event->cmdbatch = cmdbatch; + event->context = context; + event->timestamp = sync->timestamp; + event->device = device; + + set_bit(event->id, &cmdbatch->pending); + + ret = kgsl_add_event(device, &context->events, sync->timestamp, + kgsl_cmdbatch_sync_func, event); + + if (ret) { + clear_bit(event->id, &cmdbatch->pending); + kgsl_cmdbatch_put(cmdbatch); + } else { + trace_syncpoint_timestamp(cmdbatch, context, sync->timestamp); + } + +done: + if (ret) + kgsl_context_put(context); + + return ret; +} + +/** + * kgsl_cmdbatch_add_sync() - Add a sync point to a command batch + * @device: Pointer to the KGSL device struct for the GPU + * @cmdbatch: Pointer to the cmdbatch + * @sync: Pointer to the user-specified struct defining the syncpoint + * + * Create a new sync point in the cmdbatch based on the user specified + * parameters + */ +int kgsl_cmdbatch_add_sync(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmd_syncpoint *sync) +{ + void *priv; + int ret, psize; + int (*func)(struct kgsl_device *device, struct kgsl_cmdbatch *cmdbatch, + void *priv); + + switch (sync->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + psize = sizeof(struct kgsl_cmd_syncpoint_timestamp); + func = kgsl_cmdbatch_add_sync_timestamp; + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + psize = sizeof(struct kgsl_cmd_syncpoint_fence); + func = kgsl_cmdbatch_add_sync_fence; + break; + default: + KGSL_DRV_ERR(device, + "bad syncpoint type ctxt %d type 0x%x size %zu\n", + cmdbatch->context->id, sync->type, sync->size); + return -EINVAL; + } + + if (sync->size != psize) { + KGSL_DRV_ERR(device, + "bad syncpoint size ctxt %d type 0x%x size %zu\n", + cmdbatch->context->id, sync->type, sync->size); + return -EINVAL; + } + + priv = kzalloc(sync->size, GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + if (copy_from_user(priv, sync->priv, sync->size)) { + kfree(priv); + return -EFAULT; + } + + ret = func(device, cmdbatch, priv); + kfree(priv); + + return ret; +} + +static void add_profiling_buffer(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, uint64_t gpuaddr, uint64_t size, + unsigned int id, uint64_t offset) +{ + struct kgsl_mem_entry *entry; + + if (!(cmdbatch->flags & KGSL_CMDBATCH_PROFILING)) + return; + + /* Only the first buffer entry counts - ignore the rest */ + if (cmdbatch->profiling_buf_entry != NULL) + return; + + if (id != 0) + entry = kgsl_sharedmem_find_id(cmdbatch->context->proc_priv, + id); + else + entry = kgsl_sharedmem_find(cmdbatch->context->proc_priv, + gpuaddr); + + if (entry != NULL) { + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + kgsl_mem_entry_put(entry); + entry = NULL; + } + } + + if (entry == NULL) { + KGSL_DRV_ERR(device, + "ignore bad profile buffer ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + cmdbatch->context->id, id, offset, gpuaddr, size); + return; + } + + cmdbatch->profiling_buf_entry = entry; + + if (id != 0) + cmdbatch->profiling_buffer_gpuaddr = + entry->memdesc.gpuaddr + offset; + else + 
cmdbatch->profiling_buffer_gpuaddr = gpuaddr; +} + +/** + * kgsl_cmdbatch_add_ibdesc() - Add a legacy ibdesc to a command batch + * @cmdbatch: Pointer to the cmdbatch + * @ibdesc: Pointer to the user-specified struct defining the memory or IB + * + * Create a new memory entry in the cmdbatch based on the user specified + * parameters + */ +int kgsl_cmdbatch_add_ibdesc(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, struct kgsl_ibdesc *ibdesc) +{ + struct kgsl_memobj_node *mem; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = (uint64_t) ibdesc->gpuaddr; + mem->size = (uint64_t) ibdesc->sizedwords << 2; + mem->priv = 0; + mem->id = 0; + mem->offset = 0; + mem->flags = 0; + + /* sanitize the ibdesc ctrl flags */ + ibdesc->ctrl &= KGSL_IBDESC_MEMLIST | KGSL_IBDESC_PROFILING_BUFFER; + + if (cmdbatch->flags & KGSL_CMDBATCH_MEMLIST && + ibdesc->ctrl & KGSL_IBDESC_MEMLIST) { + if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) { + add_profiling_buffer(device, cmdbatch, mem->gpuaddr, + mem->size, 0, 0); + return 0; + } + + /* add to the memlist */ + list_add_tail(&mem->node, &cmdbatch->memlist); + + if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) + add_profiling_buffer(device, cmdbatch, mem->gpuaddr, + mem->size, 0, 0); + } else { + /* Ignore if SYNC or MARKER is specified */ + if (cmdbatch->flags & + (KGSL_CMDBATCH_SYNC | KGSL_CMDBATCH_MARKER)) + return 0; + + /* set the preamble flag if directed to */ + if (cmdbatch->context->flags & KGSL_CONTEXT_PREAMBLE && + list_empty(&cmdbatch->cmdlist)) + mem->flags = KGSL_CMDLIST_CTXTSWITCH_PREAMBLE; + + /* add to the cmd list */ + list_add_tail(&mem->node, &cmdbatch->cmdlist); + } + + return 0; +} + +/** + * kgsl_cmdbatch_create() - Create a new cmdbatch structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * @flags: Flags for the cmdbatch + * + * Allocate an new cmdbatch structure + */ +struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags) +{ + struct kgsl_cmdbatch *cmdbatch = kzalloc(sizeof(*cmdbatch), GFP_KERNEL); + if (cmdbatch == NULL) + return ERR_PTR(-ENOMEM); + + /* + * Increase the reference count on the context so it doesn't disappear + * during the lifetime of this command batch + */ + + if (!_kgsl_context_get(context)) { + kfree(cmdbatch); + return ERR_PTR(-ENOENT); + } + + kref_init(&cmdbatch->refcount); + INIT_LIST_HEAD(&cmdbatch->cmdlist); + INIT_LIST_HEAD(&cmdbatch->memlist); + + cmdbatch->device = device; + cmdbatch->context = context; + /* sanitize our flags for cmdbatches */ + cmdbatch->flags = flags & (KGSL_CMDBATCH_CTX_SWITCH + | KGSL_CMDBATCH_MARKER + | KGSL_CMDBATCH_END_OF_FRAME + | KGSL_CMDBATCH_SYNC + | KGSL_CMDBATCH_PWR_CONSTRAINT + | KGSL_CMDBATCH_MEMLIST + | KGSL_CMDBATCH_PROFILING); + + /* Add a timer to help debug sync deadlocks */ + setup_timer(&cmdbatch->timer, _kgsl_cmdbatch_timer, + (unsigned long) cmdbatch); + + return cmdbatch; +} + +#ifdef CONFIG_COMPAT +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + int i, ret = 0; + struct kgsl_ibdesc_compat ibdesc32; + struct kgsl_ibdesc ibdesc; + + for (i = 0; i < count; i++) { + memset(&ibdesc32, 0, sizeof(ibdesc32)); + + if (copy_from_user(&ibdesc32, ptr, sizeof(ibdesc32))) { + ret = -EFAULT; + break; + } + + ibdesc.gpuaddr = (unsigned long) ibdesc32.gpuaddr; + ibdesc.sizedwords = (size_t) 
ibdesc32.sizedwords; + ibdesc.ctrl = (unsigned int) ibdesc32.ctrl; + + ret = kgsl_cmdbatch_add_ibdesc(device, cmdbatch, &ibdesc); + if (ret) + break; + + ptr += sizeof(ibdesc32); + } + + return ret; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint_compat sync32; + struct kgsl_cmd_syncpoint sync; + int i, ret = 0; + + for (i = 0; i < count; i++) { + memset(&sync32, 0, sizeof(sync32)); + + if (copy_from_user(&sync32, ptr, sizeof(sync32))) { + ret = -EFAULT; + break; + } + + sync.type = sync32.type; + sync.priv = compat_ptr(sync32.priv); + sync.size = (size_t) sync32.size; + + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); + if (ret) + break; + + ptr += sizeof(sync32); + } + + return ret; +} +#else +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + return -EINVAL; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + return -EINVAL; +} +#endif + +int kgsl_cmdbatch_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + struct kgsl_ibdesc ibdesc; + int i, ret; + + if (is_compat_task()) + return add_ibdesc_list_compat(device, cmdbatch, ptr, count); + + for (i = 0; i < count; i++) { + memset(&ibdesc, 0, sizeof(ibdesc)); + + if (copy_from_user(&ibdesc, ptr, sizeof(ibdesc))) + return -EFAULT; + + ret = kgsl_cmdbatch_add_ibdesc(device, cmdbatch, &ibdesc); + if (ret) + return ret; + + ptr += sizeof(ibdesc); + } + + return 0; +} + +int kgsl_cmdbatch_add_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint sync; + int i, ret; + + if (count == 0) + return 0; + + if (count > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + cmdbatch->synclist = kcalloc(count, + sizeof(struct kgsl_cmdbatch_sync_event), GFP_KERNEL); + + if (cmdbatch->synclist == NULL) + return -ENOMEM; + + if (is_compat_task()) + return add_syncpoints_compat(device, cmdbatch, ptr, count); + + for (i = 0; i < count; i++) { + memset(&sync, 0, sizeof(sync)); + + if (copy_from_user(&sync, ptr, sizeof(sync))) + return -EFAULT; + + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); + if (ret) + return ret; + + ptr += sizeof(sync); + } + + return 0; +} + +static int kgsl_cmdbatch_add_object(struct list_head *head, + struct kgsl_command_object *obj) +{ + struct kgsl_memobj_node *mem; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = obj->gpuaddr; + mem->size = obj->size; + mem->id = obj->id; + mem->offset = obj->offset; + mem->flags = obj->flags; + mem->priv = 0; + + list_add_tail(&mem->node, head); + return 0; +} + +#define CMDLIST_FLAGS \ + (KGSL_CMDLIST_IB | \ + KGSL_CMDLIST_CTXTSWITCH_PREAMBLE | \ + KGSL_CMDLIST_IB_PREAMBLE) + +int kgsl_cmdbatch_add_cmdlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + int i, ret = 0; + + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + /* Ignore all if SYNC or MARKER is specified */ + if (cmdbatch->flags & (KGSL_CMDBATCH_SYNC | KGSL_CMDBATCH_MARKER)) + return 0; + + for (i = 0; i < 
count; i++) { + memset(&obj, 0, sizeof(obj)); + + ret = _copy_from_user(&obj, ptr, sizeof(obj), size); + if (ret) + return ret; + + /* Sanity check the flags */ + if (!(obj.flags & CMDLIST_FLAGS)) { + KGSL_DRV_ERR(device, + "invalid cmdobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n", + cmdbatch->context->id, obj.flags, obj.id, + obj.offset, obj.gpuaddr, obj.size); + return -EINVAL; + } + + ret = kgsl_cmdbatch_add_object(&cmdbatch->cmdlist, &obj); + if (ret) + return ret; + + ptr += sizeof(obj); + } + + return 0; +} + +int kgsl_cmdbatch_add_memlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + int i, ret = 0; + + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + for (i = 0; i < count; i++) { + memset(&obj, 0, sizeof(obj)); + + ret = _copy_from_user(&obj, ptr, sizeof(obj), size); + if (ret) + return ret; + + if (!(obj.flags & KGSL_OBJLIST_MEMOBJ)) { + KGSL_DRV_ERR(device, + "invalid memobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n", + cmdbatch->context->id, obj.flags, obj.id, + obj.offset, obj.gpuaddr, obj.size); + return -EINVAL; + } + + if (obj.flags & KGSL_OBJLIST_PROFILE) + add_profiling_buffer(device, cmdbatch, obj.gpuaddr, + obj.size, obj.id, obj.offset); + else { + ret = kgsl_cmdbatch_add_object(&cmdbatch->memlist, + &obj); + if (ret) + return ret; + } + + ptr += sizeof(obj); + } + + return 0; +} + +int kgsl_cmdbatch_add_synclist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_syncpoint syncpoint; + struct kgsl_cmd_syncpoint sync; + int i, ret = 0; + + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + if (count > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + cmdbatch->synclist = kcalloc(count, + sizeof(struct kgsl_cmdbatch_sync_event), GFP_KERNEL); + + if (cmdbatch->synclist == NULL) + return -ENOMEM; + + for (i = 0; i < count; i++) { + memset(&syncpoint, 0, sizeof(syncpoint)); + + ret = _copy_from_user(&syncpoint, ptr, sizeof(syncpoint), size); + if (ret) + return ret; + + sync.type = syncpoint.type; + sync.priv = to_user_ptr(syncpoint.priv); + sync.size = syncpoint.size; + + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); + if (ret) + return ret; + + ptr += sizeof(syncpoint); + } + + return 0; +} + +void kgsl_cmdbatch_exit(void) +{ + if (memobjs_cache != NULL) + kmem_cache_destroy(memobjs_cache); +} + +int kgsl_cmdbatch_init(void) +{ + memobjs_cache = KMEM_CACHE(kgsl_memobj_node, 0); + if (memobjs_cache == NULL) { + KGSL_CORE_ERR("failed to create memobjs_cache"); + return -ENOMEM; + } + + return 0; +} diff --git a/drivers/gpu/msm/kgsl_cmdbatch.h b/drivers/gpu/msm/kgsl_cmdbatch.h new file mode 100644 index 000000000000..1547ac02fdbf --- /dev/null +++ b/drivers/gpu/msm/kgsl_cmdbatch.h @@ -0,0 +1,170 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __KGSL_CMDBATCH_H +#define __KGSL_CMDBATCH_H + +#define KGSL_CMDBATCH_FLAGS \ + { KGSL_CMDBATCH_MARKER, "MARKER" }, \ + { KGSL_CMDBATCH_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_CMDBATCH_SYNC, "SYNC" }, \ + { KGSL_CMDBATCH_END_OF_FRAME, "EOF" }, \ + { KGSL_CMDBATCH_PWR_CONSTRAINT, "PWR_CONSTRAINT" }, \ + { KGSL_CMDBATCH_SUBMIT_IB_LIST, "IB_LIST" } + +/** + * struct kgsl_cmdbatch - KGSl command descriptor + * @device: KGSL GPU device that the command was created for + * @context: KGSL context that created the command + * @timestamp: Timestamp assigned to the command + * @flags: flags + * @priv: Internal flags + * @fault_policy: Internal policy describing how to handle this command in case + * of a fault + * @fault_recovery: recovery actions actually tried for this batch + * @expires: Point in time when the cmdbatch is considered to be hung + * @refcount: kref structure to maintain the reference count + * @cmdlist: List of IBs to issue + * @memlist: List of all memory used in this command batch + * @synclist: Array of context/timestamp tuples to wait for before issuing + * @numsyncs: Number of sync entries in the array + * @pending: Bitmask of sync events that are active + * @timer: a timer used to track possible sync timeouts for this cmdbatch + * @marker_timestamp: For markers, the timestamp of the last "real" command that + * was queued + * @profiling_buf_entry: Mem entry containing the profiling buffer + * @profiling_buffer_gpuaddr: GPU virt address of the profile buffer added here + * for easy access + * @profile_index: Index to store the start/stop ticks in the kernel profiling + * buffer + * @submit_ticks: Variable to hold ticks at the time of cmdbatch submit. + * @global_ts: The ringbuffer timestamp corresponding to this cmdbatch + * @timeout_jiffies: For a syncpoint cmdbatch the jiffies at which the + * timer will expire + * This structure defines an atomic batch of command buffers issued from + * userspace. 
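+ *
+ * In rough terms: a cmdbatch is allocated with kgsl_cmdbatch_create(),
+ * filled in through the kgsl_cmdbatch_add_*() helpers (IBs on @cmdlist,
+ * memory objects on @memlist, sync points in @synclist) and then passed
+ * to the device-specific submission path; @refcount keeps it alive until
+ * its sync points and the submission itself have retired.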
+ */ +struct kgsl_cmdbatch { + struct kgsl_device *device; + struct kgsl_context *context; + uint32_t timestamp; + uint32_t flags; + unsigned long priv; + unsigned long fault_policy; + unsigned long fault_recovery; + unsigned long expires; + struct kref refcount; + struct list_head cmdlist; + struct list_head memlist; + struct kgsl_cmdbatch_sync_event *synclist; + unsigned int numsyncs; + unsigned long pending; + struct timer_list timer; + unsigned int marker_timestamp; + struct kgsl_mem_entry *profiling_buf_entry; + uint64_t profiling_buffer_gpuaddr; + unsigned int profile_index; + uint64_t submit_ticks; + unsigned int global_ts; + unsigned long timeout_jiffies; +}; + +/** + * struct kgsl_cmdbatch_sync_event + * @id: identifer (positiion within the pending bitmap) + * @type: Syncpoint type + * @cmdbatch: Pointer to the cmdbatch that owns the sync event + * @context: Pointer to the KGSL context that owns the cmdbatch + * @timestamp: Pending timestamp for the event + * @handle: Pointer to a sync fence handle + * @device: Pointer to the KGSL device + */ +struct kgsl_cmdbatch_sync_event { + unsigned int id; + int type; + struct kgsl_cmdbatch *cmdbatch; + struct kgsl_context *context; + unsigned int timestamp; + struct kgsl_sync_fence_waiter *handle; + struct kgsl_device *device; +}; + +/** + * enum kgsl_cmdbatch_priv - Internal cmdbatch flags + * @CMDBATCH_FLAG_SKIP - skip the entire command batch + * @CMDBATCH_FLAG_FORCE_PREAMBLE - Force the preamble on for the cmdbatch + * @CMDBATCH_FLAG_WFI - Force wait-for-idle for the submission + * @CMDBATCH_FLAG_PROFILE - store the start / retire ticks for the command batch + * in the profiling buffer + * @CMDBATCH_FLAG_FENCE_LOG - Set if the cmdbatch is dumping fence logs via the + * cmdbatch timer - this is used to avoid recursion + */ + +enum kgsl_cmdbatch_priv { + CMDBATCH_FLAG_SKIP = 0, + CMDBATCH_FLAG_FORCE_PREAMBLE, + CMDBATCH_FLAG_WFI, + CMDBATCH_FLAG_PROFILE, + CMDBATCH_FLAG_FENCE_LOG, +}; + + +int kgsl_cmdbatch_add_memobj(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_ibdesc *ibdesc); + +int kgsl_cmdbatch_add_sync(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmd_syncpoint *sync); + +struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags); +int kgsl_cmdbatch_add_ibdesc(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, struct kgsl_ibdesc *ibdesc); +int kgsl_cmdbatch_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count); +int kgsl_cmdbatch_add_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count); +int kgsl_cmdbatch_add_cmdlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_cmdbatch_add_memlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_cmdbatch_add_synclist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count); + +int kgsl_cmdbatch_init(void); +void kgsl_cmdbatch_exit(void); + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cmdbatch_destroy_object(struct kref *kref); + +static inline bool kgsl_cmdbatch_events_pending(struct kgsl_cmdbatch *cmdbatch) +{ + return 
!bitmap_empty(&cmdbatch->pending, KGSL_MAX_SYNCPOINTS); +} + +static inline bool kgsl_cmdbatch_event_pending(struct kgsl_cmdbatch *cmdbatch, + unsigned int bit) +{ + if (bit >= KGSL_MAX_SYNCPOINTS) + return false; + + return test_bit(bit, &cmdbatch->pending); +} + +#endif /* __KGSL_CMDBATCH_H */ diff --git a/drivers/gpu/msm/kgsl_compat.c b/drivers/gpu/msm/kgsl_compat.c new file mode 100644 index 000000000000..248c78b7e5c4 --- /dev/null +++ b/drivers/gpu/msm/kgsl_compat.c @@ -0,0 +1,398 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/uaccess.h> +#include <asm/ioctl.h> + +#include "kgsl.h" +#include "kgsl_compat.h" +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static long +kgsl_ioctl_device_getproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_getproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_device_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_setproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_submit_commands_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_submit_commands_compat *param32 = data; + struct kgsl_submit_commands param; + + param.context_id = param32->context_id; + param.flags = param32->flags; + param.cmdlist = compat_ptr(param32->cmdlist); + param.numcmds = param32->numcmds; + param.synclist = compat_ptr(param32->synclist); + param.numsyncs = param32->numsyncs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_submit_commands(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long +kgsl_ioctl_rb_issueibcmds_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_ringbuffer_issueibcmds_compat *param32 = data; + struct kgsl_ringbuffer_issueibcmds param; + + param.drawctxt_id = param32->drawctxt_id; + param.flags = param32->flags; + param.ibdesc_addr = (unsigned long)param32->ibdesc_addr; + param.numibs = param32->numibs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_rb_issueibcmds(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat *param32 = data; + struct kgsl_cmdstream_freememontimestamp_ctxtid param; 
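+
+	/*
+	 * Like the other compat thunks in this file: unpack the 32-bit
+	 * layout into the native structure and hand it to the regular
+	 * ioctl handler; thunks for ioctls that return data also copy
+	 * the output fields back to the 32-bit structure afterwards.
+	 */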
+ + param.context_id = param32->context_id; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.type = param32->type; + param.timestamp = param32->timestamp; + + return kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(dev_priv, cmd, + ¶m); +} + +static long kgsl_ioctl_sharedmem_free_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_free(dev_priv, cmd, ¶m); +} + +static long kgsl_ioctl_map_user_mem_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + int result = 0; + struct kgsl_map_user_mem_compat *param32 = data; + struct kgsl_map_user_mem param; + + param.fd = param32->fd; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.len = (size_t)param32->len; + param.offset = (size_t)param32->offset; + param.hostptr = (unsigned long)param32->hostptr; + param.memtype = param32->memtype; + param.flags = param32->flags; + + result = kgsl_ioctl_map_user_mem(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->flags = param.flags; + return result; +} + +static long +kgsl_ioctl_gpumem_sync_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_compat *param32 = data; + struct kgsl_gpumem_sync_cache param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.op = param32->op; + param.offset = (size_t)param32->offset; + param.length = (size_t)param32->length; + + return kgsl_ioctl_gpumem_sync_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_sync_cache_bulk_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_bulk_compat *param32 = data; + struct kgsl_gpumem_sync_cache_bulk param; + + param.id_list = to_user_ptr(param32->id_list); + param.count = param32->count; + param.op = param32->op; + + return kgsl_ioctl_gpumem_sync_cache_bulk(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_sharedmem_flush_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_flush_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_alloc_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_compat *param32 = data; + struct kgsl_gpumem_alloc param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.size = (size_t)param32->size; + param.flags = param32->flags; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->size = sizet_to_compat(param.size); + param32->flags = param.flags; + + return result; +} + +static long +kgsl_ioctl_gpumem_alloc_id_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_id_compat *param32 = data; + struct kgsl_gpumem_alloc_id param; + + param.id = param32->id; + param.flags = param32->flags; + param.size = 
(size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.gpuaddr = (unsigned long)param32->gpuaddr; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc_id(dev_priv, cmd, ¶m); + + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + + return result; +} + +static long +kgsl_ioctl_gpumem_get_info_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_get_info_compat *param32 = data; + struct kgsl_gpumem_get_info param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.flags = param32->flags; + param.size = (size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.useraddr = (unsigned long)param32->useraddr; + + result = kgsl_ioctl_gpumem_get_info(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->useraddr = (compat_ulong_t)param.useraddr; + + return result; +} + +static long kgsl_ioctl_cff_syncmem_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cff_syncmem_compat *param32 = data; + struct kgsl_cff_syncmem param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.len = (size_t)param32->len; + + return kgsl_ioctl_cff_syncmem(dev_priv, cmd, ¶m); +} + +static long kgsl_ioctl_timestamp_event_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event_compat *param32 = data; + struct kgsl_timestamp_event param; + + param.type = param32->type; + param.timestamp = param32->timestamp; + param.context_id = param32->context_id; + param.priv = compat_ptr(param32->priv); + param.len = (size_t)param32->len; + + return kgsl_ioctl_timestamp_event(dev_priv, cmd, ¶m); +} + + +static const struct kgsl_ioctl kgsl_compat_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT, + kgsl_ioctl_device_getproperty_compat), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT, + kgsl_ioctl_rb_issueibcmds_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT, + kgsl_ioctl_submit_commands_compat), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM_COMPAT, + kgsl_ioctl_map_user_mem_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE_COMPAT, + kgsl_ioctl_sharedmem_free_compat), + 
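+	/*
+	 * Entries in this table that reference the native (non _COMPAT)
+	 * handlers need no conversion wrapper because those ioctl payloads
+	 * have the same layout for 32-bit and 64-bit callers.
+	 */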
KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT, + kgsl_ioctl_sharedmem_flush_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_COMPAT, + kgsl_ioctl_gpumem_alloc_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNCMEM_COMPAT, + kgsl_ioctl_cff_syncmem_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, + kgsl_ioctl_cff_user_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT, + kgsl_ioctl_timestamp_event_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY_COMPAT, + kgsl_ioctl_device_setproperty_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT, + kgsl_ioctl_gpumem_alloc_id_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT, + kgsl_ioctl_gpumem_get_info_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT, + kgsl_ioctl_gpumem_sync_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT, + kgsl_ioctl_gpumem_sync_cache_bulk_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNC_GPUOBJ, + kgsl_ioctl_cff_sync_gpuobj), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + kgsl_ioctl_gpuobj_set_info), +}; + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + + long ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_compat_ioctl_funcs, + ARRAY_SIZE(kgsl_compat_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (device->ftbl->compat_ioctl != NULL) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + + KGSL_DRV_INFO(device, "invalid ioctl code 0x%08X\n", cmd); + } + + return ret; +} diff --git a/drivers/gpu/msm/kgsl_compat.h b/drivers/gpu/msm/kgsl_compat.h new file mode 100644 index 000000000000..b7a1eb174baf --- /dev/null +++ b/drivers/gpu/msm/kgsl_compat.h @@ -0,0 +1,273 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_COMPAT_H +#define __KGSL_COMPAT_H + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> +#include "kgsl.h" +#include "kgsl_device.h" + +struct kgsl_ibdesc_compat { + compat_ulong_t gpuaddr; + unsigned int __pad; + compat_size_t sizedwords; + unsigned int ctrl; +}; + +struct kgsl_cmd_syncpoint_compat { + int type; + compat_uptr_t priv; + compat_size_t size; +}; + +struct kgsl_devinfo_compat { + unsigned int device_id; + unsigned int chip_id; + unsigned int mmu_enabled; + compat_ulong_t gmem_gpubaseaddr; + unsigned int gpu_id; + compat_size_t gmem_sizebytes; +}; + +struct kgsl_shadowprop_compat { + compat_ulong_t gpuaddr; + compat_size_t size; + unsigned int flags; +}; + +struct kgsl_device_constraint_compat { + unsigned int type; + unsigned int context_id; + compat_uptr_t data; + compat_size_t size; +}; + +struct kgsl_device_getproperty_compat { + unsigned int type; + compat_uptr_t value; + compat_size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty_compat) + +#define IOCTL_KGSL_SETPROPERTY_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty_compat) + + +struct kgsl_submit_commands_compat { + unsigned int context_id; + unsigned int flags; + compat_uptr_t cmdlist; + unsigned int numcmds; + compat_uptr_t synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands_compat) + +struct kgsl_ringbuffer_issueibcmds_compat { + unsigned int drawctxt_id; + compat_ulong_t ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /* output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds_compat) + +struct kgsl_cmdstream_freememontimestamp_compat { + compat_ulong_t gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x12, \ + struct kgsl_cmdstream_freememontimestamp_compat) + +struct kgsl_cmdstream_freememontimestamp_ctxtid_compat { + unsigned int context_id; + compat_ulong_t gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat) + +struct kgsl_map_user_mem_compat { + int fd; + compat_ulong_t gpuaddr; + compat_size_t len; + compat_size_t offset; + compat_ulong_t hostptr; + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem_compat) + +struct kgsl_sharedmem_free_compat { + compat_ulong_t gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free_compat) + +#define IOCTL_KGSL_SHAREDMEM_FREE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free_compat) + +struct kgsl_gpumem_alloc_compat { + compat_ulong_t gpuaddr; /* output param */ + compat_size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc_compat) + +struct kgsl_cff_syncmem_compat { + compat_ulong_t gpuaddr; + compat_size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM_COMPAT \ + _IOW(KGSL_IOC_TYPE, 
0x30, struct kgsl_cff_syncmem_compat) + +struct kgsl_timestamp_event_compat { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + compat_uptr_t priv; /* Pointer to the event specific blob */ + compat_size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event_compat) + +struct kgsl_gpumem_alloc_id_compat { + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t gpuaddr; +/* private: reserved for future use*/ + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id_compat) + +struct kgsl_gpumem_get_info_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t useraddr; +/* private: reserved for future use*/ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info_compat) + +struct kgsl_gpumem_sync_cache_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int op; + compat_size_t offset; + compat_size_t length; +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache_compat) + +struct kgsl_gpumem_sync_cache_bulk_compat { + compat_uptr_t id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk_compat) + +struct kgsl_perfcounter_query_compat { + unsigned int groupid; + compat_uptr_t countables; + unsigned int count; + unsigned int max_counters; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query_compat) + +struct kgsl_perfcounter_read_compat { + compat_uptr_t reads; + unsigned int count; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read_compat) + +static inline compat_ulong_t gpuaddr_to_compat(unsigned long gpuaddr) +{ + WARN(gpuaddr >> 32, "Top 32 bits of gpuaddr have been set\n"); + return (compat_ulong_t)gpuaddr; +} + +static inline compat_size_t sizet_to_compat(size_t size) +{ + WARN(size >> 32, "Size greater than 4G\n"); + return (compat_size_t)size; +} + +int kgsl_cmdbatch_create_compat(struct kgsl_device *device, unsigned int flags, + struct kgsl_cmdbatch *cmdbatch, void __user *cmdlist, + unsigned int numcmds, void __user *synclist, + unsigned int numsyncs); + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg); + +#else +static inline int kgsl_cmdbatch_create_compat(struct kgsl_device *device, + unsigned int flags, struct kgsl_cmdbatch *cmdbatch, + void __user *cmdlist, unsigned int numcmds, + void __user *synclist, unsigned int numsyncs) +{ + BUG(); +} + +static inline long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + BUG(); +} + +#endif /* CONFIG_COMPAT */ +#endif /* __KGSL_COMPAT_H */ diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c new file mode 100644 index 000000000000..11095f38bad7 --- /dev/null +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -0,0 
+1,272 @@ +/* Copyright (c) 2002,2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/debugfs.h> + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "kgsl_debugfs.h" + +/*default log levels is error for everything*/ +#define KGSL_LOG_LEVEL_MAX 7 + +struct dentry *kgsl_debugfs_dir; +static struct dentry *proc_d_debugfs; + +static inline int kgsl_log_set(unsigned int *log_val, void *data, u64 val) +{ + *log_val = min((unsigned int)val, (unsigned int)KGSL_LOG_LEVEL_MAX); + return 0; +} + +#define KGSL_DEBUGFS_LOG(__log) \ +static int __log ## _set(void *data, u64 val) \ +{ \ + struct kgsl_device *device = data; \ + return kgsl_log_set(&device->__log, data, val); \ +} \ +static int __log ## _get(void *data, u64 *val) \ +{ \ + struct kgsl_device *device = data; \ + *val = device->__log; \ + return 0; \ +} \ +DEFINE_SIMPLE_ATTRIBUTE(__log ## _fops, \ +__log ## _get, __log ## _set, "%llu\n"); \ + +KGSL_DEBUGFS_LOG(drv_log); +KGSL_DEBUGFS_LOG(cmd_log); +KGSL_DEBUGFS_LOG(ctxt_log); +KGSL_DEBUGFS_LOG(mem_log); +KGSL_DEBUGFS_LOG(pwr_log); + +static int _strict_set(void *data, u64 val) +{ + kgsl_sharedmem_set_noretry(val ? true : false); + return 0; +} + +static int _strict_get(void *data, u64 *val) +{ + *val = kgsl_sharedmem_get_noretry(); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_strict_fops, _strict_get, _strict_set, "%llu\n"); + +void kgsl_device_debugfs_init(struct kgsl_device *device) +{ + if (kgsl_debugfs_dir && !IS_ERR(kgsl_debugfs_dir)) + device->d_debugfs = debugfs_create_dir(device->name, + kgsl_debugfs_dir); + + if (!device->d_debugfs || IS_ERR(device->d_debugfs)) + return; + + debugfs_create_file("log_level_cmd", 0644, device->d_debugfs, device, + &cmd_log_fops); + debugfs_create_file("log_level_ctxt", 0644, device->d_debugfs, device, + &ctxt_log_fops); + debugfs_create_file("log_level_drv", 0644, device->d_debugfs, device, + &drv_log_fops); + debugfs_create_file("log_level_mem", 0644, device->d_debugfs, device, + &mem_log_fops); + debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device, + &pwr_log_fops); +} + +struct type_entry { + int type; + const char *str; +}; + +static const struct type_entry memtypes[] = { KGSL_MEM_TYPES }; + +static const char *memtype_str(int memtype) +{ + int i; + for (i = 0; i < ARRAY_SIZE(memtypes); i++) + if (memtypes[i].type == memtype) + return memtypes[i].str; + return "unknown"; +} + +static char get_alignflag(const struct kgsl_memdesc *m) +{ + int align = kgsl_memdesc_get_align(m); + if (align >= ilog2(SZ_1M)) + return 'L'; + else if (align >= ilog2(SZ_64K)) + return 'l'; + return '-'; +} + +static char get_cacheflag(const struct kgsl_memdesc *m) +{ + static const char table[] = { + [KGSL_CACHEMODE_WRITECOMBINE] = '-', + [KGSL_CACHEMODE_UNCACHED] = 'u', + [KGSL_CACHEMODE_WRITEBACK] = 'b', + [KGSL_CACHEMODE_WRITETHROUGH] = 't', + }; + return table[kgsl_memdesc_get_cachemode(m)]; +} + + +static int print_mem_entry(int id, void *ptr, void *data) +{ + struct seq_file *s = data; + struct 
kgsl_mem_entry *entry = ptr; + char flags[8]; + char usage[16]; + struct kgsl_memdesc *m = &entry->memdesc; + + flags[0] = kgsl_memdesc_is_global(m) ? 'g' : '-'; + flags[1] = '-'; + flags[2] = !(m->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-'; + flags[3] = get_alignflag(m); + flags[4] = get_cacheflag(m); + flags[5] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-'; + flags[6] = (m->useraddr) ? 'Y' : 'N'; + flags[7] = '\0'; + + kgsl_get_memory_usage(usage, sizeof(usage), m->flags); + + seq_printf(s, "%pK %pK %16llu %5d %8s %10s %16s %5d", + (uint64_t *)(uintptr_t) m->gpuaddr, + (unsigned long *) m->useraddr, + m->size, entry->id, flags, + memtype_str(kgsl_memdesc_usermem_type(m)), + usage, m->sgt->nents); + + if (entry->metadata[0] != 0) + seq_printf(s, " %s", entry->metadata); + + seq_putc(s, '\n'); + + return 0; +} + +static int process_mem_print(struct seq_file *s, void *unused) +{ + struct kgsl_process_private *private = s->private; + + seq_printf(s, "%8s %8s %8s %5s %8s %10s %16s %5s\n", + "gpuaddr", "useraddr", "size", "id", "flags", "type", + "usage", "sglen"); + + spin_lock(&private->mem_lock); + idr_for_each(&private->mem_idr, print_mem_entry, s); + spin_unlock(&private->mem_lock); + + return 0; +} + +static int process_mem_open(struct inode *inode, struct file *file) +{ + int ret; + pid_t pid = (pid_t) (unsigned long) inode->i_private; + struct kgsl_process_private *private = NULL; + + private = kgsl_process_private_find(pid); + + if (!private) + return -ENODEV; + + ret = single_open(file, process_mem_print, private); + if (ret) + kgsl_process_private_put(private); + + return ret; +} + +static int process_mem_release(struct inode *inode, struct file *file) +{ + struct kgsl_process_private *private = + ((struct seq_file *)file->private_data)->private; + + if (private) + kgsl_process_private_put(private); + + return single_release(inode, file); +} + +static const struct file_operations process_mem_fops = { + .open = process_mem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = process_mem_release, +}; + + +/** + * kgsl_process_init_debugfs() - Initialize debugfs for a process + * @private: Pointer to process private structure created for the process + * + * kgsl_process_init_debugfs() is called at the time of creating the + * process struct when a process opens kgsl device for the first time. + * This function is not fatal - all we do is print a warning message if + * the files can't be created + */ +void kgsl_process_init_debugfs(struct kgsl_process_private *private) +{ + unsigned char name[16]; + struct dentry *dentry; + + snprintf(name, sizeof(name), "%d", private->pid); + + private->debug_root = debugfs_create_dir(name, proc_d_debugfs); + + /* + * Both debugfs_create_dir() and debugfs_create_file() return + * ERR_PTR(-ENODEV) if debugfs is disabled in the kernel but return + * NULL on error when it is enabled. For both usages we need to check + * for ERROR or NULL and only print a warning on an actual failure + * (i.e. 
- when the return value is NULL) + */ + + if (IS_ERR_OR_NULL(private->debug_root)) { + WARN((private->debug_root == NULL), + "Unable to create debugfs dir for %s\n", name); + private->debug_root = NULL; + return; + } + + dentry = debugfs_create_file("mem", 0444, private->debug_root, + (void *) ((unsigned long) private->pid), &process_mem_fops); + + if (IS_ERR_OR_NULL(dentry)) + WARN((dentry == NULL), + "Unable to create 'mem' file for %s\n", name); +} + +void kgsl_core_debugfs_init(void) +{ + struct dentry *debug_dir; + + kgsl_debugfs_dir = debugfs_create_dir("kgsl", NULL); + + debug_dir = debugfs_create_dir("debug", kgsl_debugfs_dir); + + debugfs_create_file("strict_memory", 0644, debug_dir, NULL, + &_strict_fops); + + proc_d_debugfs = debugfs_create_dir("proc", kgsl_debugfs_dir); +} + +void kgsl_core_debugfs_close(void) +{ + debugfs_remove_recursive(kgsl_debugfs_dir); +} diff --git a/drivers/gpu/msm/kgsl_debugfs.h b/drivers/gpu/msm/kgsl_debugfs.h new file mode 100644 index 000000000000..34875954bb8b --- /dev/null +++ b/drivers/gpu/msm/kgsl_debugfs.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2002,2008-2011,2013,2015 The Linux Foundation. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _KGSL_DEBUGFS_H +#define _KGSL_DEBUGFS_H + +struct kgsl_device; +struct kgsl_process_private; + +#ifdef CONFIG_DEBUG_FS +void kgsl_core_debugfs_init(void); +void kgsl_core_debugfs_close(void); + +void kgsl_device_debugfs_init(struct kgsl_device *device); + +extern struct dentry *kgsl_debugfs_dir; +static inline struct dentry *kgsl_get_debugfs_dir(void) +{ + return kgsl_debugfs_dir; +} + +void kgsl_process_init_debugfs(struct kgsl_process_private *); +#else +static inline void kgsl_core_debugfs_init(void) { } +static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { } +static inline void kgsl_core_debugfs_close(void) { } +static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; } +static inline void kgsl_process_init_debugfs(struct kgsl_process_private *priv) +{ +} +#endif + +#endif diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h new file mode 100644 index 000000000000..8fc3fa1311b5 --- /dev/null +++ b/drivers/gpu/msm/kgsl_device.h @@ -0,0 +1,869 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_DEVICE_H +#define __KGSL_DEVICE_H + +#include <linux/slab.h> +#include <linux/idr.h> +#include <linux/pm_qos.h> +#include <linux/sched.h> + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_log.h" +#include "kgsl_pwrscale.h" +#include "kgsl_snapshot.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cmdbatch.h" + +#include <linux/sync.h> + +#define KGSL_TIMEOUT_NONE 0 +#define KGSL_TIMEOUT_DEFAULT 0xFFFFFFFF +#define KGSL_TIMEOUT_PART 50 /* 50 msec */ + +#define FIRST_TIMEOUT (HZ / 2) + +#define KGSL_IOCTL_FUNC(_cmd, _func) \ + [_IOC_NR((_cmd))] = \ + { .cmd = (_cmd), .func = (_func) } + +/* KGSL device state is initialized to INIT when platform_probe * + * sucessfully initialized the device. Once a device has been opened * + * (started) it becomes active. NAP implies that only low latency * + * resources (for now clocks on some platforms) are off. SLEEP implies * + * that the KGSL module believes a device is idle (has been inactive * + * past its timer) and all system resources are released. SUSPEND is * + * requested by the kernel and will be enforced upon all open devices. */ + +#define KGSL_STATE_NONE 0x00000000 +#define KGSL_STATE_INIT 0x00000001 +#define KGSL_STATE_ACTIVE 0x00000002 +#define KGSL_STATE_NAP 0x00000004 +#define KGSL_STATE_SLEEP 0x00000008 +#define KGSL_STATE_SUSPEND 0x00000010 +#define KGSL_STATE_AWARE 0x00000020 +#define KGSL_STATE_SLUMBER 0x00000080 +#define KGSL_STATE_DEEP_NAP 0x00000100 + +#define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK 0x1000000 + +#define KGSL_IS_PAGE_ALIGNED(addr) (!((addr) & (~PAGE_MASK))) + +/** + * enum kgsl_event_results - result codes passed to an event callback when the + * event is retired or cancelled + * @KGSL_EVENT_RETIRED: The timestamp associated with the event retired + * successflly + * @KGSL_EVENT_CANCELLED: The event was cancelled before the event was fired + */ +enum kgsl_event_results { + KGSL_EVENT_RETIRED = 1, + KGSL_EVENT_CANCELLED = 2, +}; + +#define KGSL_FLAG_WAKE_ON_TOUCH BIT(0) + +/* + * "list" of event types for ftrace symbolic magic + */ + +#define KGSL_EVENT_TYPES \ + { KGSL_EVENT_RETIRED, "retired" }, \ + { KGSL_EVENT_CANCELLED, "cancelled" } + +#define KGSL_CONTEXT_FLAGS \ + { KGSL_CONTEXT_NO_GMEM_ALLOC , "NO_GMEM_ALLOC" }, \ + { KGSL_CONTEXT_PREAMBLE, "PREAMBLE" }, \ + { KGSL_CONTEXT_TRASH_STATE, "TRASH_STATE" }, \ + { KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_CONTEXT_PER_CONTEXT_TS, "PER_CONTEXT_TS" }, \ + { KGSL_CONTEXT_USER_GENERATED_TS, "USER_TS" }, \ + { KGSL_CONTEXT_NO_FAULT_TOLERANCE, "NO_FT" }, \ + { KGSL_CONTEXT_PWR_CONSTRAINT, "PWR" }, \ + { KGSL_CONTEXT_SAVE_GMEM, "SAVE_GMEM" } + +#define KGSL_CONTEXT_TYPES \ + { KGSL_CONTEXT_TYPE_ANY, "ANY" }, \ + { KGSL_CONTEXT_TYPE_GL, "GL" }, \ + { KGSL_CONTEXT_TYPE_CL, "CL" }, \ + { KGSL_CONTEXT_TYPE_C2D, "C2D" }, \ + { KGSL_CONTEXT_TYPE_RS, "RS" } + +#define KGSL_CONTEXT_ID(_context) \ + ((_context != NULL) ? (_context)->id : KGSL_MEMSTORE_GLOBAL) + +/* Allocate 600K for the snapshot static region*/ +#define KGSL_SNAPSHOT_MEMSIZE (600 * 1024) + +struct kgsl_device; +struct platform_device; +struct kgsl_device_private; +struct kgsl_context; +struct kgsl_power_stats; +struct kgsl_event; +struct kgsl_snapshot; + +struct kgsl_functable { + /* Mandatory functions - these functions must be implemented + by the client device. The driver will not check for a NULL + pointer before calling the hook. 
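+
+	   An illustrative sketch of the resulting calling convention (not
+	   actual driver code): a mandatory hook is called directly,
+
+		device->ftbl->regwrite(device, offsetwords, value);
+
+	   while an optional hook (see the second group below) is guarded
+	   with a NULL check,
+
+		if (device->ftbl->drawctxt_sched)
+			device->ftbl->drawctxt_sched(device, context);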
+ */ + void (*regread) (struct kgsl_device *device, + unsigned int offsetwords, unsigned int *value); + void (*regwrite) (struct kgsl_device *device, + unsigned int offsetwords, unsigned int value); + int (*idle) (struct kgsl_device *device); + bool (*isidle) (struct kgsl_device *device); + int (*suspend_context) (struct kgsl_device *device); + int (*init) (struct kgsl_device *device); + int (*start) (struct kgsl_device *device, int priority); + int (*stop) (struct kgsl_device *device); + int (*getproperty) (struct kgsl_device *device, + unsigned int type, void __user *value, + size_t sizebytes); + int (*getproperty_compat) (struct kgsl_device *device, + unsigned int type, void __user *value, + size_t sizebytes); + int (*waittimestamp) (struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, + unsigned int msecs); + int (*readtimestamp) (struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + int (*issueibcmds) (struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamps); + void (*power_stats)(struct kgsl_device *device, + struct kgsl_power_stats *stats); + unsigned int (*gpuid)(struct kgsl_device *device, unsigned int *chipid); + void (*snapshot)(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_context *context); + irqreturn_t (*irq_handler)(struct kgsl_device *device); + int (*drain)(struct kgsl_device *device); + /* Optional functions - these functions are not mandatory. The + driver will check that the function pointer is not NULL before + calling the hook */ + struct kgsl_context *(*drawctxt_create) (struct kgsl_device_private *, + uint32_t *flags); + void (*drawctxt_detach)(struct kgsl_context *context); + void (*drawctxt_destroy) (struct kgsl_context *context); + void (*drawctxt_dump) (struct kgsl_device *device, + struct kgsl_context *context); + long (*ioctl) (struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + long (*compat_ioctl) (struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + int (*setproperty) (struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + int (*setproperty_compat) (struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + void (*drawctxt_sched)(struct kgsl_device *device, + struct kgsl_context *context); + void (*resume)(struct kgsl_device *device); + int (*regulator_enable)(struct kgsl_device *); + bool (*is_hw_collapsible)(struct kgsl_device *); + void (*regulator_disable)(struct kgsl_device *); + void (*pwrlevel_change_settings)(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post); + void (*regulator_disable_poll)(struct kgsl_device *device); +}; + +struct kgsl_ioctl { + unsigned int cmd; + long (*func)(struct kgsl_device_private *, unsigned int, void *); +}; + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +/* Flag to mark the memobj_node as a preamble */ +#define MEMOBJ_PREAMBLE BIT(0) +/* Flag to mark that the memobj_node should not go to the hadrware */ +#define MEMOBJ_SKIP BIT(1) + +/** + * struct kgsl_memobj_node - Memory object descriptor + * @node: Local list node for the cmdbatch + * @id: GPU memory ID for the object + * offset: Offset within the object + * @gpuaddr: GPU address for the object + * @flags: External flags 
passed by the user + * @priv: Internal flags set by the driver + */ +struct kgsl_memobj_node { + struct list_head node; + unsigned int id; + uint64_t offset; + uint64_t gpuaddr; + uint64_t size; + unsigned long flags; + unsigned long priv; +}; + +struct kgsl_device { + struct device *dev; + const char *name; + unsigned int ver_major; + unsigned int ver_minor; + uint32_t flags; + enum kgsl_deviceid id; + + /* Starting physical address for GPU registers */ + unsigned long reg_phys; + + /* Starting Kernel virtual address for GPU registers */ + void __iomem *reg_virt; + + /* Total memory size for all GPU registers */ + unsigned int reg_len; + + /* Kernel virtual address for GPU shader memory */ + void __iomem *shader_mem_virt; + + /* Starting physical address for GPU shader memory */ + unsigned long shader_mem_phys; + + /* GPU shader memory size */ + unsigned int shader_mem_len; + struct kgsl_memdesc memstore; + const char *iomemname; + const char *shadermemname; + + struct kgsl_mmu mmu; + struct completion hwaccess_gate; + struct completion cmdbatch_gate; + const struct kgsl_functable *ftbl; + struct work_struct idle_check_ws; + struct timer_list idle_timer; + struct kgsl_pwrctrl pwrctrl; + int open_count; + + struct mutex mutex; + uint32_t state; + uint32_t requested_state; + + atomic_t active_cnt; + + wait_queue_head_t wait_queue; + wait_queue_head_t active_cnt_wq; + struct platform_device *pdev; + struct dentry *d_debugfs; + struct idr context_idr; + rwlock_t context_lock; + + struct { + void *ptr; + size_t size; + } snapshot_memory; + + struct kgsl_snapshot *snapshot; + + u32 snapshot_faultcount; /* Total number of faults since boot */ + struct kobject snapshot_kobj; + + struct kobject ppd_kobj; + + /* Logging levels */ + int cmd_log; + int ctxt_log; + int drv_log; + int mem_log; + int pwr_log; + struct kgsl_pwrscale pwrscale; + struct work_struct event_work; + + int reset_counter; /* Track how many GPU core resets have occured */ + int cff_dump_enable; + struct workqueue_struct *events_wq; + + struct device *busmondev; /* pseudo dev for GPU BW voting governor */ +}; + + +#define KGSL_DEVICE_COMMON_INIT(_dev) \ + .hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\ + .cmdbatch_gate = COMPLETION_INITIALIZER((_dev).cmdbatch_gate),\ + .idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\ + kgsl_idle_check),\ + .event_work = __WORK_INITIALIZER((_dev).event_work,\ + kgsl_process_events),\ + .context_idr = IDR_INIT((_dev).context_idr),\ + .wait_queue = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).wait_queue),\ + .active_cnt_wq = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).active_cnt_wq),\ + .mutex = __MUTEX_INITIALIZER((_dev).mutex),\ + .state = KGSL_STATE_NONE,\ + .ver_major = DRIVER_VERSION_MAJOR,\ + .ver_minor = DRIVER_VERSION_MINOR + + +/** + * enum bits for struct kgsl_context.priv + * @KGSL_CONTEXT_PRIV_DETACHED - The context has been destroyed by userspace + * and is no longer using the gpu. + * @KGSL_CONTEXT_PRIV_INVALID - The context has been destroyed by the kernel + * because it caused a GPU fault. + * @KGSL_CONTEXT_PRIV_PAGEFAULT - The context has caused a page fault. + * @KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC - this value and higher values are + * reserved for devices specific use. 
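+ *
+ * As an illustration only (the name below is hypothetical, not part of
+ * the driver), a device-specific driver would define its own bits on top
+ * of the reserved range and test them on context->priv with test_bit():
+ *
+ *	#define MY_CONTEXT_PRIV_FOO (KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC + 0)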
+ */
+enum kgsl_context_priv {
+	KGSL_CONTEXT_PRIV_DETACHED = 0,
+	KGSL_CONTEXT_PRIV_INVALID,
+	KGSL_CONTEXT_PRIV_PAGEFAULT,
+	KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC = 16,
+};
+
+struct kgsl_process_private;
+
+/**
+ * struct kgsl_context - The context fields that are valid for a user defined
+ * context
+ * @refcount: kref object for reference counting the context
+ * @id: integer identifier for the context
+ * @priority: The context's priority to submit commands to GPU
+ * @tid: task that created this context.
+ * @dev_priv: pointer to the owning device instance
+ * @proc_priv: pointer to process private, the process that allocated the
+ * context
+ * @priv: in-kernel context flags, use KGSL_CONTEXT_* values
+ * @device: pointer to the KGSL device that owns this context
+ * @reset_status: status indication whether a gpu reset occurred and whether
+ * this context was responsible for causing it
+ * @wait_on_invalid_ts: flag indicating if this context has tried to wait on a
+ * bad timestamp
+ * @timeline: sync timeline used to create fences that can be signaled when a
+ * sync_pt timestamp expires
+ * @events: A kgsl_event_group for this context - contains the list of GPU
+ * events
+ * @pagefault_ts: global timestamp of the pagefault, if
+ * KGSL_CONTEXT_PRIV_PAGEFAULT is set.
+ * @flags: flags from userspace controlling the behavior of this context
+ * @pwr_constraint: power constraint from userspace for this context
+ * @fault_count: number of times the gpu hung in last _context_throttle_time ms
+ * @fault_time: time of the first gpu hang in last _context_throttle_time ms
+ */
+struct kgsl_context {
+	struct kref refcount;
+	uint32_t id;
+	uint32_t priority;
+	pid_t tid;
+	struct kgsl_device_private *dev_priv;
+	struct kgsl_process_private *proc_priv;
+	unsigned long priv;
+	struct kgsl_device *device;
+	unsigned int reset_status;
+	bool wait_on_invalid_ts;
+	struct sync_timeline *timeline;
+	struct kgsl_event_group events;
+	unsigned int pagefault_ts;
+	unsigned int flags;
+	struct kgsl_pwr_constraint pwr_constraint;
+	unsigned int fault_count;
+	unsigned long fault_time;
+};
+
+#define _context_comm(_c) \
+	(((_c) && (_c)->proc_priv) ? (_c)->proc_priv->comm : "unknown")
+
+/*
+ * Print log messages with the context process name/pid:
+ * [...] kgsl kgsl-3d0: kgsl-api-test[22182]:
+ */
+
+#define pr_context(_d, _c, fmt, args...)
\ + dev_err((_d)->dev, "%s[%d]: " fmt, \ + _context_comm((_c)), \ + (_c)->proc_priv->pid, ##args) + +/** + * struct kgsl_process_private - Private structure for a KGSL process (across + * all devices) + * @priv: Internal flags, use KGSL_PROCESS_* values + * @pid: ID for the task owner of the process + * @comm: task name of the process + * @mem_lock: Spinlock to protect the process memory lists + * @refcount: kref object for reference counting the process + * @idr: Iterator for assigning IDs to memory allocations + * @pagetable: Pointer to the pagetable owned by this process + * @kobj: Pointer to a kobj for the sysfs directory for this process + * @debug_root: Pointer to the debugfs root for this process + * @stats: Memory allocation statistics for this process + * @syncsource_idr: sync sources created by this process + * @syncsource_lock: Spinlock to protect the syncsource idr + * @fd_count: Counter for the number of FDs for this process + */ +struct kgsl_process_private { + unsigned long priv; + pid_t pid; + char comm[TASK_COMM_LEN]; + spinlock_t mem_lock; + struct kref refcount; + struct idr mem_idr; + struct kgsl_pagetable *pagetable; + struct list_head list; + struct kobject kobj; + struct dentry *debug_root; + struct { + uint64_t cur; + uint64_t max; + } stats[KGSL_MEM_ENTRY_MAX]; + struct idr syncsource_idr; + spinlock_t syncsource_lock; + int fd_count; +}; + +/** + * enum kgsl_process_priv_flags - Private flags for kgsl_process_private + * @KGSL_PROCESS_INIT: Set if the process structure has been set up + */ +enum kgsl_process_priv_flags { + KGSL_PROCESS_INIT = 0, +}; + +struct kgsl_device_private { + struct kgsl_device *device; + struct kgsl_process_private *process_priv; +}; + +/** + * struct kgsl_snapshot - details for a specific snapshot instance + * @start: Pointer to the start of the static snapshot region + * @size: Size of the current snapshot instance + * @ptr: Pointer to the next block of memory to write to during snapshotting + * @remain: Bytes left in the snapshot region + * @timestamp: Timestamp of the snapshot instance (in seconds since boot) + * @mempool: Pointer to the memory pool for storing memory objects + * @mempool_size: Size of the memory pool + * @obj_list: List of frozen GPU buffers that are waiting to be dumped. + * @cp_list: List of IB's to be dumped. + * @work: worker to dump the frozen memory + * @dump_gate: completion gate signaled by worker when it is finished. + * @process: the process that caused the hang, if known. + */ +struct kgsl_snapshot { + u8 *start; + size_t size; + u8 *ptr; + size_t remain; + unsigned long timestamp; + u8 *mempool; + size_t mempool_size; + struct list_head obj_list; + struct list_head cp_list; + struct work_struct work; + struct completion dump_gate; + struct kgsl_process_private *process; +}; + +/** + * struct kgsl_snapshot_object - GPU memory in the snapshot + * @gpuaddr: The GPU address identified during snapshot + * @size: The buffer size identified during snapshot + * @offset: offset from start of the allocated kgsl_mem_entry + * @type: SNAPSHOT_OBJ_TYPE_* identifier. 
+ * @entry: the reference counted memory entry for this buffer + * @node: node for kgsl_snapshot.obj_list + */ +struct kgsl_snapshot_object { + uint64_t gpuaddr; + uint64_t size; + uint64_t offset; + int type; + struct kgsl_mem_entry *entry; + struct list_head node; +}; + +struct kgsl_device *kgsl_get_device(int dev_idx); + +static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, uint64_t size) +{ + priv->stats[type].cur += size; + if (priv->stats[type].max < priv->stats[type].cur) + priv->stats[type].max = priv->stats[type].cur; +} + +static inline void kgsl_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value) +{ + device->ftbl->regread(device, offsetwords, value); +} + +static inline void kgsl_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + device->ftbl->regwrite(device, offsetwords, value); +} + +static inline void kgsl_regrmw(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + unsigned int val = 0; + + device->ftbl->regread(device, offsetwords, &val); + val &= ~mask; + device->ftbl->regwrite(device, offsetwords, val | bits); +} + +static inline int kgsl_idle(struct kgsl_device *device) +{ + return device->ftbl->idle(device); +} + +static inline unsigned int kgsl_gpuid(struct kgsl_device *device, + unsigned int *chipid) +{ + return device->ftbl->gpuid(device, chipid); +} + +static inline int kgsl_create_device_sysfs_files(struct device *root, + const struct device_attribute **list) +{ + int ret = 0, i; + for (i = 0; list[i] != NULL; i++) + ret |= device_create_file(root, list[i]); + return ret; +} + +static inline void kgsl_remove_device_sysfs_files(struct device *root, + const struct device_attribute **list) +{ + int i; + for (i = 0; list[i] != NULL; i++) + device_remove_file(root, list[i]); +} + +static inline struct kgsl_device *kgsl_device_from_dev(struct device *dev) +{ + int i; + + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->dev == dev) + return kgsl_driver.devp[i]; + } + + return NULL; +} + +static inline int kgsl_state_is_awake(struct kgsl_device *device) +{ + if (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_AWARE) + return true; + else + return false; +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); + +int kgsl_device_platform_probe(struct kgsl_device *device); + +void kgsl_device_platform_remove(struct kgsl_device *device); + +const char *kgsl_pwrstate_to_str(unsigned int state); + +int kgsl_device_snapshot_init(struct kgsl_device *device); +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context); +void kgsl_device_snapshot_close(struct kgsl_device *device); +void kgsl_snapshot_save_frozen_objs(struct work_struct *work); + +void kgsl_events_init(void); +void kgsl_events_exit(void); + +void kgsl_del_event_group(struct kgsl_event_group *group); + +void kgsl_add_event_group(struct kgsl_event_group *group, + struct kgsl_context *context, const char *name, + readtimestamp_func readtimestamp, void *priv); + +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp); +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group); +void 
kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv); +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_process_events(struct work_struct *work); + +void kgsl_context_destroy(struct kref *kref); + +int kgsl_context_init(struct kgsl_device_private *, struct kgsl_context + *context); + +void kgsl_context_dump(struct kgsl_context *context); + +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid); + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr); + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long, unsigned char *ptr); + +int kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, + struct kgsl_device_private *dev_priv); + +/** + * kgsl_context_put() - Release context reference count + * @context: Pointer to the KGSL context to be released + * + * Reduce the reference count on a KGSL context and destroy it if it is no + * longer needed + */ +static inline void +kgsl_context_put(struct kgsl_context *context) +{ + if (context) + kref_put(&context->refcount, kgsl_context_destroy); +} + +/** + * kgsl_context_detached() - check if a context is detached + * @context: the context + * + * Check if a context has been destroyed by userspace and is only waiting + * for reference counts to go away. This check is used to weed out + * contexts that shouldn't use the gpu so NULL is considered detached. + */ +static inline bool kgsl_context_detached(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_DETACHED, + &context->priv)); +} + +/** + * kgsl_context_invalid() - check if a context is invalid + * @context: the context + * + * Check if a context has been invalidated by the kernel and may no + * longer use the GPU. + */ +static inline bool kgsl_context_invalid(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_INVALID, + &context->priv)); +} + + +/** + * kgsl_context_get() - get a pointer to a KGSL context + * @device: Pointer to the KGSL device that owns the context + * @id: Context ID + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. 
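+ * A typical usage pattern (illustrative only, not taken from this commit):
+ *
+ *	context = kgsl_context_get(device, id);
+ *	if (context != NULL) {
+ *		... operate on the context ...
+ *		kgsl_context_put(context);
+ *	}
+ *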
This function is for internal use because it + * doesn't validate the ownership of the context with the calling process - use + * kgsl_context_get_owner for that + */ +static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device, + uint32_t id) +{ + int result = 0; + struct kgsl_context *context = NULL; + + read_lock(&device->context_lock); + + context = idr_find(&device->context_idr, id); + + /* Don't return a context that has been detached */ + if (kgsl_context_detached(context)) + context = NULL; + else + result = kref_get_unless_zero(&context->refcount); + + read_unlock(&device->context_lock); + + if (!result) + return NULL; + return context; +} + +/** +* _kgsl_context_get() - lightweight function to just increment the ref count +* @context: Pointer to the KGSL context +* +* Get a reference to the specified KGSL context structure. This is a +* lightweight way to just increase the refcount on a known context rather than +* walking through kgsl_context_get and searching the iterator +*/ +static inline int _kgsl_context_get(struct kgsl_context *context) +{ + int ret = 0; + + if (context) + ret = kref_get_unless_zero(&context->refcount); + + return ret; +} + +/** + * kgsl_context_get_owner() - get a pointer to a KGSL context in a specific + * process + * @dev_priv: Pointer to the process struct + * @id: Context ID to return + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. This function validates that the context id + * given is owned by the dev_priv instancet that is passed in. See + * kgsl_context_get for the internal version that doesn't do the check + */ +static inline struct kgsl_context *kgsl_context_get_owner( + struct kgsl_device_private *dev_priv, uint32_t id) +{ + struct kgsl_context *context; + + context = kgsl_context_get(dev_priv->device, id); + + /* Verify that the context belongs to current calling fd. */ + if (context != NULL && context->dev_priv != dev_priv) { + kgsl_context_put(context); + return NULL; + } + + return context; +} + +/** +* kgsl_process_private_get() - increment the refcount on a kgsl_process_private +* struct +* @process: Pointer to the KGSL process_private +* +* Returns 0 if the structure is invalid and a reference count could not be +* obtained, nonzero otherwise. 
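+* On success the caller holds a reference and is expected to drop it with
+* kgsl_process_private_put() once it is done with the structure.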
+*/ +static inline int kgsl_process_private_get(struct kgsl_process_private *process) +{ + int ret = 0; + if (process != NULL) + ret = kref_get_unless_zero(&process->refcount); + return ret; +} + +void kgsl_process_private_put(struct kgsl_process_private *private); + + +struct kgsl_process_private *kgsl_process_private_find(pid_t pid); + +/** + * kgsl_property_read_u32() - Read a u32 property from the device tree + * @device: Pointer to the KGSL device + * @prop: String name of the property to query + * @ptr: Pointer to the variable to store the property + */ +static inline int kgsl_property_read_u32(struct kgsl_device *device, + const char *prop, unsigned int *ptr) +{ + return of_property_read_u32(device->pdev->dev.of_node, prop, ptr); +} + +/** + * kgsl_sysfs_store() - parse a string from a sysfs store function + * @buf: Incoming string to parse + * @ptr: Pointer to an unsigned int to store the value + */ +static inline int kgsl_sysfs_store(const char *buf, unsigned int *ptr) +{ + unsigned int val; + int rc; + + rc = kstrtou32(buf, 0, &val); + if (rc) + return rc; + + if (ptr) + *ptr = val; + + return 0; +} + +/* + * A helper macro to print out "not enough memory functions" - this + * makes it easy to standardize the messages as well as cut down on + * the number of strings in the binary + */ +#define SNAPSHOT_ERR_NOMEM(_d, _s) \ + KGSL_DRV_ERR((_d), \ + "snapshot: not enough snapshot memory for section %s\n", (_s)) + +/** + * struct kgsl_snapshot_registers - list of registers to snapshot + * @regs: Pointer to an array of register ranges + * @count: Number of entries in the array + */ +struct kgsl_snapshot_registers { + const unsigned int *regs; + unsigned int count; +}; + +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, unsigned int index, + unsigned int data, unsigned int start, unsigned int count); + +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type); + +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size); + +struct adreno_ib_object_list; + +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list); + +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void *priv); + +/** + * struct kgsl_pwr_limit - limit structure for each client + * @node: Local list node for the limits list + * @level: requested power level + * @device: pointer to the device structure + */ +struct kgsl_pwr_limit { + struct list_head node; + unsigned int level; + struct kgsl_device *device; +}; + +#endif /* __KGSL_DEVICE_H */ diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c new file mode 100644 index 000000000000..e1f9ad17d0ff --- /dev/null +++ b/drivers/gpu/msm/kgsl_events.c @@ -0,0 +1,445 @@ +/* Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/debugfs.h> +#include <kgsl_device.h> + +#include "kgsl_debugfs.h" +#include "kgsl_trace.h" + +/* + * Define an kmem cache for the event structures since we allocate and free them + * so frequently + */ +static struct kmem_cache *events_cache; +static struct dentry *events_dentry; + +static inline void signal_event(struct kgsl_device *device, + struct kgsl_event *event, int result) +{ + list_del(&event->node); + event->result = result; + queue_work(device->events_wq, &event->work); +} + +/** + * _kgsl_event_worker() - Work handler for processing GPU event callbacks + * @work: Pointer to the work_struct for the event + * + * Each event callback has its own work struct and is run on a event specific + * workqeuue. This is the worker that queues up the event callback function. + */ +static void _kgsl_event_worker(struct work_struct *work) +{ + struct kgsl_event *event = container_of(work, struct kgsl_event, work); + int id = KGSL_CONTEXT_ID(event->context); + + trace_kgsl_fire_event(id, event->timestamp, event->result, + jiffies - event->created, event->func); + + event->func(event->device, event->group, event->priv, event->result); + + kgsl_context_put(event->context); + kmem_cache_free(events_cache, event); +} + +static void _process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, bool flush) +{ + struct kgsl_event *event, *tmp; + unsigned int timestamp; + struct kgsl_context *context; + + if (group == NULL) + return; + + context = group->context; + + /* + * Sanity check to be sure that we we aren't racing with the context + * getting destroyed + */ + if (context != NULL && !_kgsl_context_get(context)) + BUG(); + + spin_lock(&group->lock); + + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + ×tamp); + + /* + * If no timestamps have been retired since the last time we were here + * then we can avoid going through this loop + */ + if (!flush && timestamp_cmp(timestamp, group->processed) <= 0) + goto out; + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(event->timestamp, timestamp) <= 0) + signal_event(device, event, KGSL_EVENT_RETIRED); + else if (flush) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + } + + group->processed = timestamp; + +out: + spin_unlock(&group->lock); + kgsl_context_put(context); +} + +/** + * kgsl_process_event_group() - Handle all the retired events in a group + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ + +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, false); +} +EXPORT_SYMBOL(kgsl_process_event_group); + +/** + * kgsl_flush_event_group() - flush all the events in a group by retiring the + * ones can be retired and cancelling the ones that are pending + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, true); +} +EXPORT_SYMBOL(kgsl_flush_event_group); + +/** + * kgsl_cancel_events_timestamp() - Cancel pending events 
for a given timestamp + * @device: Pointer to a KGSL device + * @group: Ponter to the GPU event group that owns the event + * @timestamp: Registered expiry timestamp for the event + */ +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(timestamp, event->timestamp) == 0) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} +EXPORT_SYMBOL(kgsl_cancel_events_timestamp); + +/** + * kgsl_cancel_events() - Cancel all pending events in the group + * @device: Pointer to a KGSL device + * @group: Pointer to a kgsl_events_group + */ +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + spin_unlock(&group->lock); +} +EXPORT_SYMBOL(kgsl_cancel_events); + +/** + * kgsl_cancel_event() - Cancel a specific event from a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +void kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv) +{ + struct kgsl_event *event, *tmp; + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} +EXPORT_SYMBOL(kgsl_cancel_event); + +/** + * kgsl_event_pending() - Searches for an event in an event group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + struct kgsl_event *event; + bool result = false; + spin_lock(&group->lock); + list_for_each_entry(event, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) { + result = true; + break; + } + } + spin_unlock(&group->lock); + return result; +} +/** + * kgsl_add_event() - Add a new GPU event to a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group to add the event to + * @timestamp: Timestamp that the event will expire on + * @func: Callback function for the event + * @priv: Private data to send to the callback function + */ +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + unsigned int queued; + struct kgsl_context *context = group->context; + struct kgsl_event *event; + unsigned int retired; + + if (!func) + return -EINVAL; + + /* + * If the caller is creating their own timestamps, let them schedule + * events in the future. Otherwise only allow timestamps that have been + * queued. 
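+ * (Contexts created with KGSL_CONTEXT_USER_GENERATED_TS manage their own
+ * timestamps, which is why the queued-timestamp check below is skipped
+ * for them.)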
+ */ + if (!context || !(context->flags & KGSL_CONTEXT_USER_GENERATED_TS)) { + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(timestamp, queued) > 0) + return -EINVAL; + } + + event = kmem_cache_alloc(events_cache, GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* Get a reference to the context while the event is active */ + if (context != NULL && !_kgsl_context_get(context)) { + kmem_cache_free(events_cache, event); + return -ENOENT; + } + + event->device = device; + event->context = context; + event->timestamp = timestamp; + event->priv = priv; + event->func = func; + event->created = jiffies; + event->group = group; + + INIT_WORK(&event->work, _kgsl_event_worker); + + trace_kgsl_register_event(KGSL_CONTEXT_ID(context), timestamp, func); + + spin_lock(&group->lock); + + /* + * Check to see if the requested timestamp has already retired. If so, + * schedule the callback right away + */ + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + &retired); + + if (timestamp_cmp(retired, timestamp) >= 0) { + event->result = KGSL_EVENT_RETIRED; + queue_work(device->events_wq, &event->work); + spin_unlock(&group->lock); + return 0; + } + + /* Add the event to the group list */ + list_add_tail(&event->node, &group->events); + + spin_unlock(&group->lock); + + return 0; +} +EXPORT_SYMBOL(kgsl_add_event); + +static DEFINE_RWLOCK(group_lock); +static LIST_HEAD(group_list); + +/** + * kgsl_process_events() - Work queue for processing new timestamp events + * @work: Pointer to a work_struct + */ +void kgsl_process_events(struct work_struct *work) +{ + struct kgsl_event_group *group; + struct kgsl_device *device = container_of(work, struct kgsl_device, + event_work); + + read_lock(&group_lock); + list_for_each_entry(group, &group_list, group) + _process_event_group(device, group, false); + read_unlock(&group_lock); +} +EXPORT_SYMBOL(kgsl_process_events); + +/** + * kgsl_del_event_group() - Remove a GPU event group + * @group: GPU event group to remove + */ +void kgsl_del_event_group(struct kgsl_event_group *group) +{ + /* Make sure that all the events have been deleted from the list */ + BUG_ON(!list_empty(&group->events)); + + write_lock(&group_lock); + list_del(&group->group); + write_unlock(&group_lock); +} +EXPORT_SYMBOL(kgsl_del_event_group); + +/** + * kgsl_add_event_group() - Add a new GPU event group + * group: Pointer to the new group to add to the list + * context: Context that owns the group (or NULL for global) + * name: Name of the group + * readtimestamp: Function pointer to the readtimestamp function to call when + * processing events + * priv: Priv member to pass to the readtimestamp function + */ +void kgsl_add_event_group(struct kgsl_event_group *group, + struct kgsl_context *context, const char *name, + readtimestamp_func readtimestamp, void *priv) +{ + BUG_ON(readtimestamp == NULL); + + spin_lock_init(&group->lock); + INIT_LIST_HEAD(&group->events); + + group->context = context; + group->readtimestamp = readtimestamp; + group->priv = priv; + + if (name) + strlcpy(group->name, name, sizeof(group->name)); + + write_lock(&group_lock); + list_add_tail(&group->group, &group_list); + write_unlock(&group_lock); +} +EXPORT_SYMBOL(kgsl_add_event_group); + +static void events_debugfs_print_group(struct seq_file *s, + struct kgsl_event_group *group) +{ + struct kgsl_event *event; + unsigned int retired; + + spin_lock(&group->lock); + + seq_printf(s, "%s: last=%d\n", group->name, group->processed); + + 
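+ /*
+ * Walk the events still pending on this group and show each one next to
+ * the timestamp the hardware currently reports as retired.
+ */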
list_for_each_entry(event, &group->events, node) { + + group->readtimestamp(event->device, group->priv, + KGSL_TIMESTAMP_RETIRED, &retired); + + seq_printf(s, "\t%d:%d age=%lu func=%ps [retired=%d]\n", + group->context ? group->context->id : + KGSL_MEMSTORE_GLOBAL, + event->timestamp, jiffies - event->created, + event->func, retired); + } + spin_unlock(&group->lock); +} + +static int events_debugfs_print(struct seq_file *s, void *unused) +{ + struct kgsl_event_group *group; + + seq_puts(s, "event groups:\n"); + seq_puts(s, "--------------\n"); + + read_lock(&group_lock); + list_for_each_entry(group, &group_list, group) { + events_debugfs_print_group(s, group); + seq_puts(s, "\n"); + } + read_unlock(&group_lock); + + return 0; +} + +static int events_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, events_debugfs_print, NULL); +} + +static const struct file_operations events_fops = { + .open = events_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * kgsl_events_exit() - Destroy the event kmem cache on module exit + */ +void kgsl_events_exit(void) +{ + if (events_cache) + kmem_cache_destroy(events_cache); + + debugfs_remove(events_dentry); +} + +/** + * kgsl_events_init() - Create the event kmem cache on module start + */ +void __init kgsl_events_init(void) +{ + struct dentry *debugfs_dir = kgsl_get_debugfs_dir(); + events_cache = KMEM_CACHE(kgsl_event, 0); + + events_dentry = debugfs_create_file("events", 0444, debugfs_dir, NULL, + &events_fops); + + /* Failure to create a debugfs entry is non fatal */ + if (IS_ERR(events_dentry)) + events_dentry = NULL; +} diff --git a/drivers/gpu/msm/kgsl_ioctl.c b/drivers/gpu/msm/kgsl_ioctl.c new file mode 100644 index 000000000000..0802e94f56ad --- /dev/null +++ b/drivers/gpu/msm/kgsl_ioctl.c @@ -0,0 +1,176 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/ioctl.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +#include <linux/fs.h> +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY, + kgsl_ioctl_device_getproperty), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, + kgsl_ioctl_rb_issueibcmds), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS, + kgsl_ioctl_submit_commands), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE, + kgsl_ioctl_sharedmem_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE, + kgsl_ioctl_sharedmem_flush_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC, + kgsl_ioctl_gpumem_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNCMEM, + kgsl_ioctl_cff_syncmem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, + kgsl_ioctl_cff_user_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, + kgsl_ioctl_timestamp_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID, + kgsl_ioctl_gpumem_alloc_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO, + kgsl_ioctl_gpumem_get_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE, + kgsl_ioctl_gpumem_sync_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK, + kgsl_ioctl_gpumem_sync_cache_bulk), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNC_GPUOBJ, + kgsl_ioctl_cff_sync_gpuobj), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + kgsl_ioctl_gpuobj_set_info), +}; + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? 
ksize : usize; + + if ((kernel_cmd & IOC_IN) && (user_cmd & IOC_IN)) { + if (copy > 0 && copy_from_user(ptr, (void __user *) arg, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? ksize : usize; + + if ((kernel_cmd & IOC_OUT) && (user_cmd & IOC_OUT)) { + if (copy > 0 && copy_to_user((void __user *) arg, ptr, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + unsigned char data[128] = { 0 }; + unsigned int nr = _IOC_NR(cmd); + long ret; + + if (nr >= len || cmds[nr].func == NULL) + return -ENOIOCTLCMD; + + BUG_ON(_IOC_SIZE(cmds[nr].cmd) > sizeof(data)); + + if (_IOC_SIZE(cmds[nr].cmd)) { + ret = kgsl_ioctl_copy_in(cmds[nr].cmd, cmd, arg, data); + if (ret) + return ret; + } + + ret = cmds[nr].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[nr].cmd)) + ret = kgsl_ioctl_copy_out(cmds[nr].cmd, cmd, arg, data); + + return ret; +} + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + long ret; + + ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs, + ARRAY_SIZE(kgsl_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (is_compat_task() && device->ftbl->compat_ioctl != NULL) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + else if (device->ftbl->ioctl != NULL) + return device->ftbl->ioctl(dev_priv, cmd, arg); + + KGSL_DRV_INFO(device, "invalid ioctl code 0x%08X\n", cmd); + } + + return ret; +} diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c new file mode 100644 index 000000000000..be77db93994e --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -0,0 +1,1834 @@ +/* Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/genalloc.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/msm_kgsl.h> +#include <linux/ratelimit.h> +#include <soc/qcom/scm.h> +#include <soc/qcom/secure_buffer.h> +#include <stddef.h> +#include <linux/compat.h> + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" +#include "kgsl_iommu.h" +#include "adreno_pm4types.h" +#include "adreno.h" +#include "kgsl_trace.h" +#include "kgsl_cffdump.h" +#include "kgsl_pwrctrl.h" + +static struct kgsl_mmu_pt_ops iommu_pt_ops; +static bool need_iommu_sync; + +const unsigned int kgsl_iommu_reg_list[KGSL_IOMMU_REG_MAX] = { + 0x0,/* SCTLR */ + 0x20,/* TTBR0 */ + 0x34,/* CONTEXTIDR */ + 0x58,/* FSR */ + 0x60,/* FAR_0 */ + 0x618,/* TLBIALL */ + 0x008,/* RESUME */ + 0x68,/* FSYNR0 */ + 0x6C,/* FSYNR1 */ + 0x7F0,/* TLBSYNC */ + 0x7F4,/* TLBSTATUS */ +}; + +/* + * struct kgsl_iommu_addr_entry - entry in the kgsl_iommu_pt rbtree. + * @base: starting virtual address of the entry + * @size: size of the entry + * @node: the rbtree node + * + */ +struct kgsl_iommu_addr_entry { + uint64_t base; + uint64_t size; + struct rb_node node; +}; + +static struct kmem_cache *addr_entry_cache; + +static inline void _iommu_sync_mmu_pc(bool lock) +{ + if (need_iommu_sync == false) + return; + + if (lock) + mutex_lock(&kgsl_mmu_sync); + else + mutex_unlock(&kgsl_mmu_sync); +} + +static void _detach_pt(struct kgsl_iommu_pt *iommu_pt, + struct kgsl_iommu_context *ctx) +{ + if (iommu_pt->attached) { + _iommu_sync_mmu_pc(true); + iommu_detach_device(iommu_pt->domain, ctx->dev); + _iommu_sync_mmu_pc(false); + iommu_pt->attached = false; + } +} + +static int _attach_pt(struct kgsl_iommu_pt *iommu_pt, + struct kgsl_iommu_context *ctx) +{ + int ret; + + if (iommu_pt->attached) + return 0; + + _iommu_sync_mmu_pc(true); + ret = iommu_attach_device(iommu_pt->domain, ctx->dev); + _iommu_sync_mmu_pc(false); + + if (ret == 0) + iommu_pt->attached = true; + else + KGSL_CORE_ERR("iommu_attach_device(%s) failed: %d\n", + ctx->name, ret); + + return ret; +} + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; +static struct kgsl_memdesc kgsl_secure_guard_page_memdesc; + +/* These functions help find the nearest allocated memory entries on either side + * of a faulting address. 
If we know the nearby allocations memory we can + * get a better determination of what we think should have been located in the + * faulting region + */ + +/* + * A local structure to make it easy to store the interesting bits for the + * memory entries on either side of the faulting address + */ + +struct _mem_entry { + uint64_t gpuaddr; + uint64_t size; + uint64_t flags; + unsigned int priv; + int pending_free; + pid_t pid; +}; + +static void _get_entries(struct kgsl_process_private *private, + uint64_t faultaddr, struct _mem_entry *prev, + struct _mem_entry *next) +{ + int id; + struct kgsl_mem_entry *entry; + + uint64_t prevaddr = 0; + struct kgsl_mem_entry *p = NULL; + + uint64_t nextaddr = (uint64_t) -1; + struct kgsl_mem_entry *n = NULL; + + idr_for_each_entry(&private->mem_idr, entry, id) { + uint64_t addr = entry->memdesc.gpuaddr; + + if ((addr < faultaddr) && (addr > prevaddr)) { + prevaddr = addr; + p = entry; + } + + if ((addr > faultaddr) && (addr < nextaddr)) { + nextaddr = addr; + n = entry; + } + } + + if (p != NULL) { + prev->gpuaddr = p->memdesc.gpuaddr; + prev->size = p->memdesc.size; + prev->flags = p->memdesc.flags; + prev->priv = p->memdesc.priv; + prev->pending_free = p->pending_free; + prev->pid = private->pid; + } + + if (n != NULL) { + next->gpuaddr = n->memdesc.gpuaddr; + next->size = n->memdesc.size; + next->flags = n->memdesc.flags; + next->priv = n->memdesc.priv; + next->pending_free = n->pending_free; + next->pid = private->pid; + } +} + +static void _find_mem_entries(struct kgsl_mmu *mmu, uint64_t faultaddr, + phys_addr_t ptbase, struct _mem_entry *preventry, + struct _mem_entry *nextentry) +{ + struct kgsl_process_private *private = NULL, *p; + int id = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase); + + memset(preventry, 0, sizeof(*preventry)); + memset(nextentry, 0, sizeof(*nextentry)); + + /* Set the maximum possible size as an initial value */ + nextentry->gpuaddr = (uint64_t) -1; + + mutex_lock(&kgsl_driver.process_mutex); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + if (p->pagetable && (p->pagetable->name == id)) { + if (kgsl_process_private_get(p)) + private = p; + break; + } + } + mutex_unlock(&kgsl_driver.process_mutex); + + if (private != NULL) { + spin_lock(&private->mem_lock); + _get_entries(private, faultaddr, preventry, nextentry); + spin_unlock(&private->mem_lock); + + kgsl_process_private_put(private); + } +} + +static void _print_entry(struct kgsl_device *device, struct _mem_entry *entry) +{ + char name[32]; + memset(name, 0, sizeof(name)); + + kgsl_get_memory_usage(name, sizeof(name) - 1, entry->flags); + + KGSL_LOG_DUMP(device, + "[%016llX - %016llX] %s %s (pid = %d) (%s)\n", + entry->gpuaddr, + entry->gpuaddr + entry->size, + entry->priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "", + entry->pending_free ? 
"(pending free)" : "", + entry->pid, name); +} + +static void _check_if_freed(struct kgsl_iommu_context *ctx, + uint64_t addr, pid_t ptname) +{ + uint64_t gpuaddr = addr; + uint64_t size = 0; + uint64_t flags = 0; + pid_t pid; + + char name[32]; + memset(name, 0, sizeof(name)); + + if (kgsl_memfree_find_entry(ptname, &gpuaddr, &size, &flags, &pid)) { + kgsl_get_memory_usage(name, sizeof(name) - 1, flags); + KGSL_LOG_DUMP(ctx->kgsldev, "---- premature free ----\n"); + KGSL_LOG_DUMP(ctx->kgsldev, + "[%8.8llX-%8.8llX] (%s) was already freed by pid %d\n", + gpuaddr, gpuaddr + size, name, pid); + } +} + +static int kgsl_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + int ret = 0; + struct kgsl_pagetable *pt = token; + struct kgsl_mmu *mmu = pt->mmu; + struct kgsl_iommu *iommu; + struct kgsl_iommu_context *ctx; + u64 ptbase; + u32 contextidr; + pid_t ptname; + struct _mem_entry prev, next; + int write; + struct kgsl_device *device; + struct adreno_device *adreno_dev; + unsigned int no_page_fault_log = 0; + unsigned int curr_context_id = 0; + struct kgsl_context *context; + char *fault_type = "unknown"; + + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + if (mmu == NULL || mmu->priv == NULL) + return ret; + + iommu = mmu->priv; + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + device = mmu->device; + adreno_dev = ADRENO_DEVICE(device); + + if (pt->name == KGSL_MMU_SECURE_PT) + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + + /* + * set the fault bits and stuff before any printks so that if fault + * handler runs then it will know it's dealing with a pagefault. + * Read the global current timestamp because we could be in middle of + * RB switch and hence the cur RB may not be reliable but global + * one will always be reliable + */ + kgsl_sharedmem_readl(&device->memstore, &curr_context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context)); + + context = kgsl_context_get(device, curr_context_id); + + if (context != NULL) { + /* save pagefault timestamp for GFT */ + set_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, &context->priv); + + kgsl_context_put(context); + context = NULL; + } + + ctx->fault = 1; + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy) && + (flags & IOMMU_FAULT_TRANSACTION_STALLED)) { + /* + * Turn off GPU IRQ so we don't get faults from it too. + * The device mutex must be held to change power state + */ + mutex_lock(&device->mutex); + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + mutex_unlock(&device->mutex); + } + + write = (flags & IOMMU_FAULT_WRITE) ? 1 : 0; + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + + ptbase = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0); + contextidr = KGSL_IOMMU_GET_CTX_REG(ctx, CONTEXTIDR); + + ptname = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase); + + if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, + &adreno_dev->ft_pf_policy)) + no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr); + + if (!no_page_fault_log && __ratelimit(&_rs)) { + KGSL_MEM_CRIT(ctx->kgsldev, + "GPU PAGE FAULT: addr = %lX pid= %d\n", addr, ptname); + KGSL_MEM_CRIT(ctx->kgsldev, + "context=%s TTBR0=0x%llx CIDR=0x%x (%s %s fault)\n", + ctx->name, ptbase, contextidr, + write ? 
"write" : "read", fault_type); + + /* Don't print the debug if this is a permissions fault */ + if (!(flags & IOMMU_FAULT_PERMISSION)) { + _check_if_freed(ctx, addr, ptname); + + KGSL_LOG_DUMP(ctx->kgsldev, + "---- nearby memory ----\n"); + + _find_mem_entries(mmu, addr, ptbase, &prev, &next); + + if (prev.gpuaddr) + _print_entry(ctx->kgsldev, &prev); + else + KGSL_LOG_DUMP(ctx->kgsldev, "*EMPTY*\n"); + + KGSL_LOG_DUMP(ctx->kgsldev, " <- fault @ %8.8lX\n", + addr); + + if (next.gpuaddr != (uint64_t) -1) + _print_entry(ctx->kgsldev, &next); + else + KGSL_LOG_DUMP(ctx->kgsldev, "*EMPTY*\n"); + + } + } + + trace_kgsl_mmu_pagefault(ctx->kgsldev, addr, + kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase), + write ? "write" : "read"); + + /* + * We do not want the h/w to resume fetching data from an iommu + * that has faulted, this is better for debugging as it will stall + * the GPU and trigger a snapshot. Return EBUSY error. + */ + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy) && + (flags & IOMMU_FAULT_TRANSACTION_STALLED)) { + uint32_t sctlr_val; + ret = -EBUSY; + /* + * Disable context fault interrupts + * as we do not clear FSR in the ISR. + * Will be re-enabled after FSR is cleared. + */ + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + + adreno_set_gpu_fault(adreno_dev, ADRENO_IOMMU_PAGE_FAULT); + /* Go ahead with recovery*/ + adreno_dispatcher_schedule(device); + } + + return ret; +} + +/* + * kgsl_iommu_disable_clk() - Disable iommu clocks + * Disable IOMMU clocks + */ +static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + int j; + + atomic_dec(&iommu->clk_enable_count); + BUG_ON(atomic_read(&iommu->clk_enable_count) < 0); + + for (j = (KGSL_IOMMU_MAX_CLKS - 1); j >= 0; j--) + if (iommu->clks[j]) + clk_disable_unprepare(iommu->clks[j]); +} + +/* + * kgsl_iommu_enable_clk_prepare_enable - Enable the specified IOMMU clock + * Try 4 times to enable it and then BUG() for debug + */ +static void kgsl_iommu_clk_prepare_enable(struct clk *clk) +{ + int num_retries = 4; + + while (num_retries--) { + if (!clk_prepare_enable(clk)) + return; + } + + /* Failure is fatal so BUG() to facilitate debug */ + KGSL_CORE_ERR("IOMMU clock enable failed\n"); + BUG(); +} + +/* + * kgsl_iommu_enable_clk - Enable iommu clocks + * Enable all the IOMMU clocks + */ +static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu) +{ + int j; + struct kgsl_iommu *iommu = mmu->priv; + + for (j = 0; j < KGSL_IOMMU_MAX_CLKS; j++) { + if (iommu->clks[j]) + kgsl_iommu_clk_prepare_enable(iommu->clks[j]); + } + atomic_inc(&iommu->clk_enable_count); +} + +/* kgsl_iommu_get_ttbr0 - Get TTBR0 setting for a pagetable */ +static u64 kgsl_iommu_get_ttbr0(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt ? pt->priv : NULL; + + BUG_ON(iommu_pt == NULL); + + return iommu_pt->ttbr0; +} + +/* kgsl_iommu_get_contextidr - query CONTEXTIDR setting for a pagetable */ +static u32 kgsl_iommu_get_contextidr(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt ? 
pt->priv : NULL; + + BUG_ON(iommu_pt == NULL); + + return iommu_pt->contextidr; +} + +/* + * kgsl_iommu_destroy_pagetable - Free up reaources help by a pagetable + * @mmu_specific_pt - Pointer to pagetable which is to be freed + * + * Return - void + */ +static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt->priv; + struct kgsl_mmu *mmu = pt->mmu; + struct kgsl_iommu *iommu; + struct kgsl_iommu_context *ctx; + + BUG_ON(!list_empty(&pt->list)); + + iommu = mmu->priv; + + if (KGSL_MMU_SECURE_PT == pt->name) + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + else + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + if (iommu_pt->domain) { + trace_kgsl_pagetable_destroy(iommu_pt->ttbr0, pt->name); + + _detach_pt(iommu_pt, ctx); + + iommu_domain_free(iommu_pt->domain); + } + + kfree(iommu_pt); +} + +static void setup_64bit_pagetable(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, + struct kgsl_iommu_pt *pt) +{ + if (mmu->secured && pagetable->name == KGSL_MMU_SECURE_PT) { + pt->compat_va_start = KGSL_IOMMU_SECURE_BASE; + pt->compat_va_end = KGSL_IOMMU_SECURE_END; + pt->va_start = KGSL_IOMMU_SECURE_BASE; + pt->va_end = KGSL_IOMMU_SECURE_END; + } else { + pt->compat_va_start = KGSL_IOMMU_SVM_BASE32; + pt->compat_va_end = KGSL_IOMMU_SVM_END32; + pt->va_start = KGSL_IOMMU_VA_BASE64; + pt->va_end = KGSL_IOMMU_VA_END64; + } + + if (pagetable->name != KGSL_MMU_GLOBAL_PT && + pagetable->name != KGSL_MMU_SECURE_PT) { + if ((BITS_PER_LONG == 32) || is_compat_task()) { + pt->svm_start = KGSL_IOMMU_SVM_BASE32; + pt->svm_end = KGSL_IOMMU_SVM_END32; + } else { + pt->svm_start = KGSL_IOMMU_SVM_BASE64; + pt->svm_end = KGSL_IOMMU_SVM_END64; + } + } +} + +static void setup_32bit_pagetable(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, + struct kgsl_iommu_pt *pt) +{ + if (mmu->secured) { + if (pagetable->name == KGSL_MMU_SECURE_PT) { + pt->compat_va_start = KGSL_IOMMU_SECURE_BASE; + pt->compat_va_end = KGSL_IOMMU_SECURE_END; + pt->va_start = KGSL_IOMMU_SECURE_BASE; + pt->va_end = KGSL_IOMMU_SECURE_END; + } else { + pt->va_start = KGSL_IOMMU_SVM_BASE32; + pt->va_end = KGSL_IOMMU_SECURE_BASE; + pt->compat_va_start = pt->va_start; + pt->compat_va_end = pt->va_end; + } + } else { + pt->va_start = KGSL_IOMMU_SVM_BASE32; + pt->va_end = KGSL_MMU_GLOBAL_MEM_BASE; + pt->compat_va_start = pt->va_start; + pt->compat_va_end = pt->va_end; + } + + if (pagetable->name != KGSL_MMU_GLOBAL_PT && + pagetable->name != KGSL_MMU_SECURE_PT) { + pt->svm_start = KGSL_IOMMU_SVM_BASE32; + pt->svm_end = KGSL_IOMMU_SVM_END32; + } +} + + +static struct kgsl_iommu_pt * +_alloc_pt(struct device *dev, struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt; + struct bus_type *bus = kgsl_mmu_get_bus(dev); + + if (bus == NULL) + return ERR_PTR(-ENODEV); + + iommu_pt = kzalloc(sizeof(struct kgsl_iommu_pt), GFP_KERNEL); + if (iommu_pt == NULL) + return ERR_PTR(-ENOMEM); + + iommu_pt->domain = iommu_domain_alloc(bus); + if (iommu_pt->domain == NULL) { + kfree(iommu_pt); + return ERR_PTR(-ENODEV); + } + + pt->pt_ops = &iommu_pt_ops; + pt->priv = iommu_pt; + iommu_pt->rbtree = RB_ROOT; + + if (MMU_FEATURE(mmu, KGSL_MMU_64BIT)) + setup_64bit_pagetable(mmu, pt, iommu_pt); + else + setup_32bit_pagetable(mmu, pt, iommu_pt); + + + return iommu_pt; +} + +static void _free_pt(struct kgsl_iommu_context *ctx, struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt->priv; + + pt->pt_ops = NULL; + pt->priv = NULL; + + if (iommu_pt == NULL) + return; + 
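+ /* Detach from the IOMMU context bank before the domain is freed */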
+ _detach_pt(iommu_pt, ctx); + + if (iommu_pt->domain != NULL) + iommu_domain_free(iommu_pt->domain); + kfree(iommu_pt); +} + +static int _init_global_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + int ret = 0; + struct kgsl_iommu_pt *iommu_pt = NULL; + int disable_htw = !MMU_FEATURE(mmu, KGSL_MMU_COHERENT_HTW); + unsigned int cb_num; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + iommu_pt = _alloc_pt(ctx->dev, mmu, pt); + + if (IS_ERR(iommu_pt)) + return PTR_ERR(iommu_pt); + + iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_COHERENT_HTW_DISABLE, &disable_htw); + + if (kgsl_mmu_is_perprocess(mmu)) { + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_PROCID, &pt->name); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_PROCID failed: %d\n", + ret); + goto done; + } + } + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + goto done; + + iommu_set_fault_handler(iommu_pt->domain, + kgsl_iommu_fault_handler, pt); + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXT_BANK, &cb_num); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_PROCID failed: %d\n", + ret); + goto done; + } + + ctx->cb_num = cb_num; + ctx->regbase = iommu->regbase + KGSL_IOMMU_CB0_OFFSET + + (cb_num << KGSL_IOMMU_CB_SHIFT); + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_TTBR0, &iommu_pt->ttbr0); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_TTBR0 failed: %d\n", + ret); + goto done; + } + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXTIDR, &iommu_pt->contextidr); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXTIDR failed: %d\n", + ret); + goto done; + } + +done: + if (ret) + _free_pt(ctx, pt); + + return ret; +} + +static int _init_secure_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + int ret = 0; + struct kgsl_iommu_pt *iommu_pt = NULL; + struct kgsl_iommu *iommu = mmu->priv; + int disable_htw = !MMU_FEATURE(mmu, KGSL_MMU_COHERENT_HTW); + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + int secure_vmid = VMID_CP_PIXEL; + unsigned int cb_num; + + if (!mmu->secured) + return -EPERM; + + if (!MMU_FEATURE(mmu, KGSL_MMU_HYP_SECURE_ALLOC)) { + if (!kgsl_mmu_bus_secured(ctx->dev)) + return -EPERM; + } + + iommu_pt = _alloc_pt(ctx->dev, mmu, pt); + + if (IS_ERR(iommu_pt)) + return PTR_ERR(iommu_pt); + + iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_COHERENT_HTW_DISABLE, &disable_htw); + + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_SECURE_VMID, &secure_vmid); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_SECURE_VMID failed: %d\n", ret); + goto done; + } + + ret = _attach_pt(iommu_pt, ctx); + + if (MMU_FEATURE(mmu, KGSL_MMU_HYP_SECURE_ALLOC)) + iommu_set_fault_handler(iommu_pt->domain, + kgsl_iommu_fault_handler, pt); + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXT_BANK, &cb_num); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_PROCID failed: %d\n", + ret); + goto done; + } + + ctx->cb_num = cb_num; + ctx->regbase = iommu->regbase + KGSL_IOMMU_CB0_OFFSET + + (cb_num << KGSL_IOMMU_CB_SHIFT); + +done: + if (ret) + _free_pt(ctx, pt); + return ret; +} + +static int _init_per_process_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + int ret = 0; + struct kgsl_iommu_pt *iommu_pt = NULL; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + int dynamic = 1; + unsigned int cb_num = ctx->cb_num; + int disable_htw = !MMU_FEATURE(mmu, 
KGSL_MMU_COHERENT_HTW); + + iommu_pt = _alloc_pt(ctx->dev, mmu, pt); + + if (IS_ERR(iommu_pt)) + return PTR_ERR(iommu_pt); + + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_DYNAMIC, &dynamic); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_DYNAMIC failed: %d\n", ret); + goto done; + } + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXT_BANK, &cb_num); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_CONTEXT_BANK failed: %d\n", ret); + goto done; + } + + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_PROCID, &pt->name); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_PROCID failed: %d\n", ret); + goto done; + } + + iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_COHERENT_HTW_DISABLE, &disable_htw); + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + goto done; + + /* now read back the attributes needed for self programming */ + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_TTBR0, &iommu_pt->ttbr0); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_TTBR0 failed: %d\n", ret); + goto done; + } + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXTIDR, &iommu_pt->contextidr); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXTIDR failed: %d\n", ret); + goto done; + } + +done: + if (ret) + _free_pt(ctx, pt); + + return ret; +} + +/* kgsl_iommu_init_pt - Set up an IOMMU pagetable */ +static int kgsl_iommu_init_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + if (pt == NULL) + return -EINVAL; + + switch (pt->name) { + case KGSL_MMU_GLOBAL_PT: + return _init_global_pt(mmu, pt); + + case KGSL_MMU_SECURE_PT: + return _init_secure_pt(mmu, pt); + + default: + return _init_per_process_pt(mmu, pt); + } +} + +/* + * kgsl_iommu_get_reg_ahbaddr - Returns the ahb address of the register + * @mmu - Pointer to mmu structure + * @id - The context ID of the IOMMU ctx + * @reg - The register for which address is required + * + * Return - The address of register which can be used in type0 packet + */ +static unsigned int kgsl_iommu_get_reg_ahbaddr(struct kgsl_mmu *mmu, + enum kgsl_iommu_context_id id, enum kgsl_iommu_reg_map reg) +{ + unsigned int result; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[id]; + + result = ctx->gpu_offset + kgsl_iommu_reg_list[reg]; + return result; +} + +static int kgsl_iommu_init(struct kgsl_mmu *mmu) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + struct platform_device *pdev = mmu->device->pdev; + + if (ctx->name == NULL) { + KGSL_CORE_ERR("dt: gfx3d0_user context bank not found\n"); + return -EINVAL; + } + + /* check requirements for per process pagetables */ + if (ctx->gpu_offset == UINT_MAX) { + KGSL_CORE_ERR("missing qcom,gpu-offset forces global pt\n"); + mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE; + } + + if (iommu->version == 1 && iommu->micro_mmu_ctrl == UINT_MAX) { + KGSL_CORE_ERR( + "missing qcom,micro-mmu-control forces global pt\n"); + mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE; + } + + /* Check to see if we need to do the IOMMU sync dance */ + need_iommu_sync = of_property_read_bool(pdev->dev.of_node, + "qcom,gpu-quirk-iommu-sync"); + + iommu->regbase = ioremap(iommu->regstart, iommu->regsize); + if (iommu->regbase == NULL) { + KGSL_CORE_ERR("Could not map IOMMU registers 0x%lx:0x%x\n", + iommu->regstart, iommu->regsize); + return -ENOMEM; + } + + if (addr_entry_cache == NULL) { + 
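+ /*
+ * Lazily create the kmem cache for the rbtree entries used to track
+ * GPU virtual address ranges in each IOMMU pagetable.
+ */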
addr_entry_cache = KMEM_CACHE(kgsl_iommu_addr_entry, 0); + if (addr_entry_cache == NULL) { + status = -ENOMEM; + goto done; + } + } + + if (kgsl_guard_page == NULL) { + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); + if (kgsl_guard_page == NULL) { + status = -ENOMEM; + goto done; + } + } + +done: + return status; +} + +static void _detach_context(struct kgsl_iommu_context *ctx) +{ + struct kgsl_iommu_pt *iommu_pt; + + if (ctx->default_pt == NULL) + return; + + iommu_pt = ctx->default_pt->priv; + + _detach_pt(iommu_pt, ctx); + + ctx->default_pt = NULL; +} + +static int _setup_user_context(struct kgsl_mmu *mmu) +{ + int ret = 0; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + struct adreno_device *adreno_dev = ADRENO_DEVICE(mmu->device); + struct kgsl_iommu_pt *iommu_pt = NULL; + unsigned int sctlr_val; + + if (mmu->defaultpagetable == NULL) { + mmu->defaultpagetable = kgsl_mmu_getpagetable(mmu, + KGSL_MMU_GLOBAL_PT); + /* if we don't have a default pagetable, nothing will work */ + if (IS_ERR(mmu->defaultpagetable)) { + ret = PTR_ERR(mmu->defaultpagetable); + mmu->defaultpagetable = NULL; + return ret; + } + } + + iommu_pt = mmu->defaultpagetable->priv; + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + return ret; + + ctx->default_pt = mmu->defaultpagetable; + + kgsl_iommu_enable_clk(mmu); + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + + /* + * If pagefault policy is GPUHALT_ENABLE, + * 1) Program CFCFG to 1 to enable STALL mode + * 2) Program HUPCF to 0 (Stall or terminate subsequent + * transactions in the presence of an outstanding fault) + * else + * 1) Program CFCFG to 0 to disable STALL mode (0=Terminate) + * 2) Program HUPCF to 1 (Process subsequent transactions + * independently of any outstanding fault) + */ + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + kgsl_iommu_disable_clk(mmu); + + return 0; +} + +static int _setup_secure_context(struct kgsl_mmu *mmu) +{ + int ret; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + unsigned int cb_num; + + struct kgsl_iommu_pt *iommu_pt; + + if (ctx->dev == NULL || !mmu->secured) + return 0; + + if (mmu->securepagetable == NULL) { + mmu->securepagetable = kgsl_mmu_getpagetable(mmu, + KGSL_MMU_SECURE_PT); + if (IS_ERR(mmu->securepagetable)) { + ret = PTR_ERR(mmu->securepagetable); + mmu->securepagetable = NULL; + return ret; + } else if (mmu->securepagetable == NULL) { + return -ENOMEM; + } + } + iommu_pt = mmu->securepagetable->priv; + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + goto done; + + ctx->default_pt = mmu->securepagetable; + + ret = iommu_domain_get_attr(iommu_pt->domain, DOMAIN_ATTR_CONTEXT_BANK, + &cb_num); + if (ret) { + KGSL_CORE_ERR("get CONTEXT_BANK attr, err %d\n", ret); + goto done; + } + ctx->cb_num = cb_num; +done: + if (ret) + _detach_context(ctx); + return ret; +} + +static int kgsl_iommu_start(struct kgsl_mmu *mmu) +{ + int status; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + status = _setup_user_context(mmu); + if 
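The SCTLR update in _setup_user_context() (and the identical pattern in kgsl_iommu_set_pf_policy() further down) boils down to flipping the CFCFG and HUPCF bits in opposite directions, exactly as the comment describes. A standalone sketch of just that bit manipulation, using the shift values from kgsl_iommu.h; the function name is hypothetical:

#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT 8   /* 1: keep servicing other transactions on fault */
#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT 7   /* 1: stall on fault, 0: terminate */

/* Compute the SCTLR value for the requested pagefault policy. */
static unsigned int sctlr_for_policy(unsigned int sctlr_val, int gpuhalt_enable)
{
        if (gpuhalt_enable) {
                sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
                sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
        } else {
                sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
                sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
        }
        return sctlr_val;
}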
(status) + return status; + + status = _setup_secure_context(mmu); + if (status) + _detach_context(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER]); + else { + kgsl_iommu_enable_clk(mmu); + KGSL_IOMMU_SET_CTX_REG(ctx, TLBIALL, 1); + kgsl_iommu_disable_clk(mmu); + } + return status; +} + +static int +kgsl_iommu_unmap(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_device *device = pt->mmu->device; + int ret = 0; + uint64_t range = memdesc->size; + size_t unmapped = 0; + struct kgsl_iommu_pt *iommu_pt = pt->priv; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + uint64_t gpuaddr = PAGE_ALIGN(memdesc->gpuaddr); + + if (range == 0 || gpuaddr == 0) + return 0; + + if (kgsl_memdesc_has_guard_page(memdesc)) + range += kgsl_memdesc_guard_page_size(pt->mmu, memdesc); + + if (kgsl_memdesc_is_secured(memdesc)) { + + if (!kgsl_mmu_is_secured(pt->mmu)) + return -EINVAL; + + mutex_lock(&device->mutex); + ret = kgsl_active_count_get(device); + if (!ret) { + _iommu_sync_mmu_pc(true); + unmapped = iommu_unmap(iommu_pt->domain, gpuaddr, + range); + _iommu_sync_mmu_pc(false); + kgsl_active_count_put(device); + } + mutex_unlock(&device->mutex); + } else { + _iommu_sync_mmu_pc(true); + unmapped = iommu_unmap(iommu_pt->domain, gpuaddr, range); + _iommu_sync_mmu_pc(false); + } + if (unmapped != range) { + KGSL_CORE_ERR( + "iommu_unmap(%p, %llx, %lld) failed with unmapped size: %zd\n", + iommu_pt->domain, gpuaddr, range, unmapped); + return -EINVAL; + } + + return ret; +} + +/** + * _iommu_add_guard_page - Add iommu guard page + * @pt - Pointer to kgsl pagetable structure + * @memdesc - memdesc to add guard page + * @gpuaddr - GPU addr of guard page + * @protflags - flags for mapping + * + * Return 0 on success, error on map fail + */ +static int _iommu_add_guard_page(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, + uint64_t gpuaddr, + unsigned int protflags) +{ + struct kgsl_iommu_pt *iommu_pt = pt->priv; + phys_addr_t physaddr = page_to_phys(kgsl_guard_page); + int ret; + + if (kgsl_memdesc_has_guard_page(memdesc)) { + + /* + * Allocate guard page for secure buffers. + * This has to be done after we attach a smmu pagetable. + * Allocate the guard page when first secure buffer is. + * mapped to save 1MB of memory if CPZ is not used. 
+ */ + if (kgsl_memdesc_is_secured(memdesc)) { + struct scatterlist *sg; + unsigned int sgp_size = pt->mmu->secure_align_mask + 1; + if (!kgsl_secure_guard_page_memdesc.sgt) { + if (kgsl_allocate_user(pt->mmu->device, + &kgsl_secure_guard_page_memdesc, pt, + sgp_size, sgp_size, + KGSL_MEMFLAGS_SECURE)) { + KGSL_CORE_ERR( + "Secure guard page alloc failed\n"); + return -ENOMEM; + } + } + + sg = kgsl_secure_guard_page_memdesc.sgt->sgl; + physaddr = page_to_phys(sg_page(sg)); + } + + _iommu_sync_mmu_pc(true); + ret = iommu_map(iommu_pt->domain, gpuaddr, physaddr, + kgsl_memdesc_guard_page_size(pt->mmu, memdesc), + protflags & ~IOMMU_WRITE); + _iommu_sync_mmu_pc(false); + if (ret) { + KGSL_CORE_ERR( + "iommu_map(%p, addr %016llX, flags %x) err: %d\n", + iommu_pt->domain, gpuaddr, protflags & ~IOMMU_WRITE, + ret); + return ret; + } + } + + return 0; +} + +static int +kgsl_iommu_map(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + uint64_t addr = memdesc->gpuaddr; + struct kgsl_iommu_pt *iommu_pt = pt->priv; + uint64_t size = memdesc->size; + unsigned int flags = 0; + struct kgsl_device *device = pt->mmu->device; + size_t mapped = 0; + + BUG_ON(NULL == iommu_pt); + + flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC; + + /* Set up the protection for the page(s) */ + if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) + flags &= ~IOMMU_WRITE; + + if (memdesc->priv & KGSL_MEMDESC_PRIVILEGED) + flags |= IOMMU_PRIV; + + if (kgsl_memdesc_is_secured(memdesc)) { + + if (!kgsl_mmu_is_secured(pt->mmu)) + return -EINVAL; + + mutex_lock(&device->mutex); + ret = kgsl_active_count_get(device); + if (!ret) { + _iommu_sync_mmu_pc(true); + mapped = iommu_map_sg(iommu_pt->domain, addr, + memdesc->sgt->sgl, memdesc->sgt->nents, + flags); + _iommu_sync_mmu_pc(false); + kgsl_active_count_put(device); + } + mutex_unlock(&device->mutex); + } else { + _iommu_sync_mmu_pc(true); + mapped = iommu_map_sg(iommu_pt->domain, addr, + memdesc->sgt->sgl, memdesc->sgt->nents, flags); + _iommu_sync_mmu_pc(false); + } + + if (mapped != size) { + KGSL_CORE_ERR("iommu_map_sg(%p, %016llX, %lld, %x) err: %zd\n", + iommu_pt->domain, addr, size, + flags, mapped); + return -ENODEV; + } + + ret = _iommu_add_guard_page(pt, memdesc, addr + size, flags); + if (ret) { + /* cleanup the partial mapping */ + _iommu_sync_mmu_pc(true); + iommu_unmap(iommu_pt->domain, addr, size); + _iommu_sync_mmu_pc(false); + } + + return ret; +} + +/* This function must be called with context bank attached */ +static void kgsl_iommu_clear_fsr(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + unsigned int sctlr_val; + + if (ctx->default_pt != NULL) { + kgsl_iommu_enable_clk(mmu); + KGSL_IOMMU_SET_CTX_REG(ctx, FSR, 0xffffffff); + /* + * Re-enable context fault interrupts after clearing + * FSR to prevent the interrupt from firing repeatedly + */ + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + /* + * Make sure the above register writes + * are not reordered across the barrier + * as we use writel_relaxed to write them + */ + wmb(); + kgsl_iommu_disable_clk(mmu); + } +} + +static void kgsl_iommu_pagefault_resume(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + if (ctx->default_pt != NULL && ctx->fault) { + /* + * Write 1 to RESUME.TnR to terminate the + * 
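The flag derivation at the top of kgsl_iommu_map() above is a pure function of the memdesc flags: start from read/write/no-execute, drop write for read-only buffers, and add the privileged bit for privileged ones. A hedged standalone sketch of that mapping; the constant values below are stand-ins chosen for illustration, while the real driver uses the IOMMU_* bits from linux/iommu.h and the KGSL_* flags from kgsl.h:

/* Illustrative stand-ins for the kernel's IOMMU prot bits and KGSL flags. */
#define IOMMU_READ   (1 << 0)
#define IOMMU_WRITE  (1 << 1)
#define IOMMU_NOEXEC (1 << 3)
#define IOMMU_PRIV   (1 << 5)

#define KGSL_MEMFLAGS_GPUREADONLY (1 << 0)
#define KGSL_MEMDESC_PRIVILEGED   (1 << 1)

static unsigned int map_protflags(unsigned long memflags, unsigned long memdesc_priv)
{
        unsigned int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC;

        if (memflags & KGSL_MEMFLAGS_GPUREADONLY)
                flags &= ~IOMMU_WRITE;      /* GPU may read but never write */
        if (memdesc_priv & KGSL_MEMDESC_PRIVILEGED)
                flags |= IOMMU_PRIV;        /* privileged-only access */
        return flags;
}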
stalled transaction. + */ + KGSL_IOMMU_SET_CTX_REG(ctx, RESUME, 1); + /* + * Make sure the above register writes + * are not reordered across the barrier + * as we use writel_relaxed to write them + */ + wmb(); + ctx->fault = 0; + } +} + +static void kgsl_iommu_stop(struct kgsl_mmu *mmu) +{ + int i; + struct kgsl_iommu *iommu = mmu->priv; + + /* + * If the iommu supports retention, we don't need + * to detach when stopping. + */ + if (!MMU_FEATURE(mmu, KGSL_MMU_RETENTION)) { + for (i = 0; i < KGSL_IOMMU_CONTEXT_MAX; i++) + _detach_context(&iommu->ctx[i]); + } +} + +static int kgsl_iommu_close(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + int i; + + for (i = 0; i < KGSL_IOMMU_CONTEXT_MAX; i++) + _detach_context(&iommu->ctx[i]); + + kgsl_mmu_putpagetable(mmu->defaultpagetable); + mmu->defaultpagetable = NULL; + + + kgsl_mmu_putpagetable(mmu->securepagetable); + mmu->securepagetable = NULL; + + if (iommu->regbase != NULL) + iounmap(iommu->regbase); + + kgsl_sharedmem_free(&kgsl_secure_guard_page_memdesc); + + if (kgsl_guard_page != NULL) { + __free_page(kgsl_guard_page); + kgsl_guard_page = NULL; + } + + return 0; +} + +static u64 +kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + u64 val; + struct kgsl_iommu *iommu = mmu->priv; + /* + * We cannot enable or disable the clocks in interrupt context, this + * function is called from interrupt context if there is an axi error + */ + if (in_interrupt()) + return 0; + + kgsl_iommu_enable_clk(mmu); + val = KGSL_IOMMU_GET_CTX_REG_Q(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER], + TTBR0); + kgsl_iommu_disable_clk(mmu); + return val; +} + +/* + * kgsl_iommu_set_pt - Change the IOMMU pagetable of the primary context bank + * @mmu - Pointer to mmu structure + * @pt - Pagetable to switch to + * + * Set the new pagetable for the IOMMU by doing direct register writes + * to the IOMMU registers through the cpu + * + * Return - void + */ +static int kgsl_iommu_set_pt(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pt) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + int ret = 0; + uint64_t ttbr0, temp; + unsigned int contextidr; + unsigned long wait_for_flush; + + /* + * If using a global pagetable, we can skip all this + * because the pagetable will be set up by the iommu + * driver and never changed at runtime. + */ + if (!kgsl_mmu_is_perprocess(mmu)) + return 0; + + kgsl_iommu_enable_clk(mmu); + + ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt); + contextidr = kgsl_mmu_pagetable_get_contextidr(pt); + + /* + * Taking the liberty to spin idle since this codepath + * is invoked when we can spin safely for it to be idle + */ + ret = adreno_spin_idle(mmu->device, ADRENO_IDLE_TIMEOUT); + if (ret) + return ret; + + KGSL_IOMMU_SET_CTX_REG_Q(ctx, TTBR0, ttbr0); + KGSL_IOMMU_SET_CTX_REG(ctx, CONTEXTIDR, contextidr); + + mb(); + temp = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0); + + KGSL_IOMMU_SET_CTX_REG(ctx, TLBIALL, 1); + /* make sure the TBLI write completes before we wait */ + mb(); + /* + * Wait for flush to complete by polling the flush + * status bit of TLBSTATUS register for not more than + * 2 s. After 2s just exit, at that point the SMMU h/w + * may be stuck and will eventually cause GPU to hang + * or bring the system down. 
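The wait described in that comment is a bounded poll on the TLB status register. A simplified userspace model of the same pattern, with a wall-clock deadline standing in for jiffies and a hypothetical stub standing in for the register read:

#include <stdio.h>
#include <time.h>

#define TLBSTATUS_SACTIVE 0x1    /* mirrors KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE */

/* Hypothetical stub standing in for KGSL_IOMMU_GET_CTX_REG(ctx, TLBSTATUS). */
static unsigned int read_tlbstatus(void) { return 0; }

/* Poll the flush-status bit for at most two seconds, then give up with a warning. */
static void wait_for_tlb_flush(void)
{
        time_t deadline = time(NULL) + 2;

        while (read_tlbstatus() & TLBSTATUS_SACTIVE) {
                if (time(NULL) > deadline) {
                        fprintf(stderr, "Wait limit reached for IOMMU tlb flush\n");
                        break;
                }
        }
}

The driver version additionally writes TLBSYNC before polling and calls cpu_relax() inside the loop, as shown in the code that follows.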
+ */ + wait_for_flush = jiffies + msecs_to_jiffies(2000); + KGSL_IOMMU_SET_CTX_REG(ctx, TLBSYNC, 0); + while (KGSL_IOMMU_GET_CTX_REG(ctx, TLBSTATUS) & + (KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE)) { + if (time_after(jiffies, wait_for_flush)) { + KGSL_DRV_WARN(mmu->device, + "Wait limit reached for IOMMU tlb flush\n"); + break; + } + cpu_relax(); + } + + /* Disable smmu clock */ + kgsl_iommu_disable_clk(mmu); + + return ret; +} + +/* + * kgsl_iommu_set_pf_policy() - Set the pagefault policy for IOMMU + * @mmu: Pointer to mmu structure + * @pf_policy: The pagefault polict to set + * + * Check if the new policy indicated by pf_policy is same as current + * policy, if same then return else set the policy + */ +static int kgsl_iommu_set_pf_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + struct adreno_device *adreno_dev = ADRENO_DEVICE(mmu->device); + int ret = 0; + unsigned int sctlr_val; + + if ((adreno_dev->ft_pf_policy & + BIT(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)) == + (pf_policy & BIT(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE))) + return 0; + + /* If not attached, policy will be updated during the next attach */ + if (ctx->default_pt != NULL) { + /* Need to idle device before changing options */ + ret = mmu->device->ftbl->idle(mmu->device); + if (ret) + return ret; + + kgsl_iommu_enable_clk(mmu); + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + + kgsl_iommu_disable_clk(mmu); + } + + return ret; +} + +static struct kgsl_protected_registers * +kgsl_iommu_get_prot_regs(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + + return &iommu->protect; +} + +static struct kgsl_iommu_addr_entry *_find_gpuaddr( + struct kgsl_pagetable *pagetable, uint64_t gpuaddr) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node = pt->rbtree.rb_node; + + while (node != NULL) { + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + if (gpuaddr < entry->base) + node = node->rb_left; + else if (gpuaddr > entry->base) + node = node->rb_right; + else + return entry; + } + + return NULL; +} + +static int _remove_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct kgsl_iommu_addr_entry *entry; + + entry = _find_gpuaddr(pagetable, gpuaddr); + + if (entry != NULL) { + rb_erase(&entry->node, &pt->rbtree); + kmem_cache_free(addr_entry_cache, entry); + return 0; + } + + return -ENOMEM; +} + +static int _insert_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node **node, *parent = NULL; + struct kgsl_iommu_addr_entry *new = + kmem_cache_alloc(addr_entry_cache, GFP_ATOMIC); + + if (new == NULL) + return -ENOMEM; + + new->base = gpuaddr; + new->size = size; + + node = &pt->rbtree.rb_node; + + while (*node != NULL) { + struct kgsl_iommu_addr_entry *this; + + parent = *node; + this = rb_entry(parent, struct kgsl_iommu_addr_entry, node); + + if (new->base < this->base) + node = &parent->rb_left; + else if (new->base > this->base) + node = 
&parent->rb_right; + else + BUG(); + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, &pt->rbtree); + + return 0; +} + +static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node = rb_first(&pt->rbtree); + uint64_t start; + + bottom = ALIGN(bottom, align); + start = bottom; + + while (node != NULL) { + uint64_t gap; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + /* + * Skip any entries that are outside of the range, but make sure + * to account for some that might straddle the lower bound + */ + if (entry->base < bottom) { + if (entry->base + entry->size > bottom) + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + continue; + } + + /* Stop if we went over the top */ + if (entry->base >= top) + break; + + /* Make sure there is a gap to consider */ + if (start < entry->base) { + gap = entry->base - start; + + if (gap >= size) + return start; + } + + /* Stop if there is no more room in the region */ + if (entry->base + entry->size >= top) + return (uint64_t) -ENOMEM; + + /* Start the next cycle at the end of the current entry */ + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + } + + if (start + size <= top) + return start; + + return (uint64_t) -ENOMEM; +} + +static uint64_t _get_unmapped_area_topdown(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node = rb_last(&pt->rbtree); + uint64_t end = top; + uint64_t mask = ~(align - 1); + struct kgsl_iommu_addr_entry *entry; + + /* Make sure that the bottom is correctly aligned */ + bottom = ALIGN(bottom, align); + + /* Make sure the requested size will fit in the range */ + if (size > (top - bottom)) + return -ENOMEM; + + /* Walk back through the list to find the highest entry in the range */ + for (node = rb_last(&pt->rbtree); node != NULL; node = rb_prev(node)) { + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + if (entry->base < top) + break; + } + + while (node != NULL) { + uint64_t offset; + + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + + /* If the entire entry is below the range the search is over */ + if ((entry->base + entry->size) < bottom) + break; + + /* Get the top of the entry properly aligned */ + offset = ALIGN(entry->base + entry->size, align); + + /* + * Try to allocate the memory from the top of the gap, + * making sure that it fits between the top of this entry and + * the bottom of the previous one + */ + + if (offset < end) { + uint64_t chunk = (end - size) & mask; + + if (chunk >= offset) + return chunk; + } + + /* + * If we get here and the current entry is outside of the range + * then we are officially out of room + */ + + if (entry->base < bottom) + return (uint64_t) -ENOMEM; + + /* Set the top of the gap to the current entry->base */ + end = entry->base; + + /* And move on to the next lower entry */ + node = rb_prev(node); + } + + /* If we get here then there are no more entries in the region */ + if ((end > size) && (((end - size) & mask) >= bottom)) + return (end - size) & mask; + + return (uint64_t) -ENOMEM; +} + +static uint64_t kgsl_iommu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment) +{ + uint64_t addr; + + /* Avoid black 
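Both _get_unmapped_area() variants above walk the address-sorted entries looking for the first hole of at least 'size' bytes at the requested alignment. A compact sketch of the bottom-up search, with a plain sorted array standing in for the rbtree; the helper returns 0 instead of -ENOMEM when nothing fits, and ALIGN is the usual power-of-two round-up:

#include <stddef.h>
#include <stdint.h>

#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

struct va_entry { uint64_t base, size; };   /* sorted by base, non-overlapping */

/* First-fit search inside [bottom, top); returns 0 when nothing fits. */
static uint64_t find_gap(const struct va_entry *e, size_t n,
                         uint64_t bottom, uint64_t top,
                         uint64_t size, uint64_t align)
{
        uint64_t start = ALIGN(bottom, align);
        size_t i;

        for (i = 0; i < n; i++) {
                if (e[i].base >= top)
                        break;                 /* rest of the list is out of range */
                if (e[i].base + e[i].size <= start)
                        continue;              /* entirely below the current cursor */
                if (start + size <= e[i].base)
                        return start;          /* the gap before this entry fits */
                start = ALIGN(e[i].base + e[i].size, align);
        }
        return (start + size <= top) ? start : 0;
}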
holes */ + BUG_ON(end <= start); + + spin_lock(&pagetable->lock); + addr = _get_unmapped_area_topdown(pagetable, + start, end, size, alignment); + spin_unlock(&pagetable->lock); + return addr; +} + +#define ADDR_IN_GLOBAL(_a) \ + (((_a) >= KGSL_MMU_GLOBAL_MEM_BASE) && \ + ((_a) < (KGSL_MMU_GLOBAL_MEM_BASE + KGSL_MMU_GLOBAL_MEM_SIZE))) + +static int kgsl_iommu_set_svm_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + int ret = -ENOMEM; + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node; + + /* Make sure the requested address doesn't fall in the global range */ + if (ADDR_IN_GLOBAL(gpuaddr) || ADDR_IN_GLOBAL(gpuaddr + size)) + return -ENOMEM; + + spin_lock(&pagetable->lock); + node = pt->rbtree.rb_node; + + while (node != NULL) { + uint64_t start, end; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + start = entry->base; + end = entry->base + entry->size; + + if (gpuaddr + size <= start) + node = node->rb_left; + else if (end <= gpuaddr) + node = node->rb_right; + else + goto out; + } + + ret = _insert_gpuaddr(pagetable, gpuaddr, size); +out: + spin_unlock(&pagetable->lock); + return ret; +} + + +static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + int ret = 0; + uint64_t addr, start, end; + uint64_t size = memdesc->size; + unsigned int align; + + BUG_ON(kgsl_memdesc_use_cpu_map(memdesc)); + + if (memdesc->flags & KGSL_MEMFLAGS_SECURE && + pagetable->name != KGSL_MMU_SECURE_PT) + return -EINVAL; + + if (kgsl_memdesc_has_guard_page(memdesc)) + size += kgsl_memdesc_guard_page_size(pagetable->mmu, memdesc); + + align = 1 << kgsl_memdesc_get_align(memdesc); + + if (memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT) { + start = pt->compat_va_start; + end = pt->compat_va_end; + } else { + start = pt->va_start; + end = pt->va_end; + } + + spin_lock(&pagetable->lock); + + addr = _get_unmapped_area(pagetable, start, end, size, align); + + if (addr == (uint64_t) -ENOMEM) { + ret = -ENOMEM; + goto out; + } + + ret = _insert_gpuaddr(pagetable, addr, size); + if (ret == 0) + memdesc->gpuaddr = addr; + +out: + spin_unlock(&pagetable->lock); + return ret; +} + +static void kgsl_iommu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + spin_lock(&pagetable->lock); + + if (_remove_gpuaddr(pagetable, memdesc->gpuaddr)) + BUG(); + + spin_unlock(&pagetable->lock); +} + +static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + bool gpu_compat = (memflags & KGSL_MEMFLAGS_FORCE_32BIT) != 0; + + if (lo != NULL) + *lo = gpu_compat ? pt->compat_va_start : pt->svm_start; + if (hi != NULL) + *hi = gpu_compat ? 
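kgsl_iommu_set_svm_region() above first refuses any request whose start or end lands in the global window, via ADDR_IN_GLOBAL(). The same check as a standalone helper, with the base and size values taken from kgsl_mmu.h later in this patch:

#include <stdbool.h>
#include <stdint.h>

#define KGSL_MMU_GLOBAL_MEM_BASE 0xf8000000ULL
#define KGSL_MMU_GLOBAL_MEM_SIZE (8ULL << 20)   /* SZ_8M */

static bool addr_in_global(uint64_t a)
{
        return a >= KGSL_MMU_GLOBAL_MEM_BASE &&
               a < (KGSL_MMU_GLOBAL_MEM_BASE + KGSL_MMU_GLOBAL_MEM_SIZE);
}

/* An SVM request is refused if either end lands in the global window. */
static bool svm_request_ok(uint64_t gpuaddr, uint64_t size)
{
        return !addr_in_global(gpuaddr) && !addr_in_global(gpuaddr + size);
}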
pt->compat_va_end : pt->svm_end; + + return 0; +} + +static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + + if (gpuaddr == 0) + return false; + + if (gpuaddr >= pt->va_start && gpuaddr < pt->va_end) + return true; + + if (gpuaddr >= pt->compat_va_start && gpuaddr < pt->compat_va_end) + return true; + + if (gpuaddr >= pt->svm_start && gpuaddr < pt->svm_end) + return true; + + return false; +} + +struct kgsl_mmu_ops kgsl_iommu_ops = { + .mmu_init = kgsl_iommu_init, + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_stop = kgsl_iommu_stop, + .mmu_set_pt = kgsl_iommu_set_pt, + .mmu_clear_fsr = kgsl_iommu_clear_fsr, + .mmu_get_current_ttbr0 = kgsl_iommu_get_current_ttbr0, + .mmu_enable_clk = kgsl_iommu_enable_clk, + .mmu_disable_clk = kgsl_iommu_disable_clk, + .mmu_get_reg_ahbaddr = kgsl_iommu_get_reg_ahbaddr, + .mmu_set_pf_policy = kgsl_iommu_set_pf_policy, + .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, + .mmu_get_prot_regs = kgsl_iommu_get_prot_regs, + .mmu_init_pt = kgsl_iommu_init_pt, +}; + +static struct kgsl_mmu_pt_ops iommu_pt_ops = { + .mmu_map = kgsl_iommu_map, + .mmu_unmap = kgsl_iommu_unmap, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .get_ttbr0 = kgsl_iommu_get_ttbr0, + .get_contextidr = kgsl_iommu_get_contextidr, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .set_svm_region = kgsl_iommu_set_svm_region, + .find_svm_region = kgsl_iommu_find_svm_region, + .svm_range = kgsl_iommu_svm_range, + .addr_in_range = kgsl_iommu_addr_in_range, +}; diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h new file mode 100644 index 000000000000..738c8296642a --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.h @@ -0,0 +1,210 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_IOMMU_H +#define __KGSL_IOMMU_H + +#ifdef CONFIG_MSM_IOMMU +#include <linux/qcom_iommu.h> +#endif +#include <linux/of.h> +#include "kgsl.h" + +#define KGSL_IOMMU_SECURE_SIZE SZ_256M +#define KGSL_IOMMU_SECURE_END KGSL_MMU_GLOBAL_MEM_BASE +#define KGSL_IOMMU_SECURE_BASE \ + (KGSL_MMU_GLOBAL_MEM_BASE - KGSL_IOMMU_SECURE_SIZE) + +#define KGSL_IOMMU_SVM_BASE32 0x300000 +#define KGSL_IOMMU_SVM_END32 (0xC0000000 - SZ_16M) + +#define KGSL_IOMMU_VA_BASE64 0x500000000ULL +#define KGSL_IOMMU_VA_END64 0x600000000ULL +/* + * Note: currently we only support 36 bit addresses, + * but the CPU supports 39. Eventually this range + * should change to high part of the 39 bit address + * space just like the CPU. 
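To make the layout encoded in these defines concrete: the secure window is the 256 MB immediately below the global region, and the 32-bit SVM window stops 16 MB short of 3 GB. A small runnable sketch that derives and prints those boundaries; the SZ_* constants are expanded by hand and the output format is illustrative:

#include <inttypes.h>
#include <stdio.h>

#define SZ_16M  (16ULL << 20)
#define SZ_256M (256ULL << 20)

#define KGSL_MMU_GLOBAL_MEM_BASE 0xf8000000ULL
#define KGSL_IOMMU_SECURE_SIZE   SZ_256M
#define KGSL_IOMMU_SECURE_END    KGSL_MMU_GLOBAL_MEM_BASE
#define KGSL_IOMMU_SECURE_BASE   (KGSL_MMU_GLOBAL_MEM_BASE - KGSL_IOMMU_SECURE_SIZE)
#define KGSL_IOMMU_SVM_BASE32    0x300000ULL
#define KGSL_IOMMU_SVM_END32     (0xC0000000ULL - SZ_16M)

int main(void)
{
        printf("secure: [%#" PRIx64 ", %#" PRIx64 ")\n",
               (uint64_t)KGSL_IOMMU_SECURE_BASE, (uint64_t)KGSL_IOMMU_SECURE_END);
        printf("svm32 : [%#" PRIx64 ", %#" PRIx64 ")\n",
               (uint64_t)KGSL_IOMMU_SVM_BASE32, (uint64_t)KGSL_IOMMU_SVM_END32);
        return 0;
}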
+ */ +#define KGSL_IOMMU_SVM_BASE64 0x700000000ULL +#define KGSL_IOMMU_SVM_END64 0x800000000ULL + +/* Pagetable virtual base */ +#define KGSL_IOMMU_CTX_OFFSET_V1 0x8000 +#define KGSL_IOMMU_CTX_OFFSET_V2 0x9000 +#define KGSL_IOMMU_CTX_OFFSET_V2_A530 0x8000 +#define KGSL_IOMMU_CTX_OFFSET_A405V2 0x8000 +#define KGSL_IOMMU_CTX_SHIFT 12 + +/* FSYNR1 V0 fields */ +#define KGSL_IOMMU_FSYNR1_AWRITE_MASK 0x00000001 +#define KGSL_IOMMU_FSYNR1_AWRITE_SHIFT 8 +/* FSYNR0 V1 fields */ +#define KGSL_IOMMU_V1_FSYNR0_WNR_MASK 0x00000001 +#define KGSL_IOMMU_V1_FSYNR0_WNR_SHIFT 4 + +/* TLBSTATUS register fields */ +#define KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE BIT(0) + +/* IMPLDEF_MICRO_MMU_CTRL register fields */ +#define KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT 0x00000004 +#define KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE 0x00000008 + +/* SCTLR fields */ +#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT 8 +#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT 7 +#define KGSL_IOMMU_SCTLR_CFIE_SHIFT 6 + +enum kgsl_iommu_reg_map { + KGSL_IOMMU_CTX_SCTLR = 0, + KGSL_IOMMU_CTX_TTBR0, + KGSL_IOMMU_CTX_CONTEXTIDR, + KGSL_IOMMU_CTX_FSR, + KGSL_IOMMU_CTX_FAR, + KGSL_IOMMU_CTX_TLBIALL, + KGSL_IOMMU_CTX_RESUME, + KGSL_IOMMU_CTX_FSYNR0, + KGSL_IOMMU_CTX_FSYNR1, + KGSL_IOMMU_CTX_TLBSYNC, + KGSL_IOMMU_CTX_TLBSTATUS, + KGSL_IOMMU_REG_MAX +}; + +/* Max number of iommu clks per IOMMU unit */ +#define KGSL_IOMMU_MAX_CLKS 5 + +enum kgsl_iommu_context_id { + KGSL_IOMMU_CONTEXT_USER = 0, + KGSL_IOMMU_CONTEXT_SECURE = 1, + KGSL_IOMMU_CONTEXT_MAX, +}; + +/* offset at which a nop command is placed in setstate_memory */ +#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 + +/* + * struct kgsl_iommu_context - Structure holding data about an iommu context + * bank + * @dev: pointer to the iommu context's device + * @name: context name + * @id: The id of the context, used for deciding how it is used. + * @cb_num: The hardware context bank number, used for calculating register + * offsets. + * @kgsldev: The kgsl device that uses this context. + * @fault: Flag when set indicates that this iommu device has caused a page + * fault + * @gpu_offset: Offset of this context bank in the GPU register space + * @default_pt: The default pagetable for this context, + * it may be changed by self programming. + */ +struct kgsl_iommu_context { + struct device *dev; + const char *name; + enum kgsl_iommu_context_id id; + unsigned int cb_num; + struct kgsl_device *kgsldev; + int fault; + void __iomem *regbase; + unsigned int gpu_offset; + struct kgsl_pagetable *default_pt; +}; + +/* + * struct kgsl_iommu - Structure holding iommu data for kgsl driver + * @ctx: Array of kgsl_iommu_context structs + * @regbase: Virtual address of the IOMMU register base + * @regstart: Physical address of the iommu registers + * @regsize: Length of the iommu register region. + * @clk_enable_count: The ref count of clock enable calls + * @clks: Array of pointers to IOMMU clocks + * @micro_mmu_ctrl: GPU register offset of this glob al register + * @smmu_info: smmu info used in a5xx preemption + * @protect: register protection settings for the iommu. 
+ */ +struct kgsl_iommu { + struct kgsl_iommu_context ctx[KGSL_IOMMU_CONTEXT_MAX]; + void __iomem *regbase; + unsigned long regstart; + unsigned int regsize; + atomic_t clk_enable_count; + struct clk *clks[KGSL_IOMMU_MAX_CLKS]; + unsigned int micro_mmu_ctrl; + struct kgsl_memdesc smmu_info; + unsigned int version; + struct kgsl_protected_registers protect; +}; + +/* + * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver + * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @ttbr0: register value to set when using this pagetable + * @contextidr: register value to set when using this pagetable + * @attached: is the pagetable attached? + * @rbtree: all buffers mapped into the pagetable, indexed by gpuaddr + * @va_start: Start of virtual range used in this pagetable. + * @va_end: End of virtual range. + * @svm_start: Start of shared virtual memory range. Addresses in this + * range are also valid in the process's CPU address space. + * @svm_end: End of the shared virtual memory range. + * @svm_start: 32 bit compatible range, for old clients who lack bits + * @svm_end: end of 32 bit compatible range + */ +struct kgsl_iommu_pt { + struct iommu_domain *domain; + u64 ttbr0; + u32 contextidr; + bool attached; + + struct rb_root rbtree; + + uint64_t va_start; + uint64_t va_end; + uint64_t svm_start; + uint64_t svm_end; + uint64_t compat_va_start; + uint64_t compat_va_end; +}; + +/* + * offset of context bank 0 from the start of the SMMU register space. + */ +#define KGSL_IOMMU_CB0_OFFSET 0x8000 +/* size of each context bank's register space */ +#define KGSL_IOMMU_CB_SHIFT 12 + +/* Macros to read/write IOMMU registers */ +extern const unsigned int kgsl_iommu_reg_list[KGSL_IOMMU_REG_MAX]; + +static inline void __iomem * +kgsl_iommu_reg(struct kgsl_iommu_context *ctx, enum kgsl_iommu_reg_map reg) +{ + BUG_ON(ctx->regbase == NULL); + BUG_ON(reg >= KGSL_IOMMU_REG_MAX); + return ctx->regbase + kgsl_iommu_reg_list[reg]; +} + +#define KGSL_IOMMU_SET_CTX_REG_Q(_ctx, REG, val) \ + writeq_relaxed((val), \ + kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + +#define KGSL_IOMMU_GET_CTX_REG_Q(_ctx, REG) \ + readq_relaxed(kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + +#define KGSL_IOMMU_SET_CTX_REG(_ctx, REG, val) \ + writel_relaxed((val), \ + kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + +#define KGSL_IOMMU_GET_CTX_REG(_ctx, REG) \ + readl_relaxed(kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + + +#endif diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h new file mode 100644 index 000000000000..70480f8e9189 --- /dev/null +++ b/drivers/gpu/msm/kgsl_log.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2002,2008-2011,2013-2014 The Linux Foundation. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_LOG_H +#define __KGSL_LOG_H + +#define KGSL_LOG_INFO(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 6) \ + dev_info(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_WARN(dev, lvl, fmt, args...) 
\ + do { \ + if ((lvl) >= 4) \ + dev_warn(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_ERR(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 3) \ + dev_err(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_CRIT(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 2) \ + dev_crit(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_FATAL(dev, lvl, fmt, args...) \ + do { \ + dev_crit(dev, "|%s| " fmt, __func__, ##args);\ + BUG(); \ + } while (0) + +#define KGSL_LOG_POSTMORTEM_WRITE(_dev, fmt, args...) \ + do { dev_crit(_dev->dev, fmt, ##args); } while (0) + +#define KGSL_LOG_DUMP(_dev, fmt, args...) dev_err(_dev->dev, fmt, ##args) + +#define KGSL_DEV_ERR_ONCE(_dev, fmt, args...) \ +({ \ + static bool kgsl_dev_err_once; \ + \ + if (!kgsl_dev_err_once) { \ + kgsl_dev_err_once = true; \ + dev_crit(_dev->dev, "|%s| " fmt, __func__, ##args); \ + } \ +}) + +#define KGSL_LOG_CRIT_RATELIMITED(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 2) \ + dev_crit_ratelimited(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_DRV_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_CRIT_RATELIMIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT_RATELIMITED(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_FATAL(_dev, fmt, args...) \ +KGSL_LOG_FATAL((_dev)->dev, (_dev)->drv_log, fmt, ##args) + +#define KGSL_CMD_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->cmd_log, fmt, ##args) +#define KGSL_CMD_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->cmd_log, fmt, ##args) +#define KGSL_CMD_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->cmd_log, fmt, ##args) +#define KGSL_CMD_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->cmd_log, fmt, ##args) + +#define KGSL_CTXT_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->ctxt_log, fmt, ##args) +#define KGSL_CTXT_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->ctxt_log, fmt, ##args) +#define KGSL_CTXT_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->ctxt_log, fmt, ##args) +#define KGSL_CTXT_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->ctxt_log, fmt, ##args) + +#define KGSL_MEM_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->mem_log, fmt, ##args) +#define KGSL_MEM_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->mem_log, fmt, ##args) +#define KGSL_MEM_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->mem_log, fmt, ##args) +#define KGSL_MEM_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->mem_log, fmt, ##args) + +#define KGSL_PWR_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->pwr_log, fmt, ##args) +#define KGSL_PWR_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->pwr_log, fmt, ##args) +#define KGSL_PWR_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->pwr_log, fmt, ##args) +#define KGSL_PWR_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args) + +/* Core error messages - these are for core KGSL functions that have + no device associated with them (such as memory) */ + +#define KGSL_CORE_ERR(fmt, args...) 
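All of these wrappers gate on a per-device verbosity number: 6 and above enables info, 4 warnings, 3 errors, 2 critical, roughly mirroring syslog severities. A minimal userspace imitation of the same gating, with printf standing in for the dev_*() helpers; the macro and variable names here are invented for the example:

#include <stdio.h>

#define LOG_INFO(lvl, fmt, ...) \
        do { if ((lvl) >= 6) printf("info: |%s| " fmt, __func__, ##__VA_ARGS__); } while (0)
#define LOG_WARN(lvl, fmt, ...) \
        do { if ((lvl) >= 4) printf("warn: |%s| " fmt, __func__, ##__VA_ARGS__); } while (0)

static void example(void)
{
        int drv_log = 3;                       /* per-device verbosity knob */

        LOG_INFO(drv_log, "suppressed below level 6\n");
        LOG_WARN(drv_log, "suppressed until drv_log >= 4\n");
}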
\ +pr_err("kgsl: %s: " fmt, __func__, ##args) + +#define KGSL_CORE_ERR_ONCE(fmt, args...) \ +({ \ + static bool kgsl_core_err_once; \ + if (!kgsl_core_err_once) { \ + kgsl_core_err_once = true; \ + pr_err("kgsl: %s: " fmt, __func__, ##args); \ + } \ +}) + +#endif /* __KGSL_LOG_H */ diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c new file mode 100644 index 000000000000..9fc6310d0d36 --- /dev/null +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -0,0 +1,886 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/export.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/genalloc.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/iommu.h> +#include <linux/types.h> + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +static enum kgsl_mmutype kgsl_mmu_type = KGSL_MMU_TYPE_NONE; + +static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable); + +/* + * There are certain memory allocations (ringbuffer, memstore, etc) that need to + * be present at the same address in every pagetable. We call these "global" + * pagetable entries. There are relatively few of these and they are mostly + * stable (defined at init time) but the actual number of globals can differ + * slight depending on the target and implementation. + * + * Here we define an array and a simple allocator to keep track of the currently + * active global entries. Each entry is assigned a unique address inside of a + * MMU implementation specific "global" region. The addresses are assigned + * sequentially and never re-used to avoid having to go back and reprogram + * existing pagetables. The entire list of active entries are mapped and + * unmapped into every new pagetable as it is created and destroyed. + * + * Because there are relatively few entries and they are defined at boot time we + * don't need to go over the top to define a dynamic allocation scheme. It will + * be less wasteful to pick a static number with a little bit of growth + * potential. + */ + +#define KGSL_MAX_GLOBAL_PT_ENTRIES 32 + +/** + * struct kgsl_global_pt_entries - Collection of global pagetable entries + * @offset - offset into the global PT space to be assigned to then next + * allocation + * @entries: Array of assigned memdesc entries + * @count: Number of currently assigned entries + * + * Maintain a list of global pagetable entries. 
Pagetables are shared between + * devices so the global pt entry list needs to be driver wide too + */ +static struct kgsl_global_pt_entries { + unsigned int offset; + struct kgsl_memdesc *entries[KGSL_MAX_GLOBAL_PT_ENTRIES]; + int count; +} kgsl_global_pt_entries; + +/** + * kgsl_search_global_pt_entries() - Check to see if the given GPU address + * belongs to any of the global PT entries + * @gpuaddr: GPU address to search for + * @size: Size of the region to search for + * + * Search all the global pagetable entries for the GPU address and size and + * return the memory descriptor + */ +struct kgsl_memdesc *kgsl_search_global_pt_entries(unsigned int gpuaddr, + unsigned int size) +{ + int i; + + for (i = 0; i < KGSL_MAX_GLOBAL_PT_ENTRIES; i++) { + struct kgsl_memdesc *memdesc = + kgsl_global_pt_entries.entries[i]; + + if (memdesc && kgsl_gpuaddr_in_memdesc(memdesc, gpuaddr, size)) + return memdesc; + } + + return NULL; +} +EXPORT_SYMBOL(kgsl_search_global_pt_entries); + +/** + * kgsl_unmap_global_pt_entries() - Unmap all global entries from the given + * pagetable + * @pagetable: Pointer to a kgsl_pagetable structure + * + * Unmap all the current active global entries from the specified pagetable + */ +static void kgsl_unmap_global_pt_entries(struct kgsl_pagetable *pagetable) +{ + int i; + unsigned long flags; + + BUG_ON(pagetable->name == KGSL_MMU_GLOBAL_PT); + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + if (pagetable->globals_mapped == false) { + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return; + } + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + for (i = 0; i < KGSL_MAX_GLOBAL_PT_ENTRIES; i++) { + struct kgsl_memdesc *entry = kgsl_global_pt_entries.entries[i]; + if (entry != NULL) + kgsl_mmu_unmap(pagetable, entry); + } + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + pagetable->globals_mapped = false; + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); +} + +/** + * kgsl_map_global_pt_entries() - Map all active global entries into the given + * pagetable + * @pagetable: Pointer to a kgsl_pagetable structure + * + * Map all the current global PT entries into the specified pagetable. 
+ */ +void kgsl_map_global_pt_entries(struct kgsl_pagetable *pagetable) +{ + int i, ret = 0; + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + if (pagetable->globals_mapped == true) { + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return; + } + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + for (i = 0; !ret && i < KGSL_MAX_GLOBAL_PT_ENTRIES; i++) { + struct kgsl_memdesc *entry = kgsl_global_pt_entries.entries[i]; + + if (entry != NULL) { + ret = kgsl_mmu_map(pagetable, entry); + BUG_ON(ret); + } + } + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + pagetable->globals_mapped = true; + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); +} +EXPORT_SYMBOL(kgsl_map_global_pt_entries); + +/** + * kgsl_remove_global_pt_entry() - Remove a memory descriptor from the global PT + * entry list + * @memdesc: Pointer to the kgsl memory descriptor to remove + * + * Remove the specified memory descriptor from the current list of global + * pagetable entries + */ +void kgsl_remove_global_pt_entry(struct kgsl_memdesc *memdesc) +{ + int i, j; + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return; + + if (memdesc->gpuaddr == 0) + return; + + for (i = 0; i < kgsl_global_pt_entries.count; i++) { + if (kgsl_global_pt_entries.entries[i] == memdesc) { + memdesc->gpuaddr = 0; + memdesc->priv &= ~KGSL_MEMDESC_GLOBAL; + for (j = i; j < kgsl_global_pt_entries.count; j++) + kgsl_global_pt_entries.entries[j] = + kgsl_global_pt_entries.entries[j + 1]; + kgsl_global_pt_entries.entries[j - 1] = NULL; + kgsl_global_pt_entries.count--; + break; + } + } +} +EXPORT_SYMBOL(kgsl_remove_global_pt_entry); + +/** + * kgsl_add_global_pt_entry() - Add a new global PT entry to the active list + * @mmu: Pointer to a kgsl_mmu structure for the active MMU implementation + * @memdesc: Pointer to the kgsl memory descriptor to add + * + * Add a memory descriptor to the list of global pagetable entries. + */ +int kgsl_add_global_pt_entry(struct kgsl_device *device, + struct kgsl_memdesc *memdesc) +{ + int i; + int index = 0; + uint64_t gaddr = KGSL_MMU_GLOBAL_MEM_BASE; + uint64_t size = ALIGN(memdesc->size, PAGE_SIZE); + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { + memdesc->gpuaddr = (uint64_t) memdesc->physaddr; + return 0; + } + + /* do we already have a mapping? */ + if (memdesc->gpuaddr != 0) + return 0; + + if (kgsl_global_pt_entries.count == KGSL_MAX_GLOBAL_PT_ENTRIES) + return -ENOMEM; + + /* + * search for the first free slot by going through all valid entries + * and checking for overlap. 
All entries are in increasing order of + * gpuaddr + */ + for (i = 0; i < kgsl_global_pt_entries.count; i++) { + if (kgsl_addr_range_overlap(gaddr, size, + kgsl_global_pt_entries.entries[i]->gpuaddr, + kgsl_global_pt_entries.entries[i]->size)) + /* On a clash set gaddr to end of clashing entry */ + gaddr = kgsl_global_pt_entries.entries[i]->gpuaddr + + kgsl_global_pt_entries.entries[i]->size; + else + break; + } + index = i; + if ((gaddr + size) >= + (KGSL_MMU_GLOBAL_MEM_BASE + KGSL_MMU_GLOBAL_MEM_SIZE)) + return -ENOMEM; + + memdesc->gpuaddr = gaddr; + + memdesc->priv |= KGSL_MEMDESC_GLOBAL; + /* + * Move the entries from index till the last entry 1 slot right leaving + * the slot at index empty for the newcomer + */ + for (i = kgsl_global_pt_entries.count - 1; i >= index; i--) + kgsl_global_pt_entries.entries[i + 1] = + kgsl_global_pt_entries.entries[i]; + kgsl_global_pt_entries.entries[index] = memdesc; + kgsl_global_pt_entries.count++; + + return 0; +} +EXPORT_SYMBOL(kgsl_add_global_pt_entry); + +static void kgsl_destroy_pagetable(struct kref *kref) +{ + struct kgsl_pagetable *pagetable = container_of(kref, + struct kgsl_pagetable, refcount); + + kgsl_mmu_detach_pagetable(pagetable); + + kgsl_unmap_global_pt_entries(pagetable); + + pagetable->pt_ops->mmu_destroy_pagetable(pagetable); + + kfree(pagetable); +} + +static inline void kgsl_put_pagetable(struct kgsl_pagetable *pagetable) +{ + if (pagetable) + kref_put(&pagetable->refcount, kgsl_destroy_pagetable); +} + +static struct kgsl_pagetable * +kgsl_get_pagetable(unsigned long name) +{ + struct kgsl_pagetable *pt, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (name == pt->name && kref_get_unless_zero(&pt->refcount)) { + ret = pt; + break; + } + } + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return ret; +} + +static struct kgsl_pagetable * +_get_pt_from_kobj(struct kobject *kobj) +{ + unsigned int ptname; + + if (!kobj) + return NULL; + + if (kstrtou32(kobj->name, 0, &ptname)) + return NULL; + + return kgsl_get_pagetable(ptname); +} + +static ssize_t +sysfs_show_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + unsigned int val = atomic_read(&pt->stats.entries); + + ret += snprintf(buf, PAGE_SIZE, "%d\n", val); + } + + kgsl_put_pagetable(pt); + return ret; +} + +static ssize_t +sysfs_show_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.mapped); + + ret += snprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kgsl_put_pagetable(pt); + return ret; +} + +static ssize_t +sysfs_show_max_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.max_mapped); + + ret += snprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kgsl_put_pagetable(pt); + return ret; +} + +static struct kobj_attribute attr_entries = { + .attr = { .name = "entries", .mode = 0444 }, + .show = sysfs_show_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_mapped = { + .attr = { .name = "mapped", .mode = 0444 }, + .show = sysfs_show_mapped, + .store = NULL, +}; + +static struct kobj_attribute attr_max_mapped = { + 
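Tying this back to the allocator in kgsl_add_global_pt_entry() above: the candidate address starts at the bottom of the global window, is bumped past each clashing entry in gpuaddr order, and the insert fails once the candidate would run past the 8 MB window. A simplified array-based sketch of that placement policy, where kgsl_addr_range_overlap() is modelled by a local helper and 0 stands in for the driver's -ENOMEM:

#include <stdint.h>

#define GLOBAL_BASE 0xf8000000ULL            /* KGSL_MMU_GLOBAL_MEM_BASE */
#define GLOBAL_SIZE (8ULL << 20)             /* KGSL_MMU_GLOBAL_MEM_SIZE (SZ_8M) */

struct global_entry { uint64_t gpuaddr, size; };   /* kept sorted by gpuaddr */

static int overlap(uint64_t a, uint64_t asize, uint64_t b, uint64_t bsize)
{
        return a < b + bsize && b < a + asize;
}

/* Pick a global GPU address for a new entry of 'size' bytes; 0 means no room. */
static uint64_t place_global(const struct global_entry *e, int count, uint64_t size)
{
        uint64_t gaddr = GLOBAL_BASE;
        int i;

        for (i = 0; i < count; i++) {
                if (overlap(gaddr, size, e[i].gpuaddr, e[i].size))
                        gaddr = e[i].gpuaddr + e[i].size;   /* skip past the clash */
                else
                        break;
        }
        return (gaddr + size >= GLOBAL_BASE + GLOBAL_SIZE) ? 0 : gaddr;
}

The real function additionally shifts the later entries one slot to the right so the array stays sorted by gpuaddr.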
.attr = { .name = "max_mapped", .mode = 0444 }, + .show = sysfs_show_max_mapped, + .store = NULL, +}; + +static struct attribute *pagetable_attrs[] = { + &attr_entries.attr, + &attr_mapped.attr, + &attr_max_mapped.attr, + NULL, +}; + +static struct attribute_group pagetable_attr_group = { + .attrs = pagetable_attrs, +}; + +static void +pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + if (pagetable->kobj) + sysfs_remove_group(pagetable->kobj, + &pagetable_attr_group); + + kobject_put(pagetable->kobj); + pagetable->kobj = NULL; +} + +static int +pagetable_add_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + char ptname[16]; + int ret = -ENOMEM; + + snprintf(ptname, sizeof(ptname), "%d", pagetable->name); + pagetable->kobj = kobject_create_and_add(ptname, + kgsl_driver.ptkobj); + if (pagetable->kobj == NULL) + goto err; + + ret = sysfs_create_group(pagetable->kobj, &pagetable_attr_group); + +err: + if (ret) { + if (pagetable->kobj) + kobject_put(pagetable->kobj); + + pagetable->kobj = NULL; + } + + return ret; +} + +void +kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable) +{ + unsigned long flags; + + /* NOMMU has no pagetable so return early if its NULL */ + if (!pagetable) + return; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + + if (!list_empty(&pagetable->list)) + list_del_init(&pagetable->list); + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + pagetable_remove_sysfs_objects(pagetable); +} + +int +kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, u64 pt_base) +{ + struct kgsl_pagetable *pt; + int ptid = -1; + + if (!mmu->mmu_ops) + return KGSL_MMU_GLOBAL_PT; + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == pt_base) { + ptid = (int) pt->name; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ptid; +} +EXPORT_SYMBOL(kgsl_mmu_get_ptname_from_ptbase); + +struct kgsl_pagetable *kgsl_mmu_get_pt_from_ptname(struct kgsl_mmu *mmu, + int ptname) +{ + struct kgsl_pagetable *pt; + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (pt->name == ptname) { + spin_unlock(&kgsl_driver.ptlock); + return pt; + } + } + spin_unlock(&kgsl_driver.ptlock); + return NULL; + +} +EXPORT_SYMBOL(kgsl_mmu_get_pt_from_ptname); + +unsigned int +kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, phys_addr_t pt_base, + unsigned int addr) +{ + struct kgsl_pagetable *pt; + unsigned int ret = 0; + + if (!mmu->mmu_ops) + return 0; + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == pt_base) { + if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) { + ret = 1; + break; + } else { + pt->fault_addr = + (addr & ~(PAGE_SIZE-1)); + ret = 0; + break; + } + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_log_fault_addr); + +int kgsl_mmu_init(struct kgsl_device *device, char *mmutype) +{ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + mmu->device = device; + + if (mmutype && !strcmp(mmutype, "nommu")) + kgsl_mmu_type = KGSL_MMU_TYPE_NONE; + + /* + * Don't use kgsl_allocate_global here because we need to get the MMU + * set up before we can add the global entry but the MMU init needs the + * setstate block. 
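One more aside on kgsl_mmu_log_fault_addr() above: repeated faults on the same page are collapsed by remembering only the page-aligned address of the most recent fault. The core of that de-duplication as a tiny standalone helper; PAGE_SIZE is assumed to be 4 KB here and the function name is invented:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

/* Return true if this fault is on the same page as the one last logged;
 * otherwise record the new page and tell the caller to log it. */
static bool fault_already_logged(uint32_t *last_fault_page, uint32_t addr)
{
        uint32_t page = addr & ~(PAGE_SIZE - 1);

        if (page == *last_fault_page)
                return true;
        *last_fault_page = page;
        return false;
}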
Allocate the memory here and map it later + */ + + status = kgsl_allocate_contiguous(device, &mmu->setstate_memory, + PAGE_SIZE); + if (status) + return status; + + /* Mark the setstate memory as read only */ + mmu->setstate_memory.flags |= KGSL_MEMFLAGS_GPUREADONLY; + + kgsl_sharedmem_set(device, &mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + + if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) { + mmu->mmu_ops = &kgsl_iommu_ops; + status = mmu->mmu_ops->mmu_init(mmu); + } + + if (status) + goto done; + + /* Add the setstate memory to the global PT entry list */ + status = kgsl_add_global_pt_entry(device, &mmu->setstate_memory); + +done: + if (status) + kgsl_sharedmem_free(&mmu->setstate_memory); + + return status; +} +EXPORT_SYMBOL(kgsl_mmu_init); + +int kgsl_mmu_start(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + int ret = 0; + + if (kgsl_mmu_type != KGSL_MMU_TYPE_NONE) + ret = mmu->mmu_ops->mmu_start(mmu); + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_start); + +static struct kgsl_pagetable * +kgsl_mmu_createpagetableobject(struct kgsl_mmu *mmu, + unsigned int name) +{ + int status = 0; + struct kgsl_pagetable *pagetable = NULL; + unsigned long flags; + + pagetable = kzalloc(sizeof(struct kgsl_pagetable), GFP_KERNEL); + if (pagetable == NULL) + return ERR_PTR(-ENOMEM); + + kref_init(&pagetable->refcount); + + spin_lock_init(&pagetable->lock); + + pagetable->mmu = mmu; + pagetable->name = name; + pagetable->fault_addr = 0xFFFFFFFF; + + atomic_set(&pagetable->stats.entries, 0); + atomic_long_set(&pagetable->stats.mapped, 0); + atomic_long_set(&pagetable->stats.max_mapped, 0); + + if (mmu->mmu_ops && mmu->mmu_ops->mmu_init_pt) { + status = mmu->mmu_ops->mmu_init_pt(mmu, pagetable); + if (status) + goto err; + } + + if (KGSL_MMU_SECURE_PT != name) + kgsl_map_global_pt_entries(pagetable); + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_add(&pagetable->list, &kgsl_driver.pagetable_list); + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + /* Create the sysfs entries */ + pagetable_add_sysfs_objects(pagetable); + + return pagetable; + +err: + if (pagetable->priv) + pagetable->pt_ops->mmu_destroy_pagetable(pagetable); + + kfree(pagetable); + + return ERR_PTR(status); +} + +struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + struct kgsl_pagetable *pt; + + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return (void *)(-1); + + if (!kgsl_mmu_is_perprocess(mmu) && (KGSL_MMU_SECURE_PT != name)) { + name = KGSL_MMU_GLOBAL_PT; + if (mmu->defaultpagetable) + return mmu->defaultpagetable; + } + + pt = kgsl_get_pagetable(name); + + if (pt == NULL) + pt = kgsl_mmu_createpagetableobject(mmu, name); + + return pt; +} + +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) +{ + kgsl_put_pagetable(pagetable); +} +EXPORT_SYMBOL(kgsl_mmu_putpagetable); + +static int _nommu_get_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (memdesc->sgt->nents > 1) { + KGSL_CORE_ERR( + "Attempt to map non-contiguous memory with NOMMU\n"); + return -EINVAL; + } + + memdesc->gpuaddr = (uint64_t) sg_phys(memdesc->sgt->sgl); + + if (memdesc->gpuaddr == 0) { + KGSL_CORE_ERR("Unable to get a physical address\n"); + return -EINVAL; + } + + return 0; +} + +/** + * kgsl_mmu_find_svm_region() - Find a empty spot in the SVM region + * @pagetable: KGSL pagetable to search + * @start: start of search range, must be within kgsl_mmu_svm_range() + * @end: end of search range, must be within kgsl_mmu_svm_range() + * @size: Size of the region to find + * 
@align: Desired alignment of the address + */ +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t align) +{ + BUG_ON(pagetable == NULL || pagetable->pt_ops->find_svm_region == NULL); + return pagetable->pt_ops->find_svm_region(pagetable, start, end, size, + align); +} + +/** + * kgsl_mmu_set_svm_region() - Check if a region is empty and reserve it if so + * @pagetable: KGSL pagetable to search + * @gpuaddr: GPU address to check/reserve + * @size: Size of the region to check/reserve + */ +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size) +{ + BUG_ON(pagetable == NULL || pagetable->pt_ops->set_svm_region == NULL); + return pagetable->pt_ops->set_svm_region(pagetable, gpuaddr, size); +} + +/** + * kgsl_mmu_get_gpuaddr() - Assign a GPU address to the memdesc + * @pagetable: GPU pagetable to assign the address in + * @memdesc: mem descriptor to assign the memory to + */ +int +kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return _nommu_get_gpuaddr(memdesc); + + BUG_ON(pagetable == NULL || pagetable->pt_ops->get_gpuaddr == NULL); + return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc); +} +EXPORT_SYMBOL(kgsl_mmu_get_gpuaddr); + +int +kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + int size; + + if (!memdesc->gpuaddr) + return -EINVAL; + /* Only global mappings should be mapped multiple times */ + if (!kgsl_memdesc_is_global(memdesc) && + (KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) + return 0; + + /* Add space for the guard page when allocating the mmu VA. 
*/ + size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += kgsl_memdesc_guard_page_size(pagetable->mmu, memdesc); + + ret = pagetable->pt_ops->mmu_map(pagetable, memdesc); + + if (ret == 0) { + KGSL_STATS_ADD(size, &pagetable->stats.mapped, + &pagetable->stats.max_mapped); + + atomic_inc(&pagetable->stats.entries); + + memdesc->priv |= KGSL_MEMDESC_MAPPED; + } + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_map); + +/** + * kgsl_mmu_put_gpuaddr() - Remove a GPU address from a pagetable + * @pagetable: Pagetable to release the memory from + * @memdesc: Memory descriptor containing the GPU address to free + */ +int kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return 0; + + if (pagetable != NULL && pagetable->pt_ops->put_gpuaddr != NULL) + pagetable->pt_ops->put_gpuaddr(pagetable, memdesc); + + if (!kgsl_memdesc_is_global(memdesc)) + memdesc->gpuaddr = 0; + + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_put_gpuaddr); + +/** + * kgsl_mmu_svm_range() - Return the range for SVM (if applicable) + * @pagetable: Pagetable to query the range from + * @lo: Pointer to store the start of the SVM range + * @hi: Pointer to store the end of the SVM range + * @memflags: Flags from the buffer we are mapping + */ +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + if (pagetable == NULL || pagetable->pt_ops->svm_range == NULL) + return -ENODEV; + + return pagetable->pt_ops->svm_range(pagetable, lo, hi, memflags); +} +EXPORT_SYMBOL(kgsl_mmu_svm_range); + +int +kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int size; + uint64_t start_addr = 0; + uint64_t end_addr = 0; + + if (memdesc->size == 0 || memdesc->gpuaddr == 0 || + !(KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return 0; + + /* Add space for the guard page when freeing the mmu VA. 
*/ + size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += kgsl_memdesc_guard_page_size(pagetable->mmu, memdesc); + + start_addr = memdesc->gpuaddr; + end_addr = (memdesc->gpuaddr + size); + + pagetable->pt_ops->mmu_unmap(pagetable, memdesc); + + /* If buffer is unmapped 0 fault addr */ + if ((pagetable->fault_addr >= start_addr) && + (pagetable->fault_addr < end_addr)) + pagetable->fault_addr = 0; + + /* Remove the statistics */ + atomic_dec(&pagetable->stats.entries); + atomic_long_sub(size, &pagetable->stats.mapped); + + if (!kgsl_memdesc_is_global(memdesc)) + memdesc->priv &= ~KGSL_MEMDESC_MAPPED; + + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_unmap); + +int kgsl_mmu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + int ret = 0; + + kgsl_free_global(&mmu->setstate_memory); + + if (mmu->mmu_ops != NULL) + ret = mmu->mmu_ops->mmu_close(mmu); + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_close); + +int kgsl_mmu_enabled(void) +{ + if (KGSL_MMU_TYPE_NONE != kgsl_mmu_type) + return 1; + else + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_enabled); + +enum kgsl_mmutype kgsl_mmu_get_mmutype(void) +{ + return kgsl_mmu_type; +} +EXPORT_SYMBOL(kgsl_mmu_get_mmutype); + +void kgsl_mmu_set_mmutype(enum kgsl_mmutype type) +{ + kgsl_mmu_type = type; +} +EXPORT_SYMBOL(kgsl_mmu_set_mmutype); + +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return (gpuaddr != 0); + + if (pagetable == NULL || pagetable->pt_ops->addr_in_range == NULL) + return false; + + return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr); +} +EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range); diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h new file mode 100644 index 000000000000..dc266ab9f381 --- /dev/null +++ b/drivers/gpu/msm/kgsl_mmu.h @@ -0,0 +1,364 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_MMU_H +#define __KGSL_MMU_H + +#include "kgsl_iommu.h" +/* + * These defines control the address range for allocations that + * are mapped into all pagetables. 
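+ * With the defaults below that is the 8MB window starting at GPU VA
+ * 0xf8000000 (0xf8000000 - 0xf87fffff) on targets using these values.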
+ */ +#define KGSL_MMU_GLOBAL_MEM_SIZE SZ_8M +#define KGSL_MMU_GLOBAL_MEM_BASE 0xf8000000 + +/* Identifier for the global page table */ +/* Per process page tables will probably pass in the thread group + as an identifier */ +#define KGSL_MMU_GLOBAL_PT 0 +#define KGSL_MMU_SECURE_PT 1 + +struct kgsl_device; + +enum kgsl_mmutype { + KGSL_MMU_TYPE_IOMMU = 0, + KGSL_MMU_TYPE_NONE +}; + +struct kgsl_pagetable { + spinlock_t lock; + struct kref refcount; + struct list_head list; + unsigned int name; + struct kobject *kobj; + + struct { + atomic_t entries; + atomic_long_t mapped; + atomic_long_t max_mapped; + } stats; + const struct kgsl_mmu_pt_ops *pt_ops; + unsigned int fault_addr; + void *priv; + struct kgsl_mmu *mmu; + bool globals_mapped; +}; + +struct kgsl_mmu; + +struct kgsl_mmu_ops { + int (*mmu_init) (struct kgsl_mmu *mmu); + int (*mmu_close) (struct kgsl_mmu *mmu); + int (*mmu_start) (struct kgsl_mmu *mmu); + void (*mmu_stop) (struct kgsl_mmu *mmu); + int (*mmu_set_pt) (struct kgsl_mmu *mmu, struct kgsl_pagetable *pt); + uint64_t (*mmu_get_current_ttbr0)(struct kgsl_mmu *mmu); + void (*mmu_pagefault_resume)(struct kgsl_mmu *mmu); + void (*mmu_clear_fsr)(struct kgsl_mmu *mmu); + void (*mmu_enable_clk)(struct kgsl_mmu *mmu); + void (*mmu_disable_clk)(struct kgsl_mmu *mmu); + unsigned int (*mmu_get_reg_ahbaddr)(struct kgsl_mmu *mmu, + enum kgsl_iommu_context_id ctx_id, + enum kgsl_iommu_reg_map reg); + int (*mmu_set_pf_policy)(struct kgsl_mmu *mmu, unsigned long pf_policy); + struct kgsl_protected_registers *(*mmu_get_prot_regs) + (struct kgsl_mmu *mmu); + int (*mmu_init_pt)(struct kgsl_mmu *mmu, struct kgsl_pagetable *); +}; + +struct kgsl_mmu_pt_ops { + int (*mmu_map)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + int (*mmu_unmap)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + void (*mmu_destroy_pagetable) (struct kgsl_pagetable *); + u64 (*get_ttbr0)(struct kgsl_pagetable *); + u32 (*get_contextidr)(struct kgsl_pagetable *); + int (*get_gpuaddr)(struct kgsl_pagetable *, struct kgsl_memdesc *); + void (*put_gpuaddr)(struct kgsl_pagetable *, struct kgsl_memdesc *); + uint64_t (*find_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t, + uint64_t, uint64_t); + int (*set_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t); + int (*svm_range)(struct kgsl_pagetable *, uint64_t *, uint64_t *, + uint64_t); + bool (*addr_in_range)(struct kgsl_pagetable *pagetable, uint64_t); +}; + +/* + * MMU_FEATURE - return true if the specified feature is supported by the GPU + * MMU + */ +#define MMU_FEATURE(_mmu, _bit) \ + ((_mmu)->features & (_bit)) + +/* MMU has register retention */ +#define KGSL_MMU_RETENTION BIT(1) +/* MMU requires the TLB to be flushed on map */ +#define KGSL_MMU_FLUSH_TLB_ON_MAP BIT(2) +/* MMU uses global pagetable */ +#define KGSL_MMU_GLOBAL_PAGETABLE BIT(3) +/* MMU uses hypervisor for content protection */ +#define KGSL_MMU_HYP_SECURE_ALLOC BIT(4) +/* Force 32 bit, even if the MMU can do 64 bit */ +#define KGSL_MMU_FORCE_32BIT BIT(5) +/* 64 bit address is live */ +#define KGSL_MMU_64BIT BIT(6) +/* MMU can do coherent hardware table walks */ +#define KGSL_MMU_COHERENT_HTW BIT(7) + +struct kgsl_mmu { + uint32_t flags; + struct kgsl_device *device; + struct kgsl_memdesc setstate_memory; + /* current page table object being used by device mmu */ + struct kgsl_pagetable *defaultpagetable; + /* secure global pagetable device mmu */ + struct kgsl_pagetable *securepagetable; + const struct kgsl_mmu_ops *mmu_ops; + void *priv; + bool secured; + uint 
features; + unsigned int secure_align_mask; +}; + +extern struct kgsl_mmu_ops kgsl_iommu_ops; + +struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *, + unsigned long name); + +struct kgsl_pagetable *kgsl_mmu_getpagetable_ptbase(struct kgsl_mmu *, + u64 ptbase); + +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable); +int kgsl_mmu_init(struct kgsl_device *device, char *mmutype); +int kgsl_mmu_start(struct kgsl_device *device); +int kgsl_mmu_close(struct kgsl_device *device); +int kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr); +int kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, u64 pt_base); +unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, + phys_addr_t pt_base, unsigned int addr); +int kgsl_mmu_enabled(void); +void kgsl_mmu_set_mmutype(enum kgsl_mmutype type); +enum kgsl_mmutype kgsl_mmu_get_mmutype(void); +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr); + +int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size); + +int kgsl_mmu_find_region(struct kgsl_pagetable *pagetable, + uint64_t region_start, uint64_t region_end, + uint64_t *gpuaddr, uint64_t size, unsigned int align); + +int kgsl_add_global_pt_entry(struct kgsl_device *device, + struct kgsl_memdesc *memdesc); +void kgsl_remove_global_pt_entry(struct kgsl_memdesc *memdesc); +void kgsl_map_global_pt_entries(struct kgsl_pagetable *pagetable); + +struct kgsl_memdesc *kgsl_search_global_pt_entries(unsigned int gpuaddr, + unsigned int size); +struct kgsl_pagetable *kgsl_mmu_get_pt_from_ptname(struct kgsl_mmu *mmu, + int ptname); + +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment); + +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size); + +void kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable); + +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags); + +/* + * Static inline functions of MMU that simply call the SMMU specific + * function using a function pointer. 
These functions can be thought + * of as wrappers around the actual function + */ + +static inline u64 kgsl_mmu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_current_ttbr0) + return mmu->mmu_ops->mmu_get_current_ttbr0(mmu); + else + return 0; +} + +static inline int kgsl_mmu_set_pt(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_set_pt) + return mmu->mmu_ops->mmu_set_pt(mmu, pagetable); + + return 0; +} + +static inline void kgsl_mmu_stop(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_stop) + mmu->mmu_ops->mmu_stop(mmu); +} + +static inline void kgsl_mmu_enable_clk(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_enable_clk) + mmu->mmu_ops->mmu_enable_clk(mmu); + else + return; +} + +static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_disable_clk) + mmu->mmu_ops->mmu_disable_clk(mmu); +} + +/* + * kgsl_mmu_get_reg_ahbaddr() - Calls the mmu specific function pointer to + * return the address that GPU can use to access register + * @mmu: Pointer to the device mmu + * @ctx_id: The MMU HW context ID + * @reg: Register whose address is to be returned + * + * Returns the ahb address of reg else 0 + */ +static inline unsigned int kgsl_mmu_get_reg_ahbaddr(struct kgsl_mmu *mmu, + enum kgsl_iommu_context_id ctx_id, + enum kgsl_iommu_reg_map reg) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_reg_ahbaddr) + return mmu->mmu_ops->mmu_get_reg_ahbaddr(mmu, ctx_id, reg); + else + return 0; +} + +/* + * kgsl_mmu_is_perprocess() - Runtime check for per-process + * pagetables. + * @mmu: the mmu + * + * Returns true if per-process pagetables are enabled, + * false if not. + */ +static inline int kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu) +{ + return MMU_FEATURE(mmu, KGSL_MMU_GLOBAL_PAGETABLE) ? 0 : 1; +} + +/* + * kgsl_mmu_use_cpu_map() - Runtime check for matching the CPU + * address space on the GPU. + * @mmu: the mmu + * + * Returns true if supported false if not. 
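+ * In practice that means per-process pagetables are enabled and a real
+ * MMU type is in use, as the implementation below shows.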
+ */ +static inline int kgsl_mmu_use_cpu_map(struct kgsl_mmu *mmu) +{ + return kgsl_mmu_is_perprocess(mmu) && + kgsl_mmu_get_mmutype() != KGSL_MMU_TYPE_NONE; +} + +static inline int kgsl_mmu_set_pagefault_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_set_pf_policy) + return mmu->mmu_ops->mmu_set_pf_policy(mmu, pf_policy); + else + return 0; +} + +static inline void kgsl_mmu_pagefault_resume(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_pagefault_resume) + return mmu->mmu_ops->mmu_pagefault_resume(mmu); +} + +static inline void kgsl_mmu_clear_fsr(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_clear_fsr) + return mmu->mmu_ops->mmu_clear_fsr(mmu); +} + +static inline struct kgsl_protected_registers *kgsl_mmu_get_prot_regs + (struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_prot_regs) + return mmu->mmu_ops->mmu_get_prot_regs(mmu); + else + return NULL; +} + +static inline int kgsl_mmu_is_secured(struct kgsl_mmu *mmu) +{ + return mmu && (mmu->secured) && (mmu->securepagetable); +} + +static inline u64 +kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) +{ + if (pagetable && pagetable->pt_ops->get_ttbr0) + return pagetable->pt_ops->get_ttbr0(pagetable); + return 0; +} + +static inline u32 +kgsl_mmu_pagetable_get_contextidr(struct kgsl_pagetable *pagetable) +{ + if (pagetable && pagetable->pt_ops->get_contextidr) + return pagetable->pt_ops->get_contextidr(pagetable); + return 0; +} + +#ifdef CONFIG_MSM_IOMMU +#include <linux/qcom_iommu.h> +static inline bool kgsl_mmu_bus_secured(struct device *dev) +{ + struct bus_type *bus = msm_iommu_get_bus(dev); + + return (bus == &msm_iommu_sec_bus_type) ? true : false; +} +static inline struct bus_type *kgsl_mmu_get_bus(struct device *dev) +{ + return msm_iommu_get_bus(dev); +} +static inline struct device *kgsl_mmu_get_ctx(const char *name) +{ + return msm_iommu_get_ctx(name); +} +#else +static inline bool kgsl_mmu_bus_secured(struct device *dev) +{ + return false; +} + +static inline struct bus_type *kgsl_mmu_get_bus(struct device *dev) +{ + return &platform_bus_type; +} +static inline struct device *kgsl_mmu_get_ctx(const char *name) +{ + return ERR_PTR(-ENODEV); +} +#endif + +#endif /* __KGSL_MMU_H */ diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c new file mode 100644 index 000000000000..54a93e62a580 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -0,0 +1,2620 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/export.h> +#include <linux/interrupt.h> +#include <asm/page.h> +#include <linux/pm_runtime.h> +#include <linux/msm-bus.h> +#include <linux/msm-bus-board.h> +#include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/msm_adreno_devfreq.h> +#include <linux/of_device.h> + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include <soc/qcom/devfreq_devbw.h> + +#define KGSL_PWRFLAGS_POWER_ON 0 +#define KGSL_PWRFLAGS_CLK_ON 1 +#define KGSL_PWRFLAGS_AXI_ON 2 +#define KGSL_PWRFLAGS_IRQ_ON 3 +#define KGSL_PWRFLAGS_RETENTION_ON 4 + +#define UPDATE_BUSY_VAL 1000000 + +/* + * Expected delay for post-interrupt processing on A3xx. + * The delay may be longer, gradually increase the delay + * to compensate. If the GPU isn't done by max delay, + * it's working on something other than just the final + * command sequence so stop waiting for it to be idle. + */ +#define INIT_UDELAY 200 +#define MAX_UDELAY 2000 + +/* Number of jiffies for a full thermal cycle */ +#define TH_HZ 20 + +#define KGSL_MAX_BUSLEVELS 20 + +#define DEFAULT_BUS_P 25 +#define DEFAULT_BUS_DIV (100 / DEFAULT_BUS_P) + +/* Order deeply matters here because reasons. New entries go on the end */ +static const char * const clocks[] = { + "src_clk", + "core_clk", + "iface_clk", + "mem_clk", + "mem_iface_clk", + "alt_mem_iface_clk", + "rbbmtimer_clk", + "gtcu_clk", + "gtbu_clk", + "gtcu_iface_clk", + "alwayson_clk" +}; + +static unsigned int ib_votes[KGSL_MAX_BUSLEVELS]; +static int last_vote_buslevel; +static int max_vote_buslevel; + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, int state, + int requested_state); +static void kgsl_pwrctrl_axi(struct kgsl_device *device, int state); +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, int state); +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state); +static void kgsl_pwrctrl_request_state(struct kgsl_device *device, + unsigned int state); +static void kgsl_pwrctrl_retention_clk(struct kgsl_device *device, int state); + +/** + * _record_pwrevent() - Record the history of the new event + * @device: Pointer to the kgsl_device struct + * @t: Timestamp + * @event: Event type + * + * Finish recording the duration of the previous event. Then update the + * index, record the start of the new event, and the relevant data. 
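+ *
+ * Hypothetical caller sketch (the pwrscale/pwrctrl code is assumed to
+ * invoke this whenever the tracked quantity changes):
+ *
+ *	_record_pwrevent(device, ktime_get(), KGSL_PWREVENT_STATE);
+ *
+ * leaving each history entry with a start time, a duration and the value
+ * (device state, GPU pwrlevel or bus level) that applied for that span.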
+ */ +static void _record_pwrevent(struct kgsl_device *device, + ktime_t t, int event) { + struct kgsl_pwrscale *psc = &device->pwrscale; + struct kgsl_pwr_history *history = &psc->history[event]; + int i = history->index; + if (history->events == NULL) + return; + history->events[i].duration = ktime_us_delta(t, + history->events[i].start); + i = (i + 1) % history->size; + history->index = i; + history->events[i].start = t; + switch (event) { + case KGSL_PWREVENT_STATE: + history->events[i].data = device->state; + break; + case KGSL_PWREVENT_GPU_FREQ: + history->events[i].data = device->pwrctrl.active_pwrlevel; + break; + case KGSL_PWREVENT_BUS_FREQ: + history->events[i].data = last_vote_buslevel; + break; + default: + break; + } +} + +/** + * kgsl_get_bw() - Return latest msm bus IB vote + */ +static unsigned int kgsl_get_bw(void) +{ + return ib_votes[last_vote_buslevel]; +} + +/** + * _ab_buslevel_update() - Return latest msm bus AB vote + * @pwr: Pointer to the kgsl_pwrctrl struct + * @ab: Pointer to be updated with the calculated AB vote + */ +static void _ab_buslevel_update(struct kgsl_pwrctrl *pwr, + unsigned long *ab) +{ + unsigned int ib = ib_votes[last_vote_buslevel]; + unsigned int max_bw = ib_votes[max_vote_buslevel]; + if (!ab) + return; + if (ib == 0) + *ab = 0; + else if ((!pwr->bus_percent_ab) && (!pwr->bus_ab_mbytes)) + *ab = DEFAULT_BUS_P * ib / 100; + else if (pwr->bus_width) + *ab = pwr->bus_ab_mbytes; + else + *ab = (pwr->bus_percent_ab * max_bw) / 100; + + if (*ab > ib) + *ab = ib; +} + +/** + * _adjust_pwrlevel() - Given a requested power level do bounds checking on the + * constraints and return the nearest possible level + * @device: Pointer to the kgsl_device struct + * @level: Requested level + * @pwrc: Pointer to the power constraint to be applied + * + * Apply thermal and max/min limits first. Then force the level with a + * constraint if one exists. + */ +static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level, + struct kgsl_pwr_constraint *pwrc, + int popp) +{ + unsigned int max_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + pwr->max_pwrlevel); + unsigned int min_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + pwr->min_pwrlevel); + + switch (pwrc->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + switch (pwrc->sub_type) { + case KGSL_CONSTRAINT_PWR_MAX: + return max_pwrlevel; + break; + case KGSL_CONSTRAINT_PWR_MIN: + return min_pwrlevel; + break; + default: + break; + } + } + break; + } + + if (popp && (max_pwrlevel < pwr->active_pwrlevel)) + max_pwrlevel = pwr->active_pwrlevel; + + if (level < max_pwrlevel) + return max_pwrlevel; + if (level > min_pwrlevel) + return min_pwrlevel; + + return level; +} + +/** + * kgsl_pwrctrl_buslevel_update() - Recalculate the bus vote and send it + * @device: Pointer to the kgsl_device struct + * @on: true for setting and active bus vote, false to turn off the vote + */ +void kgsl_pwrctrl_buslevel_update(struct kgsl_device *device, + bool on) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int cur = pwr->pwrlevels[pwr->active_pwrlevel].bus_freq; + int buslevel = 0; + unsigned long ab; + + /* the bus should be ON to update the active frequency */ + if (on && !(test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags))) + return; + /* + * If the bus should remain on calculate our request and submit it, + * otherwise request bus level 0, off. 
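+	 * The "on" vote is the active pwrlevel's bus_freq adjusted by
+	 * bus_mod and clamped to [1, pwrlevels[0].bus_max]; with bus_mod of
+	 * zero it is simply the default bus level for the current GPU
+	 * frequency.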
+ */ + if (on) { + buslevel = min_t(int, pwr->pwrlevels[0].bus_max, + cur + pwr->bus_mod); + buslevel = max_t(int, buslevel, 1); + } else { + /* If the bus is being turned off, reset to default level */ + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + pwr->bus_ab_mbytes = 0; + } + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel); + last_vote_buslevel = buslevel; + + /* buslevel is the IB vote, update the AB */ + _ab_buslevel_update(pwr, &ab); + + /** + * vote for ocmem if target supports ocmem scaling, + * shut down based on "on" parameter + */ + if (pwr->ocmem_pcl) + msm_bus_scale_client_update_request(pwr->ocmem_pcl, + on ? pwr->active_pwrlevel : pwr->num_pwrlevels - 1); + + /* vote for bus if gpubw-dev support is not enabled */ + if (pwr->pcl) + msm_bus_scale_client_update_request(pwr->pcl, buslevel); + + /* ask a governor to vote on behalf of us */ + if (pwr->devbw) + devfreq_vbif_update_bw(ib_votes[last_vote_buslevel], ab); +} +EXPORT_SYMBOL(kgsl_pwrctrl_buslevel_update); + +/** + * kgsl_pwrctrl_pwrlevel_change_settings() - Program h/w during powerlevel + * transitions + * @device: Pointer to the kgsl_device struct + * @post: flag to check if the call is before/after the clk_rate change + * @wake_up: flag to check if device is active or waking up + */ +static void kgsl_pwrctrl_pwrlevel_change_settings(struct kgsl_device *device, + bool post) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int old = pwr->previous_pwrlevel; + unsigned int new = pwr->active_pwrlevel; + + if (device->state != KGSL_STATE_ACTIVE) + return; + if (old == new) + return; + if (!device->ftbl->pwrlevel_change_settings) + return; + + device->ftbl->pwrlevel_change_settings(device, old, new, post); +} + +/** + * kgsl_pwrctrl_set_thermal_cycle() - set the thermal cycle if required + * @pwr: Pointer to the kgsl_pwrctrl struct + * @new_level: the level to transition to + */ +static void kgsl_pwrctrl_set_thermal_cycle(struct kgsl_pwrctrl *pwr, + unsigned int new_level) +{ + if ((new_level != pwr->thermal_pwrlevel) || !pwr->sysfs_pwr_limit) + return; + if (pwr->thermal_pwrlevel == pwr->sysfs_pwr_limit->level) { + /* Thermal cycle for sysfs pwr limit, start cycling*/ + if (pwr->thermal_cycle == CYCLE_ENABLE) { + pwr->thermal_cycle = CYCLE_ACTIVE; + mod_timer(&pwr->thermal_timer, jiffies + + (TH_HZ - pwr->thermal_timeout)); + pwr->thermal_highlow = 1; + } + } else { + /* Non sysfs pwr limit, stop thermal cycle if active*/ + if (pwr->thermal_cycle == CYCLE_ACTIVE) { + pwr->thermal_cycle = CYCLE_ENABLE; + del_timer_sync(&pwr->thermal_timer); + } + } +} + +/** + * kgsl_pwrctrl_pwrlevel_change() - Validate and change power levels + * @device: Pointer to the kgsl_device struct + * @new_level: Requested powerlevel, an index into the pwrlevel array + * + * Check that any power level constraints are still valid. Update the + * requested level according to any thermal, max/min, or power constraints. + * If a new GPU level is going to be set, update the bus to that level's + * default value. Do not change the bus if a constraint keeps the new + * level at the current level. Set the new GPU frequency. 
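+ *
+ * For example (lower indices being the faster levels), a request for
+ * level 0 while thermal_pwrlevel is 2 is clamped to level 2 by
+ * _adjust_pwrlevel() before anything is reprogrammed.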
+ */ +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int new_level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel; + unsigned int old_level = pwr->active_pwrlevel; + + /* If a pwr constraint is expired, remove it */ + if ((pwr->constraint.type != KGSL_CONSTRAINT_NONE) && + (time_after(jiffies, pwr->constraint.expires))) { + /* Trace the constraint being un-set by the driver */ + trace_kgsl_constraint(device, pwr->constraint.type, + old_level, 0); + /*Invalidate the constraint set */ + pwr->constraint.expires = 0; + pwr->constraint.type = KGSL_CONSTRAINT_NONE; + } + + /* + * Adjust the power level if required by thermal, max/min, + * constraints, etc + */ + new_level = _adjust_pwrlevel(pwr, new_level, &pwr->constraint, + device->pwrscale.popp_level); + + /* + * If thermal cycling is required and the new level hits the + * thermal limit, kick off the cycling. + */ + kgsl_pwrctrl_set_thermal_cycle(pwr, new_level); + + if (new_level == old_level) + return; + + /* + * Set the active and previous powerlevel first in case the clocks are + * off - if we don't do this then the pwrlevel change won't take effect + * when the clocks come back + */ + pwr->active_pwrlevel = new_level; + pwr->previous_pwrlevel = old_level; + + /* + * If the bus is running faster than its default level and the GPU + * frequency is moving down keep the DDR at a relatively high level. + */ + if (pwr->bus_mod < 0 || new_level < old_level) { + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + } + /* + * Update the bus before the GPU clock to prevent underrun during + * frequency increases. + */ + kgsl_pwrctrl_buslevel_update(device, true); + + pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel]; + /* Change register settings if any BEFORE pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 0); + clk_set_rate(pwr->grp_clks[0], pwrlevel->gpu_freq); + trace_kgsl_pwrlevel(device, + pwr->active_pwrlevel, pwrlevel->gpu_freq, + pwr->previous_pwrlevel, + pwr->pwrlevels[old_level].gpu_freq); + /* Change register settings if any AFTER pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); + + /* Timestamp the frequency change */ + device->pwrscale.freq_change_time = ktime_to_ms(ktime_get()); +} +EXPORT_SYMBOL(kgsl_pwrctrl_pwrlevel_change); + +/** + * kgsl_pwrctrl_set_constraint() - Validate and change enforced constraint + * @device: Pointer to the kgsl_device struct + * @pwrc: Pointer to requested constraint + * @id: Context id which owns the constraint + * + * Accept the new constraint if no previous constraint existed or if the + * new constraint is faster than the previous one. If the new and previous + * constraints are equal, update the timestamp and ownership to make sure + * the constraint expires at the correct time. + */ +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, uint32_t id) +{ + unsigned int constraint; + struct kgsl_pwr_constraint *pwrc_old; + + if (device == NULL || pwrc == NULL) + return; + constraint = _adjust_pwrlevel(&device->pwrctrl, + device->pwrctrl.active_pwrlevel, pwrc, 0); + pwrc_old = &device->pwrctrl.constraint; + + /* + * If a constraint is already set, set a new constraint only + * if it is faster. If the requested constraint is the same + * as the current one, update ownership and timestamp. 
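+ *
+ * "Faster" here means a numerically lower pwrlevel index: an existing
+ * constraint is only replaced when the newly computed level is lower
+ * than the level currently stored in the constraint.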
+ */ + if ((pwrc_old->type == KGSL_CONSTRAINT_NONE) || + (constraint < pwrc_old->hint.pwrlevel.level)) { + pwrc_old->type = pwrc->type; + pwrc_old->sub_type = pwrc->sub_type; + pwrc_old->hint.pwrlevel.level = constraint; + pwrc_old->owner_id = id; + pwrc_old->expires = jiffies + device->pwrctrl.interval_timeout; + kgsl_pwrctrl_pwrlevel_change(device, constraint); + /* Trace the constraint being set by the driver */ + trace_kgsl_constraint(device, pwrc_old->type, constraint, 1); + } else if ((pwrc_old->type == pwrc->type) && + (pwrc_old->hint.pwrlevel.level == constraint)) { + pwrc_old->owner_id = id; + pwrc_old->expires = jiffies + + device->pwrctrl.interval_timeout; + } +} +EXPORT_SYMBOL(kgsl_pwrctrl_set_constraint); + +static ssize_t kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &level); + + if (ret) + return ret; + + mutex_lock(&device->mutex); + + if (level > pwr->num_pwrlevels - 2) + level = pwr->num_pwrlevels - 2; + + pwr->thermal_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_thermal_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_max_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &level); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + /* You can't set a maximum power level lower than the minimum */ + if (level > pwr->min_pwrlevel) + level = pwr->min_pwrlevel; + + pwr->max_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_max_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%u\n", pwr->max_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_min_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &level); + if (ret) + return ret; + + mutex_lock(&device->mutex); + if (level > pwr->num_pwrlevels - 2) + level = pwr->num_pwrlevels - 2; + + /* You can't set a minimum power level lower than the maximum */ + if (level < pwr->max_pwrlevel) + level = pwr->max_pwrlevel; + + pwr->min_pwrlevel = level; + + /* Update the current level 
using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_min_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%u\n", pwr->min_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_num_pwrlevels_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels - 1); +} + +/* Given a GPU clock value, return the lowest matching powerlevel */ + +static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock) +{ + int i; + + for (i = pwr->num_pwrlevels - 1; i >= 0; i--) { + if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000) + return i; + } + + return -ERANGE; +} + +static ssize_t kgsl_pwrctrl_max_gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + unsigned int val = 0; + int level, ret; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + level = _get_nearest_pwrlevel(pwr, val); + /* If the requested power level is not supported by hw, try cycling */ + if (level < 0) { + unsigned int hfreq, diff, udiff, i; + if ((val < pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq) || + (val > pwr->pwrlevels[0].gpu_freq)) + goto err; + + /* Find the neighboring frequencies */ + for (i = 0; i < pwr->num_pwrlevels - 1; i++) { + if ((pwr->pwrlevels[i].gpu_freq > val) && + (pwr->pwrlevels[i + 1].gpu_freq < val)) { + level = i; + break; + } + } + if (i == pwr->num_pwrlevels - 1) + goto err; + hfreq = pwr->pwrlevels[i].gpu_freq; + diff = hfreq - pwr->pwrlevels[i + 1].gpu_freq; + udiff = hfreq - val; + pwr->thermal_timeout = (udiff * TH_HZ) / diff; + pwr->thermal_cycle = CYCLE_ENABLE; + } else { + pwr->thermal_cycle = CYCLE_DISABLE; + del_timer_sync(&pwr->thermal_timer); + } + mutex_unlock(&device->mutex); + + if (pwr->sysfs_pwr_limit) + kgsl_pwr_limits_set_freq(pwr->sysfs_pwr_limit, + pwr->pwrlevels[level].gpu_freq); + return count; + +err: + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t kgsl_pwrctrl_max_gpuclk_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + unsigned int freq; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + freq = pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq; + /* Calculate the effective frequency if we're cycling */ + if (pwr->thermal_cycle) { + unsigned int hfreq = freq; + unsigned int lfreq = pwr->pwrlevels[pwr-> + thermal_pwrlevel + 1].gpu_freq; + freq = pwr->thermal_timeout * (lfreq / TH_HZ) + + (TH_HZ - pwr->thermal_timeout) * (hfreq / TH_HZ); + } + + return snprintf(buf, PAGE_SIZE, "%d\n", freq); +} + +static ssize_t kgsl_pwrctrl_gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + unsigned int val = 0; + int ret, 
level; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + level = _get_nearest_pwrlevel(pwr, val); + if (level >= 0) + kgsl_pwrctrl_pwrlevel_change(device, (unsigned int) level); + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t kgsl_pwrctrl_gpuclk_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%ld\n", kgsl_pwrctrl_active_freq(pwr)); +} + +static ssize_t __timer_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count, + enum kgsl_pwrctrl_timer_type timer) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + /* + * We don't quite accept a maximum of 0xFFFFFFFF due to internal jiffy + * math, so make sure the value falls within the largest offset we can + * deal with + */ + + if (val > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return -EINVAL; + + mutex_lock(&device->mutex); + /* Let the timeout be requested in ms, but convert to jiffies. */ + if (timer == KGSL_PWR_IDLE_TIMER) + device->pwrctrl.interval_timeout = msecs_to_jiffies(val); + else if (timer == KGSL_PWR_DEEP_NAP_TIMER) + device->pwrctrl.deep_nap_timeout = msecs_to_jiffies(val); + + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_idle_timer_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __timer_store(dev, attr, buf, count, KGSL_PWR_IDLE_TIMER); +} + +static ssize_t kgsl_pwrctrl_idle_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + /* Show the idle_timeout converted to msec */ + return snprintf(buf, PAGE_SIZE, "%u\n", + jiffies_to_msecs(device->pwrctrl.interval_timeout)); +} + +static ssize_t kgsl_pwrctrl_deep_nap_timer_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + + return __timer_store(dev, attr, buf, count, KGSL_PWR_DEEP_NAP_TIMER); +} + +static ssize_t kgsl_pwrctrl_deep_nap_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + + if (device == NULL) + return 0; + /* Show the idle_timeout converted to msec */ + return snprintf(buf, PAGE_SIZE, "%u\n", + jiffies_to_msecs(device->pwrctrl.deep_nap_timeout)); +} + +static ssize_t kgsl_pwrctrl_pmqos_active_latency_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + device->pwrctrl.pm_qos_active_latency = val; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_pmqos_active_latency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.pm_qos_active_latency); +} + +static 
ssize_t kgsl_pwrctrl_gpubusy_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_clk_stats *stats; + + if (device == NULL) + return 0; + stats = &device->pwrctrl.clk_stats; + ret = snprintf(buf, PAGE_SIZE, "%7d %7d\n", + stats->busy_old, stats->total_old); + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + stats->busy_old = 0; + stats->total_old = 0; + } + return ret; +} + +static ssize_t kgsl_pwrctrl_gpu_available_frequencies_show( + struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int index, num_chars = 0; + + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + for (index = 0; index < pwr->num_pwrlevels - 1; index++) + num_chars += snprintf(buf + num_chars, PAGE_SIZE, "%d ", + pwr->pwrlevels[index].gpu_freq); + buf[num_chars++] = '\n'; + return num_chars; +} + +static ssize_t kgsl_pwrctrl_reset_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter); +} + +static void __force_on(struct kgsl_device *device, int flag, int on) +{ + if (on) { + switch (flag) { + case KGSL_PWRFLAGS_CLK_ON: + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, + KGSL_STATE_ACTIVE); + break; + case KGSL_PWRFLAGS_AXI_ON: + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); + break; + case KGSL_PWRFLAGS_POWER_ON: + kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON); + break; + case KGSL_PWRFLAGS_RETENTION_ON: + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + break; + } + set_bit(flag, &device->pwrctrl.ctrl_flags); + } else { + clear_bit(flag, &device->pwrctrl.ctrl_flags); + } +} + +static ssize_t __force_on_show(struct device *dev, + struct device_attribute *attr, + char *buf, int flag) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + test_bit(flag, &device->pwrctrl.ctrl_flags)); +} + +static ssize_t __force_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, + int flag) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + __force_on(device, flag, val); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_force_clk_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t kgsl_pwrctrl_force_clk_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t kgsl_pwrctrl_force_bus_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t kgsl_pwrctrl_force_bus_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t kgsl_pwrctrl_force_rail_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ 
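+	/* Report whether the GPU power rail has been forced on via sysfs */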
+ return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t kgsl_pwrctrl_force_rail_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t kgsl_pwrctrl_force_non_retention_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_RETENTION_ON); +} + +static ssize_t kgsl_pwrctrl_force_non_retention_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, + KGSL_PWRFLAGS_RETENTION_ON); +} + +static ssize_t kgsl_pwrctrl_bus_split_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.bus_control); +} + +static ssize_t kgsl_pwrctrl_bus_split_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + device->pwrctrl.bus_control = val ? true : false; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_default_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.default_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_default_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrscale *pwrscale; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + pwrscale = &device->pwrscale; + + ret = kgsl_sysfs_store(buf, &level); + if (ret) + return ret; + + if (level > pwr->num_pwrlevels - 2) + goto done; + + mutex_lock(&device->mutex); + pwr->default_pwrlevel = level; + pwrscale->gpu_profile.profile.initial_freq + = pwr->pwrlevels[level].gpu_freq; + + mutex_unlock(&device->mutex); +done: + return count; +} + + +static ssize_t kgsl_popp_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + if (val) + set_bit(POPP_ON, &device->pwrscale.popp_state); + else + clear_bit(POPP_ON, &device->pwrscale.popp_state); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_popp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + test_bit(POPP_ON, &device->pwrscale.popp_state)); +} + +static DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show, + kgsl_pwrctrl_gpuclk_store); +static DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show, + kgsl_pwrctrl_max_gpuclk_store); +static DEVICE_ATTR(idle_timer, 0644, 
kgsl_pwrctrl_idle_timer_show, + kgsl_pwrctrl_idle_timer_store); +static DEVICE_ATTR(deep_nap_timer, 0644, kgsl_pwrctrl_deep_nap_timer_show, + kgsl_pwrctrl_deep_nap_timer_store); +static DEVICE_ATTR(gpubusy, 0444, kgsl_pwrctrl_gpubusy_show, + NULL); +static DEVICE_ATTR(gpu_available_frequencies, 0444, + kgsl_pwrctrl_gpu_available_frequencies_show, + NULL); +static DEVICE_ATTR(max_pwrlevel, 0644, + kgsl_pwrctrl_max_pwrlevel_show, + kgsl_pwrctrl_max_pwrlevel_store); +static DEVICE_ATTR(min_pwrlevel, 0644, + kgsl_pwrctrl_min_pwrlevel_show, + kgsl_pwrctrl_min_pwrlevel_store); +static DEVICE_ATTR(thermal_pwrlevel, 0644, + kgsl_pwrctrl_thermal_pwrlevel_show, + kgsl_pwrctrl_thermal_pwrlevel_store); +static DEVICE_ATTR(num_pwrlevels, 0444, + kgsl_pwrctrl_num_pwrlevels_show, + NULL); +static DEVICE_ATTR(pmqos_active_latency, 0644, + kgsl_pwrctrl_pmqos_active_latency_show, + kgsl_pwrctrl_pmqos_active_latency_store); +static DEVICE_ATTR(reset_count, 0444, + kgsl_pwrctrl_reset_count_show, + NULL); +static DEVICE_ATTR(force_clk_on, 0644, + kgsl_pwrctrl_force_clk_on_show, + kgsl_pwrctrl_force_clk_on_store); +static DEVICE_ATTR(force_bus_on, 0644, + kgsl_pwrctrl_force_bus_on_show, + kgsl_pwrctrl_force_bus_on_store); +static DEVICE_ATTR(force_rail_on, 0644, + kgsl_pwrctrl_force_rail_on_show, + kgsl_pwrctrl_force_rail_on_store); +static DEVICE_ATTR(bus_split, 0644, + kgsl_pwrctrl_bus_split_show, + kgsl_pwrctrl_bus_split_store); +static DEVICE_ATTR(default_pwrlevel, 0644, + kgsl_pwrctrl_default_pwrlevel_show, + kgsl_pwrctrl_default_pwrlevel_store); +static DEVICE_ATTR(popp, 0644, kgsl_popp_show, kgsl_popp_store); +static DEVICE_ATTR(force_non_retention_on, 0644, + kgsl_pwrctrl_force_non_retention_on_show, + kgsl_pwrctrl_force_non_retention_on_store); + +static const struct device_attribute *pwrctrl_attr_list[] = { + &dev_attr_gpuclk, + &dev_attr_max_gpuclk, + &dev_attr_idle_timer, + &dev_attr_deep_nap_timer, + &dev_attr_gpubusy, + &dev_attr_gpu_available_frequencies, + &dev_attr_max_pwrlevel, + &dev_attr_min_pwrlevel, + &dev_attr_thermal_pwrlevel, + &dev_attr_num_pwrlevels, + &dev_attr_pmqos_active_latency, + &dev_attr_reset_count, + &dev_attr_force_clk_on, + &dev_attr_force_bus_on, + &dev_attr_force_rail_on, + &dev_attr_force_non_retention_on, + &dev_attr_bus_split, + &dev_attr_default_pwrlevel, + &dev_attr_popp, + NULL +}; + +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device) +{ + return kgsl_create_device_sysfs_files(device->dev, pwrctrl_attr_list); +} + +void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device) +{ + kgsl_remove_device_sysfs_files(device->dev, pwrctrl_attr_list); +} + +/* Track the amount of time the gpu is on vs the total system time. * + * Regularly update the percentage of busy time displayed by sysfs. */ +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy) +{ + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + stats->total += time; + stats->busy += busy; + + if (stats->total < UPDATE_BUSY_VAL) + return; + + /* Update the output regularly and reset the counters. 
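+	 * "Regularly" means once the running total crosses UPDATE_BUSY_VAL:
+	 * the busy/total pair is published for the gpubusy sysfs node and
+	 * the trace point, then the accumulators start over.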
*/ + stats->total_old = stats->total; + stats->busy_old = stats->busy; + stats->total = 0; + stats->busy = 0; + + trace_kgsl_gpubusy(device, stats->busy_old, stats->total_old); +} +EXPORT_SYMBOL(kgsl_pwrctrl_busy_time); + +static void kgsl_pwrctrl_retention_clk(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i = 0; + + if (!(pwr->gx_retention) || test_bit(KGSL_PWRFLAGS_RETENTION_ON, + &device->pwrctrl.ctrl_flags)) + return; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_RETENTION_ON, + &pwr->power_flags)) { + trace_kgsl_retention_clk(device, state); + /* prepare the mx clk to avoid RPM transactions*/ + clk_set_rate(pwr->dummy_mx_clk, + pwr->pwrlevels + [pwr->active_pwrlevel]. + gpu_freq); + clk_prepare(pwr->dummy_mx_clk); + /* + * Unprepare Gfx clocks to put Gfx rail to + * retention voltage. + */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + if (pwr->grp_clks[i]) + clk_unprepare(pwr->grp_clks[i]); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_RETENTION_ON, + &pwr->power_flags)) { + trace_kgsl_retention_clk(device, state); + /* + * Prepare Gfx clocks to put Gfx rail out + * of rentention + */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + if (pwr->grp_clks[i]) + clk_prepare(pwr->grp_clks[i]); + + /* unprepare the dummy mx clk*/ + clk_unprepare(pwr->dummy_mx_clk); + clk_set_rate(pwr->dummy_mx_clk, + pwr->pwrlevels[pwr->num_pwrlevels - 1]. + gpu_freq); + } + } +} + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, int state, + int requested_state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i = 0; + + if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags)) + return; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_disable(pwr->grp_clks[i]); + /* High latency clock maintenance. */ + if ((pwr->pwrlevels[0].gpu_freq > 0) && + (requested_state != KGSL_STATE_NAP) && + (requested_state != + KGSL_STATE_DEEP_NAP)) { + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels[pwr->num_pwrlevels - 1]. + gpu_freq); + } + } else if (requested_state == KGSL_STATE_SLEEP) { + /* High latency clock maintenance. */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + if ((pwr->pwrlevels[0].gpu_freq > 0)) + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels[pwr->num_pwrlevels - 1]. + gpu_freq); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + /* High latency clock maintenance. */ + if ((device->state != KGSL_STATE_NAP) && + (device->state != KGSL_STATE_DEEP_NAP)) { + if (pwr->pwrlevels[0].gpu_freq > 0) + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels + [pwr->active_pwrlevel]. 
+ gpu_freq); + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_prepare(pwr->grp_clks[i]); + } + /* as last step, enable grp_clk + this is to let GPU interrupt to come */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_enable(pwr->grp_clks[i]); + } + } +} + +static void kgsl_pwrctrl_axi(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->ctrl_flags)) + return; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + kgsl_pwrctrl_buslevel_update(device, false); + + if (pwr->devbw) + devfreq_suspend_devbw(pwr->devbw); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + kgsl_pwrctrl_buslevel_update(device, true); + + if (pwr->devbw) + devfreq_resume_devbw(pwr->devbw); + } + } +} + +static int _regulator_enable(struct kgsl_device *device, + struct kgsl_regulator *regulator) +{ + int ret; + + if (IS_ERR_OR_NULL(regulator->reg)) + return 0; + + ret = regulator_enable(regulator->reg); + if (ret) + KGSL_DRV_ERR(device, "Failed to enable regulator '%s': %d\n", + regulator->name, ret); + return ret; +} + +static void _regulator_disable(struct kgsl_regulator *regulator) +{ + if (!IS_ERR_OR_NULL(regulator->reg)) + regulator_disable(regulator->reg); +} + +static int _enable_regulators(struct kgsl_device *device, + struct kgsl_pwrctrl *pwr) +{ + int i; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) { + int ret = _regulator_enable(device, &pwr->regulators[i]); + + if (ret) { + for (i = i - 1; i >= 0; i--) + _regulator_disable(&pwr->regulators[i]); + return ret; + } + } + + return 0; +} + +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + if (test_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->ctrl_flags)) + return 0; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags)) { + trace_kgsl_rail(device, state); + device->ftbl->regulator_disable_poll(device); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags)) { + status = _enable_regulators(device, pwr); + + if (status) + clear_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags); + else + trace_kgsl_rail(device, state); + } + } + + return status; +} + +static void kgsl_pwrctrl_irq(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + enable_irq(pwr->interrupt_num); + } + } else if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + if (in_interrupt()) + disable_irq_nosync(pwr->interrupt_num); + else + disable_irq(pwr->interrupt_num); + } + } +} + +/** + * kgsl_thermal_cycle() - Work function for thermal timer. + * @work: The input work + * + * This function is called for work that is queued by the thermal + * timer. It cycles to the alternate thermal frequency. 
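+ *
+ * Together with kgsl_thermal_timer() this approximates a requested clock
+ * that falls between two pwrlevels: out of every TH_HZ jiffies the GPU
+ * runs thermal_timeout jiffies at thermal_pwrlevel + 1 and the remainder
+ * at thermal_pwrlevel, the same weighting kgsl_pwrctrl_max_gpuclk_show()
+ * uses to report the effective frequency.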
+ */ +static void kgsl_thermal_cycle(struct work_struct *work) +{ + struct kgsl_pwrctrl *pwr = container_of(work, struct kgsl_pwrctrl, + thermal_cycle_ws); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, + pwrctrl); + + if (device == NULL) + return; + + mutex_lock(&device->mutex); + if (pwr->thermal_cycle == CYCLE_ACTIVE) { + if (pwr->thermal_highlow) + kgsl_pwrctrl_pwrlevel_change(device, + pwr->thermal_pwrlevel); + else + kgsl_pwrctrl_pwrlevel_change(device, + pwr->thermal_pwrlevel + 1); + } + mutex_unlock(&device->mutex); +} + +static void kgsl_thermal_timer(unsigned long data) +{ + struct kgsl_device *device = (struct kgsl_device *) data; + + /* Keep the timer running consistently despite processing time */ + if (device->pwrctrl.thermal_highlow) { + mod_timer(&device->pwrctrl.thermal_timer, + jiffies + + device->pwrctrl.thermal_timeout); + device->pwrctrl.thermal_highlow = 0; + } else { + mod_timer(&device->pwrctrl.thermal_timer, + jiffies + (TH_HZ - + device->pwrctrl.thermal_timeout)); + device->pwrctrl.thermal_highlow = 1; + } + /* Have work run in a non-interrupt context. */ + kgsl_schedule_work(&device->pwrctrl.thermal_cycle_ws); +} + +void kgsl_deep_nap_timer(unsigned long data) +{ + struct kgsl_device *device = (struct kgsl_device *) data; + + if (device->state == KGSL_STATE_NAP) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_DEEP_NAP); + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static int _get_regulator(struct kgsl_device *device, + struct kgsl_regulator *regulator, const char *str) +{ + regulator->reg = devm_regulator_get(&device->pdev->dev, str); + if (IS_ERR(regulator->reg)) { + KGSL_CORE_ERR("Couldn't get regulator: %s (%ld)\n", + str, PTR_ERR(regulator->reg)); + return PTR_ERR(regulator->reg); + } + + strlcpy(regulator->name, str, sizeof(regulator->name)); + return 0; +} + +static int get_legacy_regulators(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + ret = _get_regulator(device, &pwr->regulators[0], "vdd"); + + /* Use vddcx only on targets that have it. 
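+	 * i.e. targets whose device tree provides a "vddcx-supply" property
+	 * in addition to the always-requested "vdd" regulator.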
*/ + if (ret == 0 && of_find_property(dev->of_node, "vddcx-supply", NULL)) + ret = _get_regulator(device, &pwr->regulators[1], "vddcx"); + + return ret; +} + +static int get_regulators(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index = 0; + const char *name; + struct property *prop; + + if (!of_find_property(dev->of_node, "regulator-names", NULL)) + return get_legacy_regulators(device); + + of_property_for_each_string(dev->of_node, + "regulator-names", prop, name) { + int ret; + + if (index == KGSL_MAX_REGULATORS) { + KGSL_CORE_ERR("Too many regulators defined\n"); + return -ENOMEM; + } + + ret = _get_regulator(device, &pwr->regulators[index], name); + if (ret) + return ret; + index++; + } + + return 0; +} + +static int _get_clocks(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + const char *name; + struct property *prop; + + of_property_for_each_string(dev->of_node, "clock-names", prop, name) { + int i; + + for (i = 0; i < KGSL_MAX_CLKS; i++) { + if (pwr->grp_clks[i] || strcmp(clocks[i], name)) + continue; + + pwr->grp_clks[i] = devm_clk_get(dev, name); + + if (IS_ERR(pwr->grp_clks[i])) { + int ret = PTR_ERR(pwr->grp_clks[i]); + + KGSL_CORE_ERR("Couldn't get clock: %s (%d)\n", + name, ret); + pwr->grp_clks[i] = NULL; + return ret; + } + + break; + } + } + + return 0; +} + +int kgsl_pwrctrl_init(struct kgsl_device *device) +{ + int i, k, m, n = 0, result; + struct platform_device *pdev = device->pdev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *ocmem_bus_node; + struct msm_bus_scale_pdata *ocmem_scale_table = NULL; + struct msm_bus_scale_pdata *bus_scale_table; + struct device_node *gpubw_dev_node; + struct platform_device *p2dev; + + bus_scale_table = msm_bus_cl_get_pdata(device->pdev); + if (bus_scale_table == NULL) + return -EINVAL; + + result = _get_clocks(device); + if (result) + return result; + + /* Make sure we have a source clk for freq setting */ + if (pwr->grp_clks[0] == NULL) + pwr->grp_clks[0] = pwr->grp_clks[1]; + + if (of_property_read_u32(pdev->dev.of_node, "qcom,deep-nap-timeout", + &pwr->deep_nap_timeout)) + pwr->deep_nap_timeout = HZ/50; + + pwr->gx_retention = of_property_read_bool(pdev->dev.of_node, + "qcom,gx-retention"); + if (pwr->gx_retention) { + pwr->dummy_mx_clk = clk_get(&pdev->dev, "mx_clk"); + if (IS_ERR(pwr->dummy_mx_clk)) { + pwr->gx_retention = 0; + pwr->dummy_mx_clk = NULL; + KGSL_CORE_ERR("Couldn't get clock: mx_clk\n"); + } + } + + pwr->power_flags = BIT(KGSL_PWRFLAGS_RETENTION_ON); + + if (pwr->num_pwrlevels == 0) { + KGSL_PWR_ERR(device, "No power levels are defined\n"); + return -EINVAL; + } + + /* Initialize the user and thermal clock constraints */ + + pwr->max_pwrlevel = 0; + pwr->min_pwrlevel = pwr->num_pwrlevels - 2; + pwr->thermal_pwrlevel = 0; + + pwr->wakeup_maxpwrlevel = 0; + + for (i = 0; i < pwr->num_pwrlevels; i++) { + unsigned int freq = pwr->pwrlevels[i].gpu_freq; + + if (freq > 0) + freq = clk_round_rate(pwr->grp_clks[0], freq); + + pwr->pwrlevels[i].gpu_freq = freq; + } + + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq); + + clk_set_rate(pwr->grp_clks[6], + clk_round_rate(pwr->grp_clks[6], KGSL_RBBMTIMER_CLK_FREQ)); + + result = get_regulators(device); + if (result) + return result; + + pwr->power_flags = 0; + + if (kgsl_property_read_u32(device, "qcom,pm-qos-active-latency", + &pwr->pm_qos_active_latency)) + 
pwr->pm_qos_active_latency = 501; + + if (kgsl_property_read_u32(device, "qcom,pm-qos-wakeup-latency", + &pwr->pm_qos_wakeup_latency)) + pwr->pm_qos_wakeup_latency = 101; + + pm_runtime_enable(&pdev->dev); + + ocmem_bus_node = of_find_node_by_name( + device->pdev->dev.of_node, + "qcom,ocmem-bus-client"); + /* If platform has splitted ocmem bus client - use it */ + if (ocmem_bus_node) { + ocmem_scale_table = msm_bus_pdata_from_node + (device->pdev, ocmem_bus_node); + if (ocmem_scale_table) + pwr->ocmem_pcl = msm_bus_scale_register_client + (ocmem_scale_table); + + if (!pwr->ocmem_pcl) + return -EINVAL; + } + + /* Bus width in bytes, set it to zero if not found */ + if (of_property_read_u32(pdev->dev.of_node, "qcom,bus-width", + &pwr->bus_width)) + pwr->bus_width = 0; + + /* Check if gpu bandwidth vote device is defined in dts */ + if (pwr->bus_control) + /* Check if gpu bandwidth vote device is defined in dts */ + gpubw_dev_node = of_parse_phandle(pdev->dev.of_node, + "qcom,gpubw-dev", 0); + + /* + * Governor support enables the gpu bus scaling via governor + * and hence no need to register for bus scaling client + * if gpubw-dev is defined. + */ + if (gpubw_dev_node) { + p2dev = of_find_device_by_node(gpubw_dev_node); + if (p2dev) + pwr->devbw = &p2dev->dev; + } else { + /* + * Register for gpu bus scaling if governor support + * is not enabled and gpu bus voting is to be done + * from the driver. + */ + pwr->pcl = msm_bus_scale_register_client(bus_scale_table); + if (pwr->pcl == 0) + return -EINVAL; + } + + pwr->bus_ib = kzalloc(bus_scale_table->num_usecases * + sizeof(*pwr->bus_ib), GFP_KERNEL); + if (pwr->bus_ib == NULL) + return -ENOMEM; + + /* + * Pull the BW vote out of the bus table. They will be used to + * calculate the ratio between the votes. + */ + for (i = 0; i < bus_scale_table->num_usecases; i++) { + struct msm_bus_paths *usecase = + &bus_scale_table->usecase[i]; + struct msm_bus_vectors *vector = &usecase->vectors[0]; + if (vector->dst == MSM_BUS_SLAVE_EBI_CH0 && + vector->ib != 0) { + + if (i < KGSL_MAX_BUSLEVELS) { + /* Convert bytes to Mbytes. */ + ib_votes[i] = + DIV_ROUND_UP_ULL(vector->ib, 1048576) + - 1; + if (ib_votes[i] > ib_votes[max_vote_buslevel]) + max_vote_buslevel = i; + } + + /* check for duplicate values */ + for (k = 0; k < n; k++) + if (vector->ib == pwr->bus_ib[k]) + break; + + /* if this is a new ib value, save it */ + if (k == n) { + pwr->bus_ib[k] = vector->ib; + n++; + /* find which pwrlevels use this ib */ + for (m = 0; m < pwr->num_pwrlevels - 1; m++) { + if (bus_scale_table-> + usecase[pwr->pwrlevels[m]. 
+ bus_freq].vectors[0].ib + == vector->ib) + pwr->bus_index[m] = k; + } + } + } + } + + INIT_WORK(&pwr->thermal_cycle_ws, kgsl_thermal_cycle); + setup_timer(&pwr->thermal_timer, kgsl_thermal_timer, + (unsigned long) device); + + INIT_LIST_HEAD(&pwr->limits); + spin_lock_init(&pwr->limits_lock); + pwr->sysfs_pwr_limit = kgsl_pwr_limits_add(KGSL_DEVICE_3D0); + + setup_timer(&pwr->deep_nap_timer, kgsl_deep_nap_timer, + (unsigned long) device); + devfreq_vbif_register_callback(kgsl_get_bw); + + return result; +} + +void kgsl_pwrctrl_close(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + KGSL_PWR_INFO(device, "close device %d\n", device->id); + + pm_runtime_disable(&device->pdev->dev); + + if (pwr->pcl) + msm_bus_scale_unregister_client(pwr->pcl); + + pwr->pcl = 0; + + if (pwr->ocmem_pcl) + msm_bus_scale_unregister_client(pwr->ocmem_pcl); + + pwr->ocmem_pcl = 0; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) + pwr->regulators[i].reg = NULL; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) + pwr->grp_clks[i] = NULL; + + pwr->power_flags = 0; + + if (!IS_ERR_OR_NULL(pwr->sysfs_pwr_limit)) { + list_del(&pwr->sysfs_pwr_limit->node); + kfree(pwr->sysfs_pwr_limit); + pwr->sysfs_pwr_limit = NULL; + } + kfree(pwr->bus_ib); +} + +/** + * kgsl_idle_check() - Work function for GPU interrupts and idle timeouts. + * @device: The device + * + * This function is called for work that is queued by the interrupt + * handler or the idle timer. It attempts to transition to a clocks + * off state if the active_cnt is 0 and the hardware is idle. + */ +void kgsl_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, struct kgsl_device, + idle_check_ws); + WARN_ON(device == NULL); + if (device == NULL) + return; + + mutex_lock(&device->mutex); + + if (device->state == KGSL_STATE_ACTIVE + || device->state == KGSL_STATE_NAP + || device->state == KGSL_STATE_DEEP_NAP) { + + if (!atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, + device->requested_state); + + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + if (device->state == KGSL_STATE_ACTIVE) + mod_timer(&device->idle_timer, + jiffies + + device->pwrctrl.interval_timeout); + } + if (device->state != KGSL_STATE_DEEP_NAP) + kgsl_pwrscale_update(device); + mutex_unlock(&device->mutex); +} +EXPORT_SYMBOL(kgsl_idle_check); + +void kgsl_timer(unsigned long data) +{ + struct kgsl_device *device = (struct kgsl_device *) data; + + KGSL_PWR_INFO(device, "idle timer expired device %d\n", device->id); + if (device->requested_state != KGSL_STATE_SUSPEND) { + if (device->pwrctrl.strtstp_sleepwake) + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + else + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLEEP); + /* Have work run in a non-interrupt context. */ + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static bool kgsl_pwrctrl_isenabled(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + return ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) != 0) && + (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags) != 0)); +} + +/** + * kgsl_pre_hwaccess - Enforce preconditions for touching registers + * @device: The device + * + * This function ensures that the correct lock is held and that the GPU + * clock is on immediately before a register is read or written. Note + * that this function does not check active_cnt because the registers + * must be accessed during device start and stop, when the active_cnt + * may legitimately be 0. 
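 *
 * A register accessor built on top of this check would typically look
 * like the following minimal sketch; example_read_reg() is illustrative
 * only, and reg_virt is assumed to be the device's mapped register base:
 *
 *	static unsigned int example_read_reg(struct kgsl_device *device,
 *			unsigned int offsetwords)
 *	{
 *		kgsl_pre_hwaccess(device);
 *		return readl_relaxed(device->reg_virt + (offsetwords << 2));
 *	}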
+ */ +void kgsl_pre_hwaccess(struct kgsl_device *device) +{ + /* In order to touch a register you must hold the device mutex...*/ + BUG_ON(!mutex_is_locked(&device->mutex)); + /* and have the clock on! */ + BUG_ON(!kgsl_pwrctrl_isenabled(device)); +} +EXPORT_SYMBOL(kgsl_pre_hwaccess); + +static int kgsl_pwrctrl_enable(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int level, status; + + if (pwr->wakeup_maxpwrlevel) { + level = pwr->max_pwrlevel; + pwr->wakeup_maxpwrlevel = 0; + } else if (kgsl_popp_check(device)) { + level = pwr->active_pwrlevel; + } else { + level = pwr->default_pwrlevel; + } + + kgsl_pwrctrl_pwrlevel_change(device, level); + + /* Order pwrrail/clk sequence based upon platform */ + status = kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON); + if (status) + return status; + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); + return device->ftbl->regulator_enable(device); +} + +static void kgsl_pwrctrl_disable(struct kgsl_device *device) +{ + /* Order pwrrail/clk sequence based upon platform */ + device->ftbl->regulator_disable(device); + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP); + kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_OFF); +} + +/** + * _init() - Get the GPU ready to start, but don't turn anything on + * @device - Pointer to the kgsl_device struct + */ +static int _init(struct kgsl_device *device) +{ + int status = 0; + switch (device->state) { + case KGSL_STATE_NAP: + case KGSL_STATE_DEEP_NAP: + case KGSL_STATE_SLEEP: + /* Get the device out of retention */ + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + /* Force power on to do the stop */ + status = kgsl_pwrctrl_enable(device); + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + del_timer_sync(&device->idle_timer); + device->ftbl->stop(device); + /* fall through */ + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + /* fall through */ + case KGSL_STATE_SLUMBER: + case KGSL_STATE_NONE: + kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + } + + return status; +} + +/** + * _wake() - Power up the GPU from a slumber/sleep state + * @device - Pointer to the kgsl_device struct + * + * Resume the GPU from a lower power state to ACTIVE. 
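 *
 * Callers normally reach this through kgsl_pwrctrl_change_state(device,
 * KGSL_STATE_ACTIVE) while holding the device mutex, as
 * kgsl_active_count_get() does further below. A minimal sketch of that
 * call pattern:
 *
 *	mutex_lock(&device->mutex);
 *	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
 *	mutex_unlock(&device->mutex);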
+ */ +static int _wake(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + switch (device->state) { + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + /* Call the GPU specific resume function */ + device->ftbl->resume(device); + /* fall through */ + case KGSL_STATE_SLUMBER: + status = device->ftbl->start(device, + device->pwrctrl.superfast); + device->pwrctrl.superfast = false; + + if (status) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + KGSL_DRV_ERR(device, "start failed %d\n", status); + break; + } + /* fall through */ + case KGSL_STATE_SLEEP: + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); + kgsl_pwrscale_wake(device); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + /* fall through */ + case KGSL_STATE_DEEP_NAP: + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + device->pwrctrl.pm_qos_active_latency); + /* Get the device out of retention */ + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + /* fall through */ + case KGSL_STATE_NAP: + /* Turn on the core clocks */ + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE); + + /* + * No need to turn on/off irq here as it no longer affects + * power collapse + */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + /* Change register settings if any after pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); + /* All settings for power level transitions are complete*/ + pwr->previous_pwrlevel = pwr->active_pwrlevel; + mod_timer(&device->idle_timer, jiffies + + device->pwrctrl.interval_timeout); + del_timer_sync(&device->pwrctrl.deep_nap_timer); + + break; + case KGSL_STATE_AWARE: + /* Enable state before turning on irq */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + mod_timer(&device->idle_timer, jiffies + + device->pwrctrl.interval_timeout); + del_timer_sync(&device->pwrctrl.deep_nap_timer); + break; + default: + KGSL_PWR_WARN(device, "unhandled state %s\n", + kgsl_pwrstate_to_str(device->state)); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + return status; +} + +/* + * _aware() - Put device into AWARE + * @device: Device pointer + * + * The GPU should be available for register reads/writes and able + * to communicate with the rest of the system. However disable all + * paths that allow a switch to an interrupt context (interrupts & + * timers). + * Return 0 on success else error code + */ +static int +_aware(struct kgsl_device *device) +{ + int status = 0; + switch (device->state) { + case KGSL_STATE_INIT: + status = kgsl_pwrctrl_enable(device); + break; + /* The following 3 cases shouldn't occur, but don't panic. */ + case KGSL_STATE_DEEP_NAP: + case KGSL_STATE_NAP: + case KGSL_STATE_SLEEP: + status = _wake(device); + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + del_timer_sync(&device->idle_timer); + break; + case KGSL_STATE_SLUMBER: + status = kgsl_pwrctrl_enable(device); + break; + default: + status = -EINVAL; + } + if (status) + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + else + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + return status; +} + +static int +_nap(struct kgsl_device *device) +{ + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + + /* + * Read HW busy counters before going to NAP state. 
+ * The data might be used by power scale governors + * independently of the HW activity. For example + * the simple-on-demand governor will get the latest + * busy_time data even if the gpu isn't active. + */ + kgsl_pwrscale_update_stats(device); + + mod_timer(&device->pwrctrl.deep_nap_timer, jiffies + + device->pwrctrl.deep_nap_timeout); + + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_NAP); + kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP); + case KGSL_STATE_SLEEP: + case KGSL_STATE_SLUMBER: + break; + case KGSL_STATE_AWARE: + KGSL_PWR_WARN(device, + "transition AWARE -> NAP is not permitted\n"); + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_deep_nap(struct kgsl_device *device) +{ + switch (device->state) { + /* + * Device is expected to be clock gated to move to + * a deeper low power state. No other transition is permitted + */ + case KGSL_STATE_NAP: + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_OFF); + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_DEFAULT_VALUE); + kgsl_pwrctrl_set_state(device, KGSL_STATE_DEEP_NAP); + break; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_sleep(struct kgsl_device *device) +{ + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + /* fall through */ + case KGSL_STATE_NAP: + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrscale_sleep(device); + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLEEP); + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_DEFAULT_VALUE); + break; + case KGSL_STATE_SLUMBER: + break; + case KGSL_STATE_AWARE: + KGSL_PWR_WARN(device, + "transition AWARE -> SLEEP is not permitted\n"); + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + + return 0; +} + +static int +_slumber(struct kgsl_device *device) +{ + int status = 0; + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + /* fall through */ + case KGSL_STATE_NAP: + case KGSL_STATE_SLEEP: + case KGSL_STATE_DEEP_NAP: + del_timer_sync(&device->idle_timer); + if (device->pwrctrl.thermal_cycle == CYCLE_ACTIVE) { + device->pwrctrl.thermal_cycle = CYCLE_ENABLE; + del_timer_sync(&device->pwrctrl.thermal_timer); + } + del_timer_sync(&device->pwrctrl.deep_nap_timer); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + /* Get the device out of retention */ + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + /* make sure power is on to stop the device*/ + status = kgsl_pwrctrl_enable(device); + device->ftbl->suspend_context(device); + device->ftbl->stop(device); + kgsl_pwrctrl_disable(device); + kgsl_pwrscale_sleep(device); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_DEFAULT_VALUE); + break; + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + device->ftbl->resume(device); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + default: + 
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + + } + return status; +} + +/* + * _suspend() - Put device into suspend + * @device: Device pointer + * + * Return 0 on success else error code + */ +static int _suspend(struct kgsl_device *device) +{ + int ret = 0; + + if ((KGSL_STATE_NONE == device->state) || + (KGSL_STATE_INIT == device->state)) + return ret; + + /* drain to prevent from more commands being submitted */ + device->ftbl->drain(device); + /* wait for active count so device can be put in slumber */ + ret = kgsl_active_count_wait(device, 0); + if (ret) + goto err; + + ret = device->ftbl->idle(device); + if (ret) + goto err; + + ret = _slumber(device); + if (ret) + goto err; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND); + return ret; + +err: + device->ftbl->resume(device); + KGSL_PWR_ERR(device, "device failed to SUSPEND %d\n", ret); + return ret; +} + +/* + * kgsl_pwrctrl_change_state() changes the GPU state to the input + * @device: Pointer to a KGSL device + * @state: desired KGSL state + * + * Caller must hold the device mutex. If the requested state change + * is valid, execute it. Otherwise return an error code explaining + * why the change has not taken place. Also print an error if an + * unexpected state change failure occurs. For example, a change to + * NAP may be rejected because the GPU is busy, this is not an error. + * A change to SUSPEND should go through no matter what, so if it + * fails an additional error message will be printed to dmesg. + */ +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state) +{ + int status = 0; + if (device->state == state) + return status; + kgsl_pwrctrl_request_state(device, state); + + /* Work through the legal state transitions */ + switch (state) { + case KGSL_STATE_INIT: + status = _init(device); + break; + case KGSL_STATE_AWARE: + status = _aware(device); + break; + case KGSL_STATE_ACTIVE: + status = _wake(device); + break; + case KGSL_STATE_NAP: + status = _nap(device); + break; + case KGSL_STATE_SLEEP: + status = _sleep(device); + break; + case KGSL_STATE_SLUMBER: + status = _slumber(device); + break; + case KGSL_STATE_SUSPEND: + status = _suspend(device); + break; + case KGSL_STATE_DEEP_NAP: + status = _deep_nap(device); + break; + default: + KGSL_PWR_INFO(device, "bad state request 0x%x\n", state); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + + /* Record the state timing info */ + if (!status) { + ktime_t t = ktime_get(); + _record_pwrevent(device, t, KGSL_PWREVENT_STATE); + } + return status; +} +EXPORT_SYMBOL(kgsl_pwrctrl_change_state); + +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state) +{ + trace_kgsl_pwr_set_state(device, state); + device->state = state; + device->requested_state = KGSL_STATE_NONE; +} + +static void kgsl_pwrctrl_request_state(struct kgsl_device *device, + unsigned int state) +{ + if (state != KGSL_STATE_NONE && state != device->requested_state) + trace_kgsl_pwr_request_state(device, state); + device->requested_state = state; +} + +const char *kgsl_pwrstate_to_str(unsigned int state) +{ + switch (state) { + case KGSL_STATE_NONE: + return "NONE"; + case KGSL_STATE_INIT: + return "INIT"; + case KGSL_STATE_AWARE: + return "AWARE"; + case KGSL_STATE_ACTIVE: + return "ACTIVE"; + case KGSL_STATE_NAP: + return "NAP"; + case KGSL_STATE_DEEP_NAP: + return "DEEP_NAP"; + case KGSL_STATE_SLEEP: + return "SLEEP"; + case KGSL_STATE_SUSPEND: + return "SUSPEND"; + case KGSL_STATE_SLUMBER: + return 
"SLUMBER"; + default: + break; + } + return "UNKNOWN"; +} +EXPORT_SYMBOL(kgsl_pwrstate_to_str); + + +/** + * kgsl_active_count_get() - Increase the device active count + * @device: Pointer to a KGSL device + * + * Increase the active count for the KGSL device and turn on + * clocks if this is the first reference. Code paths that need + * to touch the hardware or wait for the hardware to complete + * an operation must hold an active count reference until they + * are finished. An error code will be returned if waking the + * device fails. The device mutex must be held while *calling + * this function. + */ +int kgsl_active_count_get(struct kgsl_device *device) +{ + int ret = 0; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if ((atomic_read(&device->active_cnt) == 0) && + (device->state != KGSL_STATE_ACTIVE)) { + mutex_unlock(&device->mutex); + wait_for_completion(&device->hwaccess_gate); + mutex_lock(&device->mutex); + device->pwrctrl.superfast = true; + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + } + if (ret == 0) + atomic_inc(&device->active_cnt); + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + return ret; +} +EXPORT_SYMBOL(kgsl_active_count_get); + +/** + * kgsl_active_count_put() - Decrease the device active count + * @device: Pointer to a KGSL device + * + * Decrease the active count for the KGSL device and turn off + * clocks if there are no remaining references. This function will + * transition the device to NAP if there are no other pending state + * changes. It also completes the suspend gate. The device mutex must + * be held while calling this function. + */ +void kgsl_active_count_put(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + BUG_ON(atomic_read(&device->active_cnt) == 0); + + if (atomic_dec_and_test(&device->active_cnt)) { + if (device->state == KGSL_STATE_ACTIVE && + device->requested_state == KGSL_STATE_NONE) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); + kgsl_schedule_work(&device->idle_check_ws); + } + + mod_timer(&device->idle_timer, + jiffies + device->pwrctrl.interval_timeout); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} +EXPORT_SYMBOL(kgsl_active_count_put); + +static int _check_active_count(struct kgsl_device *device, int count) +{ + /* Return 0 if the active count is greater than the desired value */ + return atomic_read(&device->active_cnt) > count ? 0 : 1; +} + +/** + * kgsl_active_count_wait() - Wait for activity to finish. + * @device: Pointer to a KGSL device + * @count: Active count value to wait for + * + * Block until the active_cnt value hits the desired value + */ +int kgsl_active_count_wait(struct kgsl_device *device, int count) +{ + int result = 0; + long wait_jiffies = HZ; + + BUG_ON(!mutex_is_locked(&device->mutex)); + + while (atomic_read(&device->active_cnt) > count) { + long ret; + mutex_unlock(&device->mutex); + ret = wait_event_timeout(device->active_cnt_wq, + _check_active_count(device, count), wait_jiffies); + mutex_lock(&device->mutex); + result = ret == 0 ? 
-ETIMEDOUT : 0; + if (!result) + wait_jiffies = ret; + else + break; + } + + return result; +} +EXPORT_SYMBOL(kgsl_active_count_wait); + +/** + * _update_limits() - update the limits based on the current requests + * @limit: Pointer to the limits structure + * @reason: Reason for the update + * @level: Level if any to be set + * + * Set the thermal pwrlevel based on the current limits + */ +static void _update_limits(struct kgsl_pwr_limit *limit, unsigned int reason, + unsigned int level) +{ + struct kgsl_device *device = limit->device; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwr_limit *temp_limit; + unsigned int max_level = 0; + + spin_lock(&pwr->limits_lock); + switch (reason) { + case KGSL_PWR_ADD_LIMIT: + list_add(&limit->node, &pwr->limits); + break; + case KGSL_PWR_DEL_LIMIT: + list_del(&limit->node); + if (list_empty(&pwr->limits)) + goto done; + break; + case KGSL_PWR_SET_LIMIT: + limit->level = level; + break; + default: + break; + } + + list_for_each_entry(temp_limit, &pwr->limits, node) { + max_level = max_t(unsigned int, max_level, temp_limit->level); + } + +done: + spin_unlock(&pwr->limits_lock); + + mutex_lock(&device->mutex); + pwr->thermal_pwrlevel = max_level; + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); +} + +/** + * kgsl_pwr_limits_add() - Add a new pwr limit + * @id: Device ID + * + * Allocate a pwr limit structure for the client, add it to the limits + * list and return the pointer to the client + */ +void *kgsl_pwr_limits_add(enum kgsl_deviceid id) +{ + struct kgsl_device *device = kgsl_get_device(id); + struct kgsl_pwr_limit *limit; + + if (IS_ERR_OR_NULL(device)) + return NULL; + + limit = kzalloc(sizeof(struct kgsl_pwr_limit), + GFP_KERNEL); + if (limit == NULL) + return ERR_PTR(-ENOMEM); + limit->device = device; + + _update_limits(limit, KGSL_PWR_ADD_LIMIT, 0); + return limit; +} +EXPORT_SYMBOL(kgsl_pwr_limits_add); + +/** + * kgsl_pwr_limits_del() - Unregister the pwr limit client and + * adjust the thermal limits + * @limit_ptr: Client handle + * + * Delete the client handle from the thermal list and adjust the + * active clocks if needed. 
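 *
 * Together with kgsl_pwr_limits_add() and kgsl_pwr_limits_set_freq()
 * this forms the external thermal-limit interface. A client typically
 * does the following (a minimal sketch; the 300 MHz cap is only an
 * example value):
 *
 *	void *handle = kgsl_pwr_limits_add(KGSL_DEVICE_3D0);
 *
 *	if (!IS_ERR_OR_NULL(handle)) {
 *		kgsl_pwr_limits_set_freq(handle, 300000000);
 *		... hold the cap while mitigation is required ...
 *		kgsl_pwr_limits_del(handle);
 *	}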
+ */ +void kgsl_pwr_limits_del(void *limit_ptr) +{ + struct kgsl_pwr_limit *limit = limit_ptr; + if (IS_ERR(limit)) + return; + + _update_limits(limit, KGSL_PWR_DEL_LIMIT, 0); + kfree(limit); +} +EXPORT_SYMBOL(kgsl_pwr_limits_del); + +/** + * kgsl_pwr_limits_set_freq() - Set the requested limit for the client + * @limit_ptr: Client handle + * @freq: Client requested frequency + * + * Set the new limit for the client and adjust the clocks + */ +int kgsl_pwr_limits_set_freq(void *limit_ptr, unsigned int freq) +{ + struct kgsl_pwrctrl *pwr; + struct kgsl_pwr_limit *limit = limit_ptr; + int level; + + if (IS_ERR(limit)) + return -EINVAL; + + pwr = &limit->device->pwrctrl; + level = _get_nearest_pwrlevel(pwr, freq); + if (level < 0) + return -EINVAL; + _update_limits(limit, KGSL_PWR_SET_LIMIT, level); + return 0; +} +EXPORT_SYMBOL(kgsl_pwr_limits_set_freq); + +/** + * kgsl_pwr_limits_set_default() - Set the default thermal limit for the client + * @limit_ptr: Client handle + * + * Set the default for the client and adjust the clocks + */ +void kgsl_pwr_limits_set_default(void *limit_ptr) +{ + struct kgsl_pwr_limit *limit = limit_ptr; + + if (IS_ERR(limit)) + return; + + _update_limits(limit, KGSL_PWR_SET_LIMIT, 0); +} +EXPORT_SYMBOL(kgsl_pwr_limits_set_default); + +/** + * kgsl_pwr_limits_get_freq() - Get the current limit + * @id: Device ID + * + * Get the current limit set for the device + */ +unsigned int kgsl_pwr_limits_get_freq(enum kgsl_deviceid id) +{ + struct kgsl_device *device = kgsl_get_device(id); + struct kgsl_pwrctrl *pwr; + unsigned int freq; + + if (IS_ERR_OR_NULL(device)) + return 0; + pwr = &device->pwrctrl; + mutex_lock(&device->mutex); + freq = pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq; + mutex_unlock(&device->mutex); + + return freq; +} +EXPORT_SYMBOL(kgsl_pwr_limits_get_freq); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h new file mode 100644 index 000000000000..5335dbfa6a58 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -0,0 +1,241 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_PWRCTRL_H +#define __KGSL_PWRCTRL_H + +#include <linux/pm_qos.h> + +/***************************************************************************** +** power flags +*****************************************************************************/ +#define KGSL_PWRFLAGS_ON 1 +#define KGSL_PWRFLAGS_OFF 0 + +#define KGSL_PWRLEVEL_TURBO 0 +#define KGSL_PWRLEVEL_NOMINAL 1 +#define KGSL_PWRLEVEL_LAST_OFFSET 2 + +#define KGSL_PWR_ON 0xFFFF + +#define KGSL_MAX_CLKS 11 +#define KGSL_MAX_REGULATORS 2 + +#define KGSL_MAX_PWRLEVELS 10 + +/* Only two supported levels, min & max */ +#define KGSL_CONSTRAINT_PWR_MAXLEVELS 2 + +#define KGSL_RBBMTIMER_CLK_FREQ 19200000 + +/* Symbolic table for the constraint type */ +#define KGSL_CONSTRAINT_TYPES \ + { KGSL_CONSTRAINT_NONE, "None" }, \ + { KGSL_CONSTRAINT_PWRLEVEL, "Pwrlevel" } +/* Symbolic table for the constraint sub type */ +#define KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES \ + { KGSL_CONSTRAINT_PWR_MIN, "Min" }, \ + { KGSL_CONSTRAINT_PWR_MAX, "Max" } + +#define KGSL_PWR_ADD_LIMIT 0 +#define KGSL_PWR_DEL_LIMIT 1 +#define KGSL_PWR_SET_LIMIT 2 + +enum kgsl_pwrctrl_timer_type { + KGSL_PWR_IDLE_TIMER, + KGSL_PWR_DEEP_NAP_TIMER, +}; + +/* + * States for thermal cycling. _DISABLE means that no cycling has been + * requested. _ENABLE means that cycling has been requested, but GPU + * DCVS is currently recommending running at a lower frequency than the + * cycle frequency. _ACTIVE means that the frequency is actively being + * cycled. + */ +#define CYCLE_DISABLE 0 +#define CYCLE_ENABLE 1 +#define CYCLE_ACTIVE 2 + +struct platform_device; + +struct kgsl_clk_stats { + unsigned int busy; + unsigned int total; + unsigned int busy_old; + unsigned int total_old; +}; + +struct kgsl_pwr_constraint { + unsigned int type; + unsigned int sub_type; + union { + struct { + unsigned int level; + } pwrlevel; + } hint; + unsigned long expires; + uint32_t owner_id; +}; + +/** + * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained from + * from dtsi file + * @gpu_freq: GPU frequency vote in Hz + * @bus_freq: Bus bandwidth vote index + * @bus_min: Min bus index @gpu_freq + * @bus_max: Max bus index @gpu_freq + */ +struct kgsl_pwrlevel { + unsigned int gpu_freq; + unsigned int bus_freq; + unsigned int bus_min; + unsigned int bus_max; +}; + +struct kgsl_regulator { + struct regulator *reg; + char name[8]; +}; + +/** + * struct kgsl_pwrctrl - Power control settings for a KGSL device + * @interrupt_num - The interrupt number for the device + * @grp_clks - Array of clocks structures that we control + * @dummy_mx_clk - mx clock that is contolled during retention + * @power_flags - Control flags for power + * @pwrlevels - List of supported power levels + * @active_pwrlevel - The currently active power level + * @previous_pwrlevel - The power level before transition + * @thermal_pwrlevel - maximum powerlevel constraint from thermal + * @default_pwrlevel - device wake up power level + * @max_pwrlevel - maximum allowable powerlevel per the user + * @min_pwrlevel - minimum allowable powerlevel per the user + * @num_pwrlevels - number of available power levels + * @interval_timeout - timeout in jiffies to be idle before a power event + * @strtstp_sleepwake - true if the device supports low latency GPU start/stop + * @regulators - array of pointers to kgsl_regulator structs + * @pcl - bus scale identifier + * @ocmem - ocmem bus scale identifier + * @irq_name - resource name for the IRQ + * @clk_stats - structure of clock statistics + * @pm_qos_req_dma - the 
power management quality of service structure + * @pm_qos_active_latency - allowed CPU latency in microseconds when active + * @pm_qos_wakeup_latency - allowed CPU latency in microseconds during wakeup + * @bus_control - true if the bus calculation is independent + * @bus_mod - modifier from the current power level for the bus vote + * @bus_percent_ab - current percent of total possible bus usage + * @bus_width - target specific bus width in number of bytes + * @bus_ab_mbytes - AB vote in Mbytes for current bus usage + * @bus_index - default bus index into the bus_ib table + * @bus_ib - the set of unique ib requests needed for the bus calculation + * @constraint - currently active power constraint + * @superfast - Boolean flag to indicate that the GPU start should be run in the + * higher priority thread + * @thermal_cycle_ws - Work struct for scheduling thermal cycling + * @thermal_timer - Timer for thermal cycling + * @thermal_timeout - Cycling timeout for switching between frequencies + * @thermal_cycle - Is thermal cycling enabled + * @thermal_highlow - flag for swithcing between high and low frequency + * @limits - list head for limits + * @limits_lock - spin lock to protect limits list + * @sysfs_pwr_limit - pointer to the sysfs limits node + * @deep_nap_timer - Timer struct for entering deep nap + * @deep_nap_timeout - Timeout for entering deep nap + * @gx_retention - true if retention voltage is allowed + */ + +struct kgsl_pwrctrl { + int interrupt_num; + struct clk *grp_clks[KGSL_MAX_CLKS]; + struct clk *dummy_mx_clk; + unsigned long power_flags; + unsigned long ctrl_flags; + struct kgsl_pwrlevel pwrlevels[KGSL_MAX_PWRLEVELS]; + unsigned int active_pwrlevel; + unsigned int previous_pwrlevel; + unsigned int thermal_pwrlevel; + unsigned int default_pwrlevel; + unsigned int wakeup_maxpwrlevel; + unsigned int max_pwrlevel; + unsigned int min_pwrlevel; + unsigned int num_pwrlevels; + unsigned long interval_timeout; + bool strtstp_sleepwake; + struct kgsl_regulator regulators[KGSL_MAX_REGULATORS]; + uint32_t pcl; + uint32_t ocmem_pcl; + const char *irq_name; + struct kgsl_clk_stats clk_stats; + struct pm_qos_request pm_qos_req_dma; + unsigned int pm_qos_active_latency; + unsigned int pm_qos_wakeup_latency; + bool bus_control; + int bus_mod; + unsigned int bus_percent_ab; + unsigned int bus_width; + unsigned long bus_ab_mbytes; + struct device *devbw; + unsigned int bus_index[KGSL_MAX_PWRLEVELS]; + uint64_t *bus_ib; + struct kgsl_pwr_constraint constraint; + bool superfast; + struct work_struct thermal_cycle_ws; + struct timer_list thermal_timer; + uint32_t thermal_timeout; + uint32_t thermal_cycle; + uint32_t thermal_highlow; + struct list_head limits; + spinlock_t limits_lock; + struct kgsl_pwr_limit *sysfs_pwr_limit; + struct timer_list deep_nap_timer; + uint32_t deep_nap_timeout; + bool gx_retention; +}; + +int kgsl_pwrctrl_init(struct kgsl_device *device); +void kgsl_pwrctrl_close(struct kgsl_device *device); +void kgsl_timer(unsigned long data); +void kgsl_idle_check(struct work_struct *work); +void kgsl_pre_hwaccess(struct kgsl_device *device); +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int level); +void kgsl_pwrctrl_buslevel_update(struct kgsl_device *device, + bool on); +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device); +void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device); +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state); + +static inline unsigned long kgsl_get_clkrate(struct clk *clk) +{ + return (clk != NULL) ? 
clk_get_rate(clk) : 0; +} + +/* + * kgsl_pwrctrl_active_freq - get currently configured frequency + * @pwr: kgsl_pwrctrl structure for the device + * + * Returns the currently configured frequency for the device. + */ +static inline unsigned long +kgsl_pwrctrl_active_freq(struct kgsl_pwrctrl *pwr) +{ + return pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq; +} + +int __must_check kgsl_active_count_get(struct kgsl_device *device); +void kgsl_active_count_put(struct kgsl_device *device); +int kgsl_active_count_wait(struct kgsl_device *device, int count); +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, uint32_t id); +#endif /* __KGSL_PWRCTRL_H */ diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c new file mode 100644 index 000000000000..c888df3cb6ed --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale.c @@ -0,0 +1,905 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/export.h> +#include <linux/kernel.h> + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" + +#define FAST_BUS 1 +#define SLOW_BUS -1 + +/* + * "SLEEP" is generic counting both NAP & SLUMBER + * PERIODS generally won't exceed 9 for the relavent 150msec + * window, but can be significantly smaller and still POPP + * pushable in cases where SLUMBER is involved. Hence the + * additional reliance on PERCENT to make sure a reasonable + * amount of down-time actually exists. + */ +#define MIN_SLEEP_PERIODS 3 +#define MIN_SLEEP_PERCENT 5 + +static struct kgsl_popp popp_param[POPP_MAX] = { + {0, 0}, + {-5, 20}, + {-5, 0}, + {0, 0}, +}; + +static void do_devfreq_suspend(struct work_struct *work); +static void do_devfreq_resume(struct work_struct *work); +static void do_devfreq_notify(struct work_struct *work); + +/* + * These variables are used to keep the latest data + * returned by kgsl_devfreq_get_dev_status + */ +static struct xstats last_xstats; +static struct devfreq_dev_status last_status = { .private_data = &last_xstats }; + +/* + * kgsl_pwrscale_sleep - notify governor that device is going off + * @device: The device + * + * Called shortly after all pending work is completed. + */ +void kgsl_pwrscale_sleep(struct kgsl_device *device) +{ + struct kgsl_pwrscale *psc = &device->pwrscale; + BUG_ON(!mutex_is_locked(&device->mutex)); + if (!device->pwrscale.enabled) + return; + device->pwrscale.on_time = 0; + + psc->popp_level = 0; + clear_bit(POPP_PUSH, &device->pwrscale.popp_state); + + /* to call devfreq_suspend_device() from a kernel thread */ + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); +} +EXPORT_SYMBOL(kgsl_pwrscale_sleep); + +/* + * kgsl_pwrscale_wake - notify governor that device is going on + * @device: The device + * + * Called when the device is returning to an active state. 
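 *
 * The wake path calls this between re-enabling the AXI bus and the GPU
 * interrupt, with the device mutex held; the devfreq_resume_device()
 * call itself is deferred to the devfreq workqueue so that it runs from
 * a kernel thread rather than from this context.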
+ */ +void kgsl_pwrscale_wake(struct kgsl_device *device) +{ + struct kgsl_power_stats stats; + struct kgsl_pwrscale *psc = &device->pwrscale; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (!device->pwrscale.enabled) + return; + /* clear old stats before waking */ + memset(&psc->accum_stats, 0, sizeof(psc->accum_stats)); + memset(&last_xstats, 0, sizeof(last_xstats)); + + /* and any hw activity from waking up*/ + device->ftbl->power_stats(device, &stats); + + psc->time = ktime_get(); + + psc->next_governor_call = ktime_add_us(psc->time, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call devfreq_resume_device() from a kernel thread */ + queue_work(psc->devfreq_wq, &psc->devfreq_resume_ws); +} +EXPORT_SYMBOL(kgsl_pwrscale_wake); + +/* + * kgsl_pwrscale_busy - update pwrscale state for new work + * @device: The device + * + * Called when new work is submitted to the device. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_busy(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + if (!device->pwrscale.enabled) + return; + if (device->pwrscale.on_time == 0) + device->pwrscale.on_time = ktime_to_us(ktime_get()); +} +EXPORT_SYMBOL(kgsl_pwrscale_busy); + +/** + * kgsl_pwrscale_update_stats() - update device busy statistics + * @device: The device + * + * Read hardware busy counters and accumulate the results. + */ +void kgsl_pwrscale_update_stats(struct kgsl_device *device) +{ + struct kgsl_pwrscale *psc = &device->pwrscale; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (!psc->enabled) + return; + + if (device->state == KGSL_STATE_ACTIVE) { + struct kgsl_power_stats stats; + device->ftbl->power_stats(device, &stats); + if (psc->popp_level) { + u64 x = stats.busy_time; + u64 y = stats.ram_time; + do_div(x, 100); + do_div(y, 100); + x *= popp_param[psc->popp_level].gpu_x; + y *= popp_param[psc->popp_level].ddr_y; + trace_kgsl_popp_mod(device, x, y); + stats.busy_time += x; + stats.ram_time += y; + } + device->pwrscale.accum_stats.busy_time += stats.busy_time; + device->pwrscale.accum_stats.ram_time += stats.ram_time; + device->pwrscale.accum_stats.ram_wait += stats.ram_wait; + } +} +EXPORT_SYMBOL(kgsl_pwrscale_update_stats); + +/** + * kgsl_pwrscale_update() - update device busy statistics + * @device: The device + * + * If enough time has passed schedule the next call to devfreq + * get_dev_status. + */ +void kgsl_pwrscale_update(struct kgsl_device *device) +{ + ktime_t t; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (!device->pwrscale.enabled) + return; + + t = ktime_get(); + if (ktime_compare(t, device->pwrscale.next_governor_call) < 0) + return; + + device->pwrscale.next_governor_call = ktime_add_us(t, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call srcu_notifier_call_chain() from a kernel thread */ + if (device->state != KGSL_STATE_SLUMBER) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_notify_ws); +} +EXPORT_SYMBOL(kgsl_pwrscale_update); + +/* + * kgsl_pwrscale_disable - temporarily disable the governor + * @device: The device + * + * Temporarily disable the governor, to prevent interference + * with profiling tools that expect a fixed clock frequency. + * This function must be called with the device mutex locked. 
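 *
 * A profiling path would typically bracket its measurement like this
 * (a minimal sketch):
 *
 *	mutex_lock(&device->mutex);
 *	kgsl_pwrscale_disable(device);	(pins the GPU at KGSL_PWRLEVEL_TURBO)
 *	... sample performance counters at a fixed clock ...
 *	kgsl_pwrscale_enable(device);
 *	mutex_unlock(&device->mutex);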
+ */ +void kgsl_pwrscale_disable(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + if (device->pwrscale.devfreqptr) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); + device->pwrscale.enabled = false; + kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_TURBO); +} +EXPORT_SYMBOL(kgsl_pwrscale_disable); + +/* + * kgsl_pwrscale_enable - re-enable the governor + * @device: The device + * + * Reenable the governor after a kgsl_pwrscale_disable() call. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_enable(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (device->pwrscale.devfreqptr) { + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_resume_ws); + device->pwrscale.enabled = true; + } else { + /* + * Don't enable it if devfreq is not set and let the device + * run at default level; + */ + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.default_pwrlevel); + device->pwrscale.enabled = false; + } +} +EXPORT_SYMBOL(kgsl_pwrscale_enable); + +static int _thermal_adjust(struct kgsl_pwrctrl *pwr, int level) +{ + if (level < pwr->active_pwrlevel) + return pwr->active_pwrlevel; + + /* + * A lower frequency has been recommended! Stop thermal + * cycling (but keep the upper thermal limit) and switch to + * the lower frequency. + */ + pwr->thermal_cycle = CYCLE_ENABLE; + del_timer_sync(&pwr->thermal_timer); + return level; +} + +/* + * Use various metrics including level stability, NAP intervals, and + * overall GPU freq / DDR freq combination to decide if POPP should + * be activated. + */ +static bool popp_stable(struct kgsl_device *device) +{ + s64 t; + s64 nap_time = 0; + s64 go_time = 0; + int i, index; + int nap = 0; + s64 percent_nap = 0; + struct kgsl_pwr_event *e; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *psc = &device->pwrscale; + + if (!test_bit(POPP_ON, &psc->popp_state)) + return false; + + /* If already pushed or running naturally at min don't push further */ + if (test_bit(POPP_PUSH, &psc->popp_state)) + return false; + if (!psc->popp_level && + (pwr->active_pwrlevel == pwr->min_pwrlevel)) + return false; + if (psc->history[KGSL_PWREVENT_STATE].events == NULL) + return false; + + t = ktime_to_ms(ktime_get()); + /* Check for recent NAP statistics: NAPping regularly and well? */ + if (pwr->active_pwrlevel == 0) { + index = psc->history[KGSL_PWREVENT_STATE].index; + i = index > 0 ? 
(index - 1) : + (psc->history[KGSL_PWREVENT_STATE].size - 1); + while (i != index) { + e = &psc->history[KGSL_PWREVENT_STATE].events[i]; + if (e->data == KGSL_STATE_NAP || + e->data == KGSL_STATE_SLUMBER) { + if (ktime_to_ms(e->start) + STABLE_TIME > t) { + nap++; + nap_time += e->duration; + } + } else if (e->data == KGSL_STATE_ACTIVE) { + if (ktime_to_ms(e->start) + STABLE_TIME > t) + go_time += e->duration; + } + if (i == 0) + i = psc->history[KGSL_PWREVENT_STATE].size - 1; + else + i--; + } + if (nap_time && go_time) { + percent_nap = 100 * nap_time; + do_div(percent_nap, nap_time + go_time); + } + trace_kgsl_popp_nap(device, (int)nap_time / 1000, nap, + percent_nap); + /* If running high at turbo, don't push */ + if (nap < MIN_SLEEP_PERIODS || percent_nap < MIN_SLEEP_PERCENT) + return false; + } + + /* Finally check that there hasn't been a recent change */ + if ((device->pwrscale.freq_change_time + STABLE_TIME) < t) { + device->pwrscale.freq_change_time = t; + return true; + } + return false; +} + +bool kgsl_popp_check(struct kgsl_device *device) +{ + int i; + struct kgsl_pwrscale *psc = &device->pwrscale; + struct kgsl_pwr_event *e; + + if (!test_bit(POPP_ON, &psc->popp_state)) + return false; + if (!test_bit(POPP_PUSH, &psc->popp_state)) + return false; + if (psc->history[KGSL_PWREVENT_STATE].events == NULL) { + clear_bit(POPP_PUSH, &psc->popp_state); + return false; + } + + e = &psc->history[KGSL_PWREVENT_STATE]. + events[psc->history[KGSL_PWREVENT_STATE].index]; + if (e->data == KGSL_STATE_SLUMBER) + e->duration = ktime_us_delta(ktime_get(), e->start); + + /* If there's been a long SLUMBER in recent history, clear the _PUSH */ + for (i = 0; i < psc->history[KGSL_PWREVENT_STATE].size; i++) { + e = &psc->history[KGSL_PWREVENT_STATE].events[i]; + if ((e->data == KGSL_STATE_SLUMBER) && + (e->duration > POPP_RESET_TIME)) { + clear_bit(POPP_PUSH, &psc->popp_state); + return false; + } + } + return true; +} + +/* + * The GPU has been running at the current frequency for a while. Attempt + * to lower the frequency for boarderline cases. + */ +static void popp_trans1(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwr->active_pwrlevel]; + struct kgsl_pwrscale *psc = &device->pwrscale; + int old_level = psc->popp_level; + + switch (old_level) { + case 0: + psc->popp_level = 2; + /* If the current level has a high default bus don't push it */ + if (pl->bus_freq == pl->bus_max) + pwr->bus_mod = 1; + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel + 1); + break; + case 1: + case 2: + psc->popp_level++; + break; + case 3: + set_bit(POPP_PUSH, &psc->popp_state); + psc->popp_level = 0; + break; + case POPP_MAX: + default: + psc->popp_level = 0; + break; + } + + trace_kgsl_popp_level(device, old_level, psc->popp_level); +} + +/* + * The GPU DCVS algorithm recommends a level change. Apply any + * POPP restrictions and update the level accordingly + */ +static int popp_trans2(struct kgsl_device *device, int level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *psc = &device->pwrscale; + int old_level = psc->popp_level; + + if (!test_bit(POPP_ON, &psc->popp_state)) + return level; + + clear_bit(POPP_PUSH, &psc->popp_state); + /* If the governor recommends going down, do it! 
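 * A larger pwrlevel index is a lower frequency, so active_pwrlevel < level
 * means the recommendation is already a step down and POPP simply gets out
 * of the way.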
*/ + if (pwr->active_pwrlevel < level) { + psc->popp_level = 0; + trace_kgsl_popp_level(device, old_level, psc->popp_level); + return level; + } + + switch (psc->popp_level) { + case 0: + /* If the feature isn't engaged, go up immediately */ + break; + case 1: + /* Turn off mitigation, and go up a level */ + psc->popp_level = 0; + break; + case 2: + case 3: + /* Try a more aggressive mitigation */ + psc->popp_level--; + level++; + /* Update the stable timestamp */ + device->pwrscale.freq_change_time = ktime_to_ms(ktime_get()); + break; + case POPP_MAX: + default: + psc->popp_level = 0; + break; + } + + trace_kgsl_popp_level(device, old_level, psc->popp_level); + + return level; +} + +/* + * kgsl_devfreq_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrlevel *pwr_level; + int level, i; + unsigned long cur_freq; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.enabled) + return 0; + + pwr = &device->pwrctrl; + if (flags & DEVFREQ_FLAG_WAKEUP_MAXFREQ) { + /* + * The GPU is about to get suspended, + * but it needs to be at the max power level when waking up + */ + pwr->wakeup_maxpwrlevel = 1; + return 0; + } + + mutex_lock(&device->mutex); + cur_freq = kgsl_pwrctrl_active_freq(pwr); + level = pwr->active_pwrlevel; + pwr_level = &pwr->pwrlevels[level]; + + /* If the governor recommends a new frequency, update it here */ + if (*freq != cur_freq) { + level = pwr->max_pwrlevel; + for (i = pwr->min_pwrlevel; i >= pwr->max_pwrlevel; i--) + if (*freq <= pwr->pwrlevels[i].gpu_freq) { + if (pwr->thermal_cycle == CYCLE_ACTIVE) + level = _thermal_adjust(pwr, i); + else + level = popp_trans2(device, i); + break; + } + if (level != pwr->active_pwrlevel) + kgsl_pwrctrl_pwrlevel_change(device, level); + } else if (popp_stable(device)) { + popp_trans1(device); + } + + *freq = kgsl_pwrctrl_active_freq(pwr); + + mutex_unlock(&device->mutex); + return 0; +} +EXPORT_SYMBOL(kgsl_devfreq_target); + +/* + * kgsl_devfreq_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwrctrl; + struct kgsl_pwrscale *pwrscale; + ktime_t tmp; + + if (device == NULL) + return -ENODEV; + if (stat == NULL) + return -EINVAL; + + pwrscale = &device->pwrscale; + pwrctrl = &device->pwrctrl; + + mutex_lock(&device->mutex); + /* + * If the GPU clock is on grab the latest power counter + * values. Otherwise the most recent ACTIVE values will + * already be stored in accum_stats. 
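 *
 * The governor turns the two counters filled in below into a load
 * figure, roughly busy_time * 100 / total_time, so both are reported in
 * microseconds and accum_stats is cleared again after every sample.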
+ */ + kgsl_pwrscale_update_stats(device); + + tmp = ktime_get(); + stat->total_time = ktime_us_delta(tmp, pwrscale->time); + pwrscale->time = tmp; + + stat->busy_time = pwrscale->accum_stats.busy_time; + + stat->current_frequency = kgsl_pwrctrl_active_freq(&device->pwrctrl); + + /* + * keep the latest devfreq_dev_status values + * and vbif counters data + * to be (re)used by kgsl_busmon_get_dev_status() + */ + if (pwrctrl->bus_control) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + + last_status.total_time = stat->total_time; + last_status.busy_time = stat->busy_time; + last_status.current_frequency = stat->current_frequency; + + last_b->ram_time = device->pwrscale.accum_stats.ram_time; + last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; + last_b->mod = device->pwrctrl.bus_mod; + } + + kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); + trace_kgsl_pwrstats(device, stat->total_time, &pwrscale->accum_stats); + memset(&pwrscale->accum_stats, 0, sizeof(pwrscale->accum_stats)); + + mutex_unlock(&device->mutex); + + return 0; +} +EXPORT_SYMBOL(kgsl_devfreq_get_dev_status); + +/* + * kgsl_devfreq_get_cur_freq - devfreq_dev_profile.get_cur_freq callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + + mutex_lock(&device->mutex); + *freq = kgsl_pwrctrl_active_freq(&device->pwrctrl); + mutex_unlock(&device->mutex); + + return 0; +} +EXPORT_SYMBOL(kgsl_devfreq_get_cur_freq); + +/* + * kgsl_devfreq_add_notifier - add a fine grained notifier. + * @dev: The device + * @nb: Notifier block that will recieve updates. + * + * Add a notifier to recieve ADRENO_DEVFREQ_NOTIFY_* events + * from the device. + */ +int kgsl_devfreq_add_notifier(struct device *dev, + struct notifier_block *nb) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + + if (nb == NULL) + return -EINVAL; + + return srcu_notifier_chain_register(&device->pwrscale.nh, nb); +} +EXPORT_SYMBOL(kgsl_devfreq_add_notifier); + +/* + * kgsl_devfreq_del_notifier - remove a fine grained notifier. + * @dev: The device + * @nb: The notifier block. + * + * Remove a notifier registered with kgsl_devfreq_add_notifier(). + */ +int kgsl_devfreq_del_notifier(struct device *dev, struct notifier_block *nb) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + + if (nb == NULL) + return -EINVAL; + + return srcu_notifier_chain_unregister(&device->pwrscale.nh, nb); +} +EXPORT_SYMBOL(kgsl_devfreq_del_notifier); + + +/* + * kgsl_busmon_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. 
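 *
 * The bus governor samples in the same window as the GPU governor, so
 * this simply replays the snapshot that kgsl_devfreq_get_dev_status()
 * cached in last_status (including the ram_time/ram_wait VBIF data)
 * rather than touching the hardware counters again.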
+ */ +int kgsl_busmon_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct xstats *b; + stat->total_time = last_status.total_time; + stat->busy_time = last_status.busy_time; + stat->current_frequency = last_status.current_frequency; + if (stat->private_data) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + b = (struct xstats *)stat->private_data; + b->ram_time = last_b->ram_time; + b->ram_wait = last_b->ram_wait; + b->mod = last_b->mod; + } + return 0; +} + +/* + * kgsl_busmon_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrlevel *pwr_level; + int level, b; + u32 bus_flag; + unsigned long ab_mbytes; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.enabled) + return 0; + + pwr = &device->pwrctrl; + + if (!pwr->bus_control) + return 0; + + mutex_lock(&device->mutex); + level = pwr->active_pwrlevel; + pwr_level = &pwr->pwrlevels[level]; + bus_flag = device->pwrscale.bus_profile.flag; + device->pwrscale.bus_profile.flag = 0; + ab_mbytes = device->pwrscale.bus_profile.ab_mbytes; + + /* + * Bus devfreq governor has calculated its recomendations + * when gpu was running with *freq frequency. + * If the gpu frequency is different now it's better to + * ignore the call + */ + if (pwr_level->gpu_freq != *freq) { + mutex_unlock(&device->mutex); + return 0; + } + + b = pwr->bus_mod; + if ((bus_flag & DEVFREQ_FLAG_FAST_HINT) && + ((pwr_level->bus_freq + pwr->bus_mod) < pwr_level->bus_max)) + pwr->bus_mod++; + else if ((bus_flag & DEVFREQ_FLAG_SLOW_HINT) && + ((pwr_level->bus_freq + pwr->bus_mod) > pwr_level->bus_min)) + pwr->bus_mod--; + + /* Update bus vote if AB or IB is modified */ + if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) { + pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab; + pwr->bus_ab_mbytes = ab_mbytes; + kgsl_pwrctrl_buslevel_update(device, true); + } + + mutex_unlock(&device->mutex); + return 0; +} + +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq) +{ + return 0; +} + + +/* + * kgsl_pwrscale_init - Initialize pwrscale. + * @dev: The device + * @governor: The initial governor to use. + * + * Initialize devfreq and any non-constant profile data. + */ +int kgsl_pwrscale_init(struct device *dev, const char *governor) +{ + struct kgsl_device *device; + struct kgsl_pwrscale *pwrscale; + struct kgsl_pwrctrl *pwr; + struct devfreq *devfreq; + struct devfreq *bus_devfreq; + struct msm_adreno_extended_profile *gpu_profile; + struct devfreq_dev_profile *profile; + struct devfreq_msm_adreno_tz_data *data; + int i, out = 0; + int ret; + + device = dev_get_drvdata(dev); + if (device == NULL) + return -ENODEV; + + pwrscale = &device->pwrscale; + pwr = &device->pwrctrl; + gpu_profile = &pwrscale->gpu_profile; + profile = &pwrscale->gpu_profile.profile; + + srcu_init_notifier_head(&pwrscale->nh); + + profile->initial_freq = + pwr->pwrlevels[pwr->default_pwrlevel].gpu_freq; + /* Let's start with 10 ms and tune in later */ + profile->polling_ms = 10; + + /* do not include the 'off' level or duplicate freq. 
levels */ + for (i = 0; i < (pwr->num_pwrlevels - 1); i++) + pwrscale->freq_table[out++] = pwr->pwrlevels[i].gpu_freq; + + /* + * Max_state is the number of valid power levels. + * The valid power levels range from 0 - (max_state - 1) + */ + profile->max_state = pwr->num_pwrlevels - 1; + /* link storage array to the devfreq profile pointer */ + profile->freq_table = pwrscale->freq_table; + + /* if there is only 1 freq, no point in running a governor */ + if (profile->max_state == 1) + governor = "performance"; + + /* initialize msm-adreno-tz governor specific data here */ + data = gpu_profile->private_data; + /* + * If there is a separate GX power rail, allow + * independent modification to its voltage through + * the bus bandwidth vote. + */ + if (pwr->bus_control) { + out = 0; + while (pwr->bus_ib[out] && out <= pwr->pwrlevels[0].bus_max) { + pwr->bus_ib[out] = + pwr->bus_ib[out] >> 20; + out++; + } + data->bus.num = out; + data->bus.ib = &pwr->bus_ib[0]; + data->bus.index = &pwr->bus_index[0]; + data->bus.width = pwr->bus_width; + } else + data->bus.num = 0; + + devfreq = devfreq_add_device(dev, &pwrscale->gpu_profile.profile, + governor, pwrscale->gpu_profile.private_data); + if (IS_ERR(devfreq)) { + device->pwrscale.enabled = false; + return PTR_ERR(devfreq); + } + + pwrscale->devfreqptr = devfreq; + + pwrscale->gpu_profile.bus_devfreq = NULL; + if (data->bus.num) { + pwrscale->bus_profile.profile.max_state + = pwr->num_pwrlevels - 1; + pwrscale->bus_profile.profile.freq_table + = pwrscale->freq_table; + + bus_devfreq = devfreq_add_device(device->busmondev, + &pwrscale->bus_profile.profile, "gpubw_mon", NULL); + if (!IS_ERR(bus_devfreq)) + pwrscale->gpu_profile.bus_devfreq = bus_devfreq; + } + + ret = sysfs_create_link(&device->dev->kobj, + &devfreq->dev.kobj, "devfreq"); + + pwrscale->devfreq_wq = create_freezable_workqueue("kgsl_devfreq_wq"); + INIT_WORK(&pwrscale->devfreq_suspend_ws, do_devfreq_suspend); + INIT_WORK(&pwrscale->devfreq_resume_ws, do_devfreq_resume); + INIT_WORK(&pwrscale->devfreq_notify_ws, do_devfreq_notify); + + pwrscale->next_governor_call = ktime_add_us(ktime_get(), + KGSL_GOVERNOR_CALL_INTERVAL); + + /* history tracking */ + for (i = 0; i < KGSL_PWREVENT_MAX; i++) { + pwrscale->history[i].events = kzalloc( + pwrscale->history[i].size * + sizeof(struct kgsl_pwr_event), GFP_KERNEL); + pwrscale->history[i].type = i; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_pwrscale_init); + +/* + * kgsl_pwrscale_close - clean up pwrscale + * @device: the device + * + * This function should be called with the device mutex locked. 
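 *
 * Pairs with kgsl_pwrscale_init(): the devfreq device, the devfreq
 * workqueue, the notifier head and the per-event history buffers
 * allocated there are all torn down here.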
+ */ +void kgsl_pwrscale_close(struct kgsl_device *device) +{ + int i; + struct kgsl_pwrscale *pwrscale; + + BUG_ON(!mutex_is_locked(&device->mutex)); + + pwrscale = &device->pwrscale; + if (!pwrscale->devfreqptr) + return; + flush_workqueue(pwrscale->devfreq_wq); + destroy_workqueue(pwrscale->devfreq_wq); + devfreq_remove_device(device->pwrscale.devfreqptr); + device->pwrscale.devfreqptr = NULL; + srcu_cleanup_notifier_head(&device->pwrscale.nh); + for (i = 0; i < KGSL_PWREVENT_MAX; i++) + kfree(pwrscale->history[i].events); +} +EXPORT_SYMBOL(kgsl_pwrscale_close); + +static void do_devfreq_suspend(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_suspend_ws); + struct devfreq *devfreq = pwrscale->devfreqptr; + + devfreq_suspend_device(devfreq); +} + +static void do_devfreq_resume(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_resume_ws); + struct devfreq *devfreq = pwrscale->devfreqptr; + + devfreq_resume_device(devfreq); +} + +static void do_devfreq_notify(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_notify_ws); + struct devfreq *devfreq = pwrscale->devfreqptr; + srcu_notifier_call_chain(&pwrscale->nh, + ADRENO_DEVFREQ_NOTIFY_RETIRE, + devfreq); +} diff --git a/drivers/gpu/msm/kgsl_pwrscale.h b/drivers/gpu/msm/kgsl_pwrscale.h new file mode 100644 index 000000000000..c85317869f1d --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __KGSL_PWRSCALE_H +#define __KGSL_PWRSCALE_H + +#include <linux/devfreq.h> +#include <linux/msm_adreno_devfreq.h> +#include "kgsl_pwrctrl.h" + +/* devfreq governor call window in usec */ +#define KGSL_GOVERNOR_CALL_INTERVAL 10000 + +/* Power events to be tracked with history */ +#define KGSL_PWREVENT_STATE 0 +#define KGSL_PWREVENT_GPU_FREQ 1 +#define KGSL_PWREVENT_BUS_FREQ 2 +#define KGSL_PWREVENT_POPP 3 +#define KGSL_PWREVENT_MAX 4 + +/** + * Amount of time running at a level to be considered + * "stable" in msec + */ +#define STABLE_TIME 150 + +/* Amount of idle time needed to re-set stability in usec */ +#define POPP_RESET_TIME 1000000 + +/* Number of POPP levels */ +#define POPP_MAX 4 + +/* POPP state bits */ +#define POPP_ON BIT(0) +#define POPP_PUSH BIT(1) + +struct kgsl_popp { + int gpu_x; + int ddr_y; +}; + +struct kgsl_power_stats { + u64 busy_time; + u64 ram_time; + u64 ram_wait; +}; + +struct kgsl_pwr_event { + unsigned int data; + ktime_t start; + s64 duration; +}; + +struct kgsl_pwr_history { + struct kgsl_pwr_event *events; + unsigned int type; + unsigned int index; + unsigned int size; +}; + +/** + * struct kgsl_pwrscale - Power scaling settings for a KGSL device + * @devfreqptr - Pointer to the devfreq device + * @gpu_profile - GPU profile data for the devfreq device + * @bus_profile - Bus specific data for the bus devfreq device + * @freq_table - GPU frequencies for the DCVS algorithm + * @last_governor - Prior devfreq governor + * @accum_stats - Accumulated statistics for various frequency calculations + * @enabled - Whether or not power scaling is enabled + * @time - Last submitted sample timestamp + * @on_time - Timestamp when gpu busy begins + * @freq_change_time - Timestamp of last freq change or popp update + * @nh - Notifier for the partner devfreq bus device + * @devfreq_wq - Main devfreq workqueue + * @devfreq_suspend_ws - Pass device suspension to devfreq + * @devfreq_resume_ws - Pass device resume to devfreq + * @devfreq_notify_ws - Notify devfreq to update sampling + * @next_governor_call - Timestamp after which the governor may be notified of + * a new sample + * @history - History of power events with timestamps and durations + * @popp_level - Current level of POPP mitigation + * @popp_state - Control state for POPP, on/off, recently pushed, etc + */ +struct kgsl_pwrscale { + struct devfreq *devfreqptr; + struct msm_adreno_extended_profile gpu_profile; + struct msm_busmon_extended_profile bus_profile; + unsigned int freq_table[KGSL_MAX_PWRLEVELS]; + char last_governor[DEVFREQ_NAME_LEN]; + struct kgsl_power_stats accum_stats; + bool enabled; + ktime_t time; + s64 on_time; + s64 freq_change_time; + struct srcu_notifier_head nh; + struct workqueue_struct *devfreq_wq; + struct work_struct devfreq_suspend_ws; + struct work_struct devfreq_resume_ws; + struct work_struct devfreq_notify_ws; + ktime_t next_governor_call; + struct kgsl_pwr_history history[KGSL_PWREVENT_MAX]; + int popp_level; + unsigned long popp_state; +}; + +int kgsl_pwrscale_init(struct device *dev, const char *governor); +void kgsl_pwrscale_close(struct kgsl_device *device); + +void kgsl_pwrscale_update(struct kgsl_device *device); +void kgsl_pwrscale_update_stats(struct kgsl_device *device); +void kgsl_pwrscale_busy(struct kgsl_device *device); +void kgsl_pwrscale_sleep(struct kgsl_device *device); +void kgsl_pwrscale_wake(struct kgsl_device *device); + +void kgsl_pwrscale_enable(struct kgsl_device *device); +void kgsl_pwrscale_disable(struct kgsl_device *device); + +int 
kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_devfreq_get_dev_status(struct device *, struct devfreq_dev_status *); +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq); + +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_busmon_get_dev_status(struct device *, struct devfreq_dev_status *); +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq); + +bool kgsl_popp_check(struct kgsl_device *device); + + +#define KGSL_PWRSCALE_INIT(_priv_data) { \ + .enabled = true, \ + .gpu_profile = { \ + .private_data = _priv_data, \ + .profile = { \ + .target = kgsl_devfreq_target, \ + .get_dev_status = kgsl_devfreq_get_dev_status, \ + .get_cur_freq = kgsl_devfreq_get_cur_freq, \ + } }, \ + .bus_profile = { \ + .private_data = _priv_data, \ + .profile = { \ + .target = kgsl_busmon_target, \ + .get_dev_status = kgsl_busmon_get_dev_status, \ + .get_cur_freq = kgsl_busmon_get_cur_freq, \ + } }, \ + .history[KGSL_PWREVENT_STATE].size = 20, \ + .history[KGSL_PWREVENT_GPU_FREQ].size = 3, \ + .history[KGSL_PWREVENT_BUS_FREQ].size = 5, \ + .history[KGSL_PWREVENT_POPP].size = 5, \ + } +#endif diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c new file mode 100644 index 000000000000..53dd3270c75b --- /dev/null +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -0,0 +1,1258 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/export.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> +#include <linux/slab.h> +#include <linux/kmemleak.h> +#include <linux/highmem.h> +#include <linux/scatterlist.h> +#include <soc/qcom/scm.h> +#include <soc/qcom/secure_buffer.h> + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "kgsl_device.h" +#include "kgsl_log.h" + +/* + * The user can set this from debugfs to force failed memory allocations to + * fail without trying OOM first. 
This is a debug setting useful for + * stress applications that want to test failure cases without pushing the + * system into unrecoverable OOM panics + */ + +static bool sharedmem_noretry_flag; + +static DEFINE_MUTEX(kernel_map_global_lock); + +struct cp2_mem_chunks { + unsigned int chunk_list; + unsigned int chunk_list_size; + unsigned int chunk_size; +} __attribute__ ((__packed__)); + +struct cp2_lock_req { + struct cp2_mem_chunks chunks; + unsigned int mem_usage; + unsigned int lock; +} __attribute__ ((__packed__)); + +#define MEM_PROTECT_LOCK_ID2 0x0A +#define MEM_PROTECT_LOCK_ID2_FLAT 0x11 + +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .memtype = _type, \ + .show = _show, \ +} + +/* + * A structure to hold the attributes for a particular memory type. + * For each memory type in each process we store the current and maximum + * memory usage and display the counts in sysfs. This structure and + * the following macro allow us to simplify the definition for those + * adding new memory types + */ + +struct mem_entry_stats { + int memtype; + struct kgsl_mem_entry_attribute attr; + struct kgsl_mem_entry_attribute max_attr; +}; + + +#define MEM_ENTRY_STAT(_type, _name) \ +{ \ + .memtype = _type, \ + .attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \ + .max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \ + mem_entry_max_show), \ +} + +static void kgsl_cma_unlock_secure(struct kgsl_memdesc *memdesc); + +/** + * Show the current amount of memory allocated for the given memtype + */ + +static ssize_t +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].cur); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].max); +} + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); + struct kgsl_process_private *priv; + ssize_t ret; + + /* + * 1. sysfs_remove_file waits for reads to complete before the node + * is deleted. + * 2. kgsl_process_init_sysfs takes a refcount to the process_private, + * which is put at the end of kgsl_process_uninit_sysfs. + * These two conditions imply that priv will not be freed until this + * function completes, and no further locking is needed. + */ + priv = kobj ? 
container_of(kobj, struct kgsl_process_private, kobj) : + NULL; + + if (priv && pattr->show) + ret = pattr->show(priv, pattr->memtype, buf); + else + ret = -EIO; + + return ret; +} + +static const struct sysfs_ops mem_entry_sysfs_ops = { + .show = mem_entry_sysfs_show, +}; + +static struct kobj_type ktype_mem_entry = { + .sysfs_ops = &mem_entry_sysfs_ops, +}; + +static struct mem_entry_stats mem_stats[] = { + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel), + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), +#ifdef CONFIG_ION + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion), +#endif +}; + +void +kgsl_process_uninit_sysfs(struct kgsl_process_private *private) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr); + sysfs_remove_file(&private->kobj, + &mem_stats[i].max_attr.attr); + } + + kobject_put(&private->kobj); + /* Put the refcount we got in kgsl_process_init_sysfs */ + kgsl_process_private_put(private); +} + +/** + * kgsl_process_init_sysfs() - Initialize and create sysfs files for a process + * + * @device: Pointer to kgsl device struct + * @private: Pointer to the structure for the process + * + * kgsl_process_init_sysfs() is called at the time of creating the + * process struct when a process opens the kgsl device for the first time. + * This function creates the sysfs files for the process. + */ +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private) +{ + unsigned char name[16]; + int i; + + /* Keep private valid until the sysfs enries are removed. */ + kgsl_process_private_get(private); + + snprintf(name, sizeof(name), "%d", private->pid); + + if (kobject_init_and_add(&private->kobj, &ktype_mem_entry, + kgsl_driver.prockobj, name)) { + WARN(1, "Unable to add sysfs dir '%s'\n", name); + return; + } + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + if (sysfs_create_file(&private->kobj, + &mem_stats[i].attr.attr)) + WARN(1, "Couldn't create sysfs file '%s'\n", + mem_stats[i].attr.attr.name); + + if (sysfs_create_file(&private->kobj, + &mem_stats[i].max_attr.attr)) + WARN(1, "Couldn't create sysfs file '%s'\n", + mem_stats[i].max_attr.attr.name); + + } +} + +static ssize_t kgsl_drv_memstat_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + uint64_t val = 0; + + if (!strcmp(attr->attr.name, "vmalloc")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc); + else if (!strcmp(attr->attr.name, "vmalloc_max")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc_max); + else if (!strcmp(attr->attr.name, "page_alloc")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc); + else if (!strcmp(attr->attr.name, "page_alloc_max")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc_max); + else if (!strcmp(attr->attr.name, "coherent")) + val = atomic_long_read(&kgsl_driver.stats.coherent); + else if (!strcmp(attr->attr.name, "coherent_max")) + val = atomic_long_read(&kgsl_driver.stats.coherent_max); + else if (!strcmp(attr->attr.name, "secure")) + val = atomic_long_read(&kgsl_driver.stats.secure); + else if (!strcmp(attr->attr.name, "secure_max")) + val = atomic_long_read(&kgsl_driver.stats.secure_max); + else if (!strcmp(attr->attr.name, "mapped")) + val = atomic_long_read(&kgsl_driver.stats.mapped); + else if (!strcmp(attr->attr.name, "mapped_max")) + val = atomic_long_read(&kgsl_driver.stats.mapped_max); + + return snprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t kgsl_drv_full_cache_threshold_store(struct device *dev, + struct device_attribute *attr, + 
const char *buf, size_t count) +{ + int ret; + unsigned int thresh = 0; + + ret = kgsl_sysfs_store(buf, &thresh); + if (ret) + return ret; + + kgsl_driver.full_cache_threshold = thresh; + return count; +} + +static ssize_t kgsl_drv_full_cache_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", + kgsl_driver.full_cache_threshold); +} + +static DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(secure, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(secure_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(mapped_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(full_cache_threshold, 0644, + kgsl_drv_full_cache_threshold_show, + kgsl_drv_full_cache_threshold_store); + +static const struct device_attribute *drv_attr_list[] = { + &dev_attr_vmalloc, + &dev_attr_vmalloc_max, + &dev_attr_page_alloc, + &dev_attr_page_alloc_max, + &dev_attr_coherent, + &dev_attr_coherent_max, + &dev_attr_secure, + &dev_attr_secure_max, + &dev_attr_mapped, + &dev_attr_mapped_max, + &dev_attr_full_cache_threshold, + NULL +}; + +void +kgsl_sharedmem_uninit_sysfs(void) +{ + kgsl_remove_device_sysfs_files(&kgsl_driver.virtdev, drv_attr_list); +} + +int +kgsl_sharedmem_init_sysfs(void) +{ + return kgsl_create_device_sysfs_files(&kgsl_driver.virtdev, + drv_attr_list); +} + +static int kgsl_allocate_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size) { + int ret; + + if (MMU_FEATURE(&device->mmu, KGSL_MMU_HYP_SECURE_ALLOC)) + ret = kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size); + else + ret = kgsl_cma_alloc_secure(device, memdesc, size); + + return ret; +} + +int kgsl_allocate_user(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size, uint64_t mmapsize, uint64_t flags) +{ + int ret; + + if (size == 0) + return -EINVAL; + + memdesc->flags = flags; + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) + ret = kgsl_cma_alloc_coherent(device, memdesc, pagetable, size); + else if (flags & KGSL_MEMFLAGS_SECURE) + ret = kgsl_allocate_secure(device, memdesc, pagetable, size); + else + ret = kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size); + + return ret; +} + +static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + int i, pgoff; + struct scatterlist *s = memdesc->sgt->sgl; + unsigned int offset; + + offset = ((unsigned long) vmf->virtual_address - vma->vm_start); + + if (offset >= memdesc->size) + return VM_FAULT_SIGBUS; + + pgoff = offset >> PAGE_SHIFT; + + /* + * The sglist might be comprised of mixed blocks of memory depending + * on how many 64K pages were allocated. 
This means we have to do math + * to find the actual 4K page to map in user space + */ + + for (i = 0; i < memdesc->sgt->nents; i++) { + int npages = s->length >> PAGE_SHIFT; + + if (pgoff < npages) { + struct page *page = sg_page(s); + + page = nth_page(page, pgoff); + + get_page(page); + vmf->page = page; + + return 0; + } + + pgoff -= npages; + s = sg_next(s); + } + + return VM_FAULT_SIGBUS; +} + +/* + * kgsl_page_alloc_unmap_kernel() - Unmap the memory in memdesc + * + * @memdesc: The memory descriptor which contains information about the memory + * + * Unmaps the memory mapped into kernel address space + */ +static void kgsl_page_alloc_unmap_kernel(struct kgsl_memdesc *memdesc) +{ + mutex_lock(&kernel_map_global_lock); + if (!memdesc->hostptr) { + BUG_ON(memdesc->hostptr_count); + goto done; + } + memdesc->hostptr_count--; + if (memdesc->hostptr_count) + goto done; + vunmap(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.vmalloc); + memdesc->hostptr = NULL; +done: + mutex_unlock(&kernel_map_global_lock); +} + +static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) +{ + unsigned int i = 0; + struct scatterlist *sg; + + kgsl_page_alloc_unmap_kernel(memdesc); + /* we certainly do not expect the hostptr to still be mapped */ + BUG_ON(memdesc->hostptr); + + /* Secure buffers need to be unlocked before being freed */ + if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) { + int ret; + int dest_perms = PERM_READ | PERM_WRITE | PERM_EXEC; + int source_vm = VMID_CP_PIXEL; + int dest_vm = VMID_HLOS; + + ret = hyp_assign_table(memdesc->sgt, &source_vm, 1, + &dest_vm, &dest_perms, 1); + if (ret) { + pr_err("Secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n", + memdesc->gpuaddr, memdesc->size, ret); + BUG(); + } + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); + } else { + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + } + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + /* + * sg_alloc_table_from_pages() will collapse any physically + * adjacent pages into a single scatterlist entry. We cannot + * just call __free_pages() on the entire set since we cannot + * ensure that the size is a whole order. Instead, free each + * page or compound page group individually. 
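As an illustrative example (assuming 4 KB base pages): a 128 KB scatterlist entry built from two 64 KB compound allocations is released with two __free_pages() calls of order 4, advancing 16 pages at a time rather than touching every 4 KB page.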
+ */ + struct page *p = sg_page(sg), *next; + unsigned int j = 0, count; + while (j < (sg->length/PAGE_SIZE)) { + if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) + ClearPagePrivate(p); + + count = 1 << compound_order(p); + next = nth_page(p, count); + __free_pages(p, compound_order(p)); + p = next; + j += count; + + } + } +} + +/* + * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address + * space + * + * @memdesc - The memory descriptor which contains information about the memory + * + * Return: 0 on success else error code + */ +static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) +{ + int ret = 0; + + /* Sanity check - don't map more than we could possibly chew */ + if (memdesc->size > ULONG_MAX) + return -ENOMEM; + + mutex_lock(&kernel_map_global_lock); + if (!memdesc->hostptr) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + struct page **pages = NULL; + struct scatterlist *sg; + int npages = PAGE_ALIGN(memdesc->size) >> PAGE_SHIFT; + int sglen = memdesc->sgt->nents; + int i, count = 0; + + /* create a list of pages to call vmap */ + pages = kgsl_malloc(npages * sizeof(struct page *)); + if (pages == NULL) { + ret = -ENOMEM; + goto done; + } + + for_each_sg(memdesc->sgt->sgl, sg, sglen, i) { + struct page *page = sg_page(sg); + int j; + + for (j = 0; j < sg->length >> PAGE_SHIFT; j++) + pages[count++] = page++; + } + + + memdesc->hostptr = vmap(pages, count, + VM_IOREMAP, page_prot); + if (memdesc->hostptr) + KGSL_STATS_ADD(memdesc->size, + &kgsl_driver.stats.vmalloc, + &kgsl_driver.stats.vmalloc_max); + else + ret = -ENOMEM; + kgsl_free(pages); + } + if (memdesc->hostptr) + memdesc->hostptr_count++; +done: + mutex_unlock(&kernel_map_global_lock); + + return ret; +} + +static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + unsigned long offset, pfn; + int ret; + + offset = ((unsigned long) vmf->virtual_address - vma->vm_start) >> + PAGE_SHIFT; + + pfn = (memdesc->physaddr >> PAGE_SHIFT) + offset; + ret = vm_insert_pfn(vma, (unsigned long) vmf->virtual_address, pfn); + + if (ret == -ENOMEM || ret == -EAGAIN) + return VM_FAULT_OOM; + else if (ret == -EFAULT) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static void kgsl_cma_coherent_free(struct kgsl_memdesc *memdesc) +{ + struct dma_attrs *attrs = NULL; + + if (memdesc->hostptr) { + if (memdesc->priv & KGSL_MEMDESC_SECURE) { + atomic_long_sub(memdesc->size, + &kgsl_driver.stats.secure); + + kgsl_cma_unlock_secure(memdesc); + attrs = &memdesc->attrs; + } else + atomic_long_sub(memdesc->size, + &kgsl_driver.stats.coherent); + + dma_free_attrs(memdesc->dev, (size_t) memdesc->size, + memdesc->hostptr, memdesc->physaddr, attrs); + } +} + +/* Global */ +static struct kgsl_memdesc_ops kgsl_page_alloc_ops = { + .free = kgsl_page_alloc_free, + .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY, + .vmfault = kgsl_page_alloc_vmfault, + .map_kernel = kgsl_page_alloc_map_kernel, + .unmap_kernel = kgsl_page_alloc_unmap_kernel, +}; + +/* CMA ops - used during NOMMU mode */ +static struct kgsl_memdesc_ops kgsl_cma_ops = { + .free = kgsl_cma_coherent_free, + .vmflags = VM_DONTDUMP | VM_PFNMAP | VM_DONTEXPAND | VM_DONTCOPY, + .vmfault = kgsl_contiguous_vmfault, +}; + +#ifdef CONFIG_ARM64 +/* + * For security reasons, ARMv8 doesn't allow invalidate only on read-only + * mapping. It would be performance prohibitive to read the permissions on + * the buffer before the operation. 
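A hypothetical caller, shown only to illustrate the effect on such targets (kgsl_cache_range_op() and KGSL_CACHE_OP_INV come from the driver; the helper itself is made up):

	/* Hypothetical helper: on ARMv8 the invalidate below is executed as a flush */
	static int example_sync_for_cpu(struct kgsl_memdesc *memdesc, uint64_t len)
	{
		return kgsl_cache_range_op(memdesc, 0, len, KGSL_CACHE_OP_INV);
	}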
Every use case that we have found does not + * assume that an invalidate operation is invalidate only, so we feel + * comfortable turning invalidates into flushes for these targets + */ +static inline unsigned int _fixup_cache_range_op(unsigned int op) +{ + if (op == KGSL_CACHE_OP_INV) + return KGSL_CACHE_OP_FLUSH; + return op; +} +#else +static inline unsigned int _fixup_cache_range_op(unsigned int op) +{ + return op; +} +#endif + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, uint64_t offset, + uint64_t size, unsigned int op) +{ + /* + * If the buffer is mapped in the kernel operate on that address + * otherwise use the user address + */ + + void *addr = (memdesc->hostptr) ? + memdesc->hostptr : (void *) memdesc->useraddr; + + /* Make sure that size is non-zero */ + if (!size) + return -EINVAL; + + /* Make sure that the offset + size isn't bigger than we can handle */ + if ((offset + size) > ULONG_MAX) + return -ERANGE; + + /* Make sure the offset + size do not overflow the address */ + if (addr + ((size_t) offset + (size_t) size) < addr) + return -ERANGE; + + /* Check that offset+length does not exceed memdesc->size */ + if (offset + size > memdesc->size) + return -ERANGE; + + /* Return quietly if the buffer isn't mapped on the CPU */ + if (addr == NULL) + return 0; + + addr = addr + offset; + + /* + * The dmac_xxx_range functions handle addresses and sizes that + * are not aligned to the cacheline size correctly. + */ + + switch (_fixup_cache_range_op(op)) { + case KGSL_CACHE_OP_FLUSH: + dmac_flush_range(addr, addr + (size_t) size); + break; + case KGSL_CACHE_OP_CLEAN: + dmac_clean_range(addr, addr + (size_t) size); + break; + case KGSL_CACHE_OP_INV: + dmac_inv_range(addr, addr + (size_t) size); + break; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_cache_range_op); + +#ifndef CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS +static inline int get_page_size(size_t size, unsigned int align) +{ + return (align >= ilog2(SZ_64K) && size >= SZ_64K) + ? SZ_64K : PAGE_SIZE; +} +#else +static inline int get_page_size(size_t size, unsigned int align) +{ + return PAGE_SIZE; +} +#endif + +static int +_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size) +{ + int ret = 0; + unsigned int j, pcount = 0, page_size, len_alloc; + size_t len; + struct page **pages = NULL; + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + void *ptr; + unsigned int align; + unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT; + + align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT; + + page_size = get_page_size(size, align); + + /* + * The alignment cannot be less than the intended page size - it can be + * larger however to accomodate hardware quirks + */ + + if (align < ilog2(page_size)) + kgsl_memdesc_set_align(memdesc, ilog2(page_size)); + + if (size > SIZE_MAX) + return -EINVAL; + + /* + * There needs to be enough room in the page array to be able to + * service the allocation entirely with PAGE_SIZE sized chunks + */ + + len_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT; + + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_page_alloc_ops; + + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) + return -ENOMEM; + + /* + * Allocate space to store the list of pages to send to vmap. 
This is an + * array of pointers so we can track 1024 pages per page of allocation + */ + + pages = kgsl_malloc(len_alloc * sizeof(struct page *)); + + if (pages == NULL) { + ret = -ENOMEM; + goto done; + } + + len = size; + + while (len > 0) { + struct page *page; + gfp_t gfp_mask = __GFP_HIGHMEM; + int j; + + /* don't waste space at the end of the allocation*/ + if (len < page_size) + page_size = PAGE_SIZE; + + /* + * Don't do some of the more aggressive memory recovery + * techniques for large order allocations + */ + if (page_size != PAGE_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NORETRY | + __GFP_NO_KSWAPD | __GFP_NOWARN; + else + gfp_mask |= GFP_KERNEL; + + if (sharedmem_noretry_flag == true) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + + page = alloc_pages(gfp_mask, get_order(page_size)); + + if (page == NULL) { + if (page_size != PAGE_SIZE) { + page_size = PAGE_SIZE; + continue; + } + + /* + * Update sglen and memdesc size,as requested allocation + * not served fully. So that they can be correctly freed + * in kgsl_sharedmem_free(). + */ + memdesc->size = (size - len); + + if (sharedmem_noretry_flag != true) + KGSL_CORE_ERR( + "Out of memory: only allocated %lldKB of %lldKB requested\n", + (size - len) >> 10, size >> 10); + + ret = -ENOMEM; + goto done; + } + + for (j = 0; j < page_size >> PAGE_SHIFT; j++) + pages[pcount++] = nth_page(page, j); + + len -= page_size; + memdesc->size += page_size; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, pages, pcount, 0, + memdesc->size, GFP_KERNEL); + if (ret) + goto done; + + /* Call to the hypervisor to lock any secure buffer allocations */ + if (memdesc->flags & KGSL_MEMFLAGS_SECURE) { + unsigned int i; + struct scatterlist *sg; + int dest_perms = PERM_READ | PERM_WRITE; + int source_vm = VMID_HLOS; + int dest_vm = VMID_CP_PIXEL; + + ret = hyp_assign_table(memdesc->sgt, &source_vm, 1, + &dest_vm, &dest_perms, 1); + if (ret) + goto done; + + /* Set private bit for each sg to indicate that its secured */ + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) + SetPagePrivate(sg_page(sg)); + + memdesc->priv |= KGSL_MEMDESC_TZ_LOCKED; + + /* Record statistics */ + KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.secure, + &kgsl_driver.stats.secure_max); + + /* Don't map and zero the locked secure buffer */ + goto done; + } + + /* + * All memory that goes to the user has to be zeroed out before it gets + * exposed to userspace. This means that the memory has to be mapped in + * the kernel, zeroed (memset) and then unmapped. This also means that + * the dcache has to be flushed to ensure coherency between the kernel + * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped + * zeroed and unmaped each individual page, and then we had to turn + * around and call flush_dcache_page() on that page to clear the caches. + * This was killing us for performance. Instead, we found it is much + * faster to allocate the pages without GFP_ZERO, map a chunk of the + * range ('step' pages), memset it, flush it and then unmap + * - this results in a factor of 4 improvement for speed for large + * buffers. There is a small decrease in speed for small buffers, + * but only on the order of a few microseconds at best. The 'step' + * size is based on a guess at the amount of free vmalloc space, but + * will scale down if there's not enough free space. 
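To put rough numbers on it (hypothetical 240 MB vmalloc window, 4 KB pages): the initial step is (240 MB / 8) >> PAGE_SHIFT = 7680 pages, so each vmap/memset/flush pass covers roughly 30 MB, and the step halves every time vmap() fails.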
+ */ + for (j = 0; j < pcount; j += step) { + step = min(step, pcount - j); + + ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot); + + if (ptr != NULL) { + memset(ptr, 0, step * PAGE_SIZE); + dmac_flush_range(ptr, ptr + step * PAGE_SIZE); + vunmap(ptr); + } else { + int k; + /* Very, very, very slow path */ + + for (k = j; k < j + step; k++) { + ptr = kmap_atomic(pages[k]); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr); + } + /* scale down the step size to avoid this path */ + if (step > 1) + step >>= 1; + } + } + + KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc, + &kgsl_driver.stats.page_alloc_max); + +done: + if (ret) { + unsigned int count = 1; + for (j = 0; j < pcount; j += count) { + count = 1 << compound_order(pages[j]); + __free_pages(pages[j], compound_order(pages[j])); + } + + kfree(memdesc->sgt); + memset(memdesc, 0, sizeof(*memdesc)); + } + kgsl_free(pages); + + return ret; +} + +int +kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size) +{ + size = PAGE_ALIGN(size); + if (size == 0) + return -EINVAL; + + return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size); +} +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) +{ + if (memdesc == NULL || memdesc->size == 0) + return; + + if (memdesc->gpuaddr) { + kgsl_mmu_unmap(memdesc->pagetable, memdesc); + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + } + + if (memdesc->ops && memdesc->ops->free) + memdesc->ops->free(memdesc); + + if (memdesc->sgt) { + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + } + + memset(memdesc, 0, sizeof(*memdesc)); +} +EXPORT_SYMBOL(kgsl_sharedmem_free); + +int +kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes) +{ + uint32_t *src; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + + rmb(); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_readl); + +int +kgsl_sharedmem_writel(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src) +{ + uint32_t *dst; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_write(device, + memdesc->gpuaddr + offsetbytes, + src); + dst = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = src; + + wmb(); + + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_writel); + +int +kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes) +{ + uint64_t *src; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + + /* + * We are reading shared memory between CPU and GPU. 
+ * Make sure reads before this are complete + */ + rmb(); + src = (uint64_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_readq); + +int +kgsl_sharedmem_writeq(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src) +{ + uint64_t *dst; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_write(device, + lower_32_bits(memdesc->gpuaddr + offsetbytes), src); + kgsl_cffdump_write(device, + upper_32_bits(memdesc->gpuaddr + offsetbytes), src); + dst = (uint64_t *)(memdesc->hostptr + offsetbytes); + *dst = src; + + /* + * We are writing to shared memory between CPU and GPU. + * Make sure write above is posted immediately + */ + wmb(); + + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_writeq); + +int +kgsl_sharedmem_set(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, uint64_t offsetbytes, + unsigned int value, uint64_t sizebytes) +{ + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); + BUG_ON(offsetbytes + sizebytes > memdesc->size); + + kgsl_cffdump_memset(device, + memdesc->gpuaddr + offsetbytes, value, + sizebytes); + memset(memdesc->hostptr + offsetbytes, value, sizebytes); + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_set); + +static const char * const memtype_str[] = { + [KGSL_MEMTYPE_OBJECTANY] = "any(0)", + [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer", + [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer", + [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer", + [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer", + [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer", + [KGSL_MEMTYPE_TEXTURE] = "texture", + [KGSL_MEMTYPE_SURFACE] = "surface", + [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface", + [KGSL_MEMTYPE_GL] = "gl", + [KGSL_MEMTYPE_CL] = "cl", + [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map", + [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap", + [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map", + [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap", + [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack", + [KGSL_MEMTYPE_COMMAND] = "command", + [KGSL_MEMTYPE_2D] = "2d", + [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image", + [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow", + [KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample", + /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */ +}; + +void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) +{ + unsigned int type = MEMFLAGS(memflags, KGSL_MEMTYPE_MASK, + KGSL_MEMTYPE_SHIFT); + + if (type == KGSL_MEMTYPE_KERNEL) + strlcpy(name, "kernel", name_size); + else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL) + strlcpy(name, memtype_str[type], name_size); + else + snprintf(name, name_size, "unknown(%3d)", type); +} +EXPORT_SYMBOL(kgsl_get_memory_usage); + +int kgsl_cma_alloc_coherent(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, uint64_t size) +{ + int result = 0; + + size = ALIGN(size, PAGE_SIZE); + + if (size == 0 || size > SIZE_MAX) + return -EINVAL; + + memdesc->size = size; + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_cma_ops; + memdesc->dev = device->dev->parent; + + memdesc->hostptr = dma_alloc_attrs(memdesc->dev, (size_t) size, + &memdesc->physaddr, GFP_KERNEL, NULL); + + if (memdesc->hostptr == NULL) { + 
result = -ENOMEM; + goto err; + } + + result = memdesc_sg_dma(memdesc, memdesc->physaddr, size); + if (result) + goto err; + + /* Record statistics */ + + KGSL_STATS_ADD(size, &kgsl_driver.stats.coherent, + &kgsl_driver.stats.coherent_max); + +err: + if (result) + kgsl_sharedmem_free(memdesc); + + return result; +} +EXPORT_SYMBOL(kgsl_cma_alloc_coherent); + +static int scm_lock_chunk(struct kgsl_memdesc *memdesc, int lock) +{ + struct cp2_lock_req request; + unsigned int resp; + unsigned int *chunk_list; + struct scm_desc desc = {0}; + int result; + + /* + * Flush the virt addr range before sending the memory to the + * secure environment to ensure the data is actually present + * in RAM + * + * Chunk_list holds the physical address of secure memory. + * Pass in the virtual address of chunk_list to flush. + * Chunk_list size is 1 because secure memory is physically + * contiguous. + */ + chunk_list = kzalloc(sizeof(unsigned int), GFP_KERNEL); + if (!chunk_list) + return -ENOMEM; + + chunk_list[0] = memdesc->physaddr; + dmac_flush_range((void *)chunk_list, (void *)chunk_list + 1); + + request.chunks.chunk_list = virt_to_phys(chunk_list); + /* + * virt_to_phys(chunk_list) may be an address > 4GB. It is guaranteed + * that when using scm_call (the older interface), the phys addresses + * will be restricted to below 4GB. + */ + desc.args[0] = virt_to_phys(chunk_list); + desc.args[1] = request.chunks.chunk_list_size = 1; + desc.args[2] = request.chunks.chunk_size = (unsigned int) memdesc->size; + desc.args[3] = request.mem_usage = 0; + desc.args[4] = request.lock = lock; + desc.args[5] = 0; + desc.arginfo = SCM_ARGS(6, SCM_RW, SCM_VAL, SCM_VAL, SCM_VAL, SCM_VAL, + SCM_VAL); + kmap_flush_unused(); + kmap_atomic_flush_unused(); + if (!is_scm_armv8()) { + result = scm_call(SCM_SVC_MP, MEM_PROTECT_LOCK_ID2, + &request, sizeof(request), &resp, sizeof(resp)); + } else { + result = scm_call2(SCM_SIP_FNID(SCM_SVC_MP, + MEM_PROTECT_LOCK_ID2_FLAT), &desc); + resp = desc.ret[0]; + } + + kfree(chunk_list); + return result; +} + +int kgsl_cma_alloc_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t size) +{ + struct kgsl_iommu *iommu = device->mmu.priv; + int result = 0; + struct kgsl_pagetable *pagetable = device->mmu.securepagetable; + size_t aligned; + + if (size == 0) + return -EINVAL; + + /* Align size to 1M boundaries */ + aligned = ALIGN(size, SZ_1M); + + /* The SCM call uses an unsigned int for the size */ + if (aligned > UINT_MAX) + return -EINVAL; + + /* + * If there is more than a page gap between the requested size and the + * aligned size we don't need to add more memory for a guard page. Yay! 
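For example (illustrative sizes): a request of 1 MB + 4 KB aligns up to 2 MB, leaving a 1020 KB gap, so the explicit guard page is dropped; a request of exactly 1 MB aligns to itself, leaves no gap, and keeps it.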
+ */ + + if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) + if (aligned - size >= SZ_4K) + memdesc->priv &= ~KGSL_MEMDESC_GUARD_PAGE; + + memdesc->size = aligned; + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_cma_ops; + memdesc->dev = iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE].dev; + + init_dma_attrs(&memdesc->attrs); + dma_set_attr(DMA_ATTR_STRONGLY_ORDERED, &memdesc->attrs); + + memdesc->hostptr = dma_alloc_attrs(memdesc->dev, aligned, + &memdesc->physaddr, GFP_KERNEL, &memdesc->attrs); + + if (memdesc->hostptr == NULL) { + result = -ENOMEM; + goto err; + } + + result = memdesc_sg_dma(memdesc, memdesc->physaddr, aligned); + if (result) + goto err; + + result = scm_lock_chunk(memdesc, 1); + + if (result != 0) + goto err; + + /* Set the private bit to indicate that we've secured this */ + SetPagePrivate(sg_page(memdesc->sgt->sgl)); + + memdesc->priv |= KGSL_MEMDESC_TZ_LOCKED; + + /* Record statistics */ + KGSL_STATS_ADD(aligned, &kgsl_driver.stats.secure, + &kgsl_driver.stats.secure_max); +err: + if (result) + kgsl_sharedmem_free(memdesc); + + return result; +} +EXPORT_SYMBOL(kgsl_cma_alloc_secure); + +/** + * kgsl_cma_unlock_secure() - Unlock secure memory by calling TZ + * @memdesc: memory descriptor + */ +static void kgsl_cma_unlock_secure(struct kgsl_memdesc *memdesc) +{ + if (memdesc->size == 0 || !(memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)) + return; + + if (!scm_lock_chunk(memdesc, 0)) + ClearPagePrivate(sg_page(memdesc->sgt->sgl)); +} + +void kgsl_sharedmem_set_noretry(bool val) +{ + sharedmem_noretry_flag = val; +} + +bool kgsl_sharedmem_get_noretry(void) +{ + return sharedmem_noretry_flag; +} diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h new file mode 100644 index 000000000000..9e5651d18df8 --- /dev/null +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -0,0 +1,331 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_SHAREDMEM_H +#define __KGSL_SHAREDMEM_H + +#include <linux/slab.h> +#include <linux/dma-mapping.h> +#include "kgsl_mmu.h" +#include <linux/slab.h> +#include <linux/kmemleak.h> +#include <linux/iommu.h> + +#include "kgsl_mmu.h" +#include "kgsl_log.h" + +struct kgsl_device; +struct kgsl_process_private; + +#define KGSL_CACHE_OP_INV 0x01 +#define KGSL_CACHE_OP_FLUSH 0x02 +#define KGSL_CACHE_OP_CLEAN 0x03 + +int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size); + +int kgsl_cma_alloc_coherent(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, uint64_t size); + +int kgsl_cma_alloc_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t size); + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc); + +int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes); + +int kgsl_sharedmem_writel(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src); + +int kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes); + +int kgsl_sharedmem_writeq(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src); + +int kgsl_sharedmem_set(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, unsigned int value, + uint64_t sizebytes); + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, + uint64_t offset, uint64_t size, + unsigned int op); + +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private); +void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); + +int kgsl_sharedmem_init_sysfs(void); +void kgsl_sharedmem_uninit_sysfs(void); + +int kgsl_allocate_user(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size, uint64_t mmapsize, uint64_t flags); + +#define MEMFLAGS(_flags, _mask, _shift) \ + ((unsigned int) (((_flags) & (_mask)) >> (_shift))) + +/* + * kgsl_memdesc_get_align - Get alignment flags from a memdesc + * @memdesc - the memdesc + * + * Returns the alignment requested, as power of 2 exponent. + */ +static inline int +kgsl_memdesc_get_align(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_MEMALIGN_MASK, + KGSL_MEMALIGN_SHIFT); +} + +/* + * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc + * @memdesc: the memdesc + * + * Returns a KGSL_CACHEMODE* value. + */ +static inline int +kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_CACHEMODE_MASK, + KGSL_CACHEMODE_SHIFT); +} + +static inline unsigned int +kgsl_memdesc_get_memtype(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_MEMTYPE_MASK, + KGSL_MEMTYPE_SHIFT); +} +/* + * kgsl_memdesc_set_align - Set alignment flags of a memdesc + * @memdesc - the memdesc + * @align - alignment requested, as a power of 2 exponent. 
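Usage sketch (illustrative; memdesc is assumed to be a struct kgsl_memdesc * owned by the caller): alignment is passed as a power-of-two exponent, so a 64 KB alignment request looks like

	kgsl_memdesc_set_align(memdesc, ilog2(SZ_64K));	/* exponent 16 */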
+ */ +static inline int +kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc, unsigned int align) +{ + if (align > 32) { + KGSL_CORE_ERR("Alignment too big, restricting to 2^32\n"); + align = 32; + } + + memdesc->flags &= ~KGSL_MEMALIGN_MASK; + memdesc->flags |= (align << KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK; + return 0; +} + +/** + * kgsl_memdesc_usermem_type - return buffer type + * @memdesc - the memdesc + * + * Returns a KGSL_MEM_ENTRY_* value for this buffer, which + * identifies if was allocated by us, or imported from + * another allocator. + */ +static inline unsigned int +kgsl_memdesc_usermem_type(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_MEMFLAGS_USERMEM_MASK, + KGSL_MEMFLAGS_USERMEM_SHIFT); +} + +/** + * memdesg_sg_dma() - Turn a dma_addr (from CMA) into a sg table + * @memdesc: Pointer to the memdesc structure + * @addr: Physical address from the dma_alloc function + * @size: Size of the chunk + * + * Create a sg table for the contigious chunk specified by addr and size. + */ +static inline int +memdesc_sg_dma(struct kgsl_memdesc *memdesc, + phys_addr_t addr, uint64_t size) +{ + int ret; + struct page *page = phys_to_page(addr); + + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) + return -ENOMEM; + + ret = sg_alloc_table(memdesc->sgt, 1, GFP_KERNEL); + if (ret) { + kfree(memdesc->sgt); + memdesc->sgt = NULL; + return ret; + } + + sg_set_page(memdesc->sgt->sgl, page, (size_t) size, 0); + return 0; +} + +/* + * kgsl_memdesc_is_global - is this a globally mapped buffer? + * @memdesc: the memdesc + * + * Returns nonzero if this is a global mapping, 0 otherwise + */ +static inline int kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc) +{ + return (memdesc->priv & KGSL_MEMDESC_GLOBAL) != 0; +} + +/* + * kgsl_memdesc_is_secured - is this a secure buffer? + * @memdesc: the memdesc + * + * Returns true if this is a secure mapping, false otherwise + */ +static inline bool kgsl_memdesc_is_secured(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_SECURE); +} + +/* + * kgsl_memdesc_has_guard_page - is the last page a guard page? + * @memdesc - the memdesc + * + * Returns nonzero if there is a guard page, 0 otherwise + */ +static inline int +kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc) +{ + return (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) != 0; +} + +/* + * kgsl_memdesc_guard_page_size - returns guard page size + * @memdesc - the memdesc + * + * Returns guard page size + */ +static inline int +kgsl_memdesc_guard_page_size(const struct kgsl_mmu *mmu, + const struct kgsl_memdesc *memdesc) +{ + return kgsl_memdesc_is_secured(memdesc) ? mmu->secure_align_mask + 1 : + PAGE_SIZE; +} + +/* + * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU? + * @memdesc - the memdesc + */ +static inline int +kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc) +{ + return (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP) != 0; +} + +/* + * kgsl_memdesc_mmapsize - get the size of the mmap region + * @memdesc - the memdesc + * + * The entire memdesc must be mapped. Additionally if the + * CPU mapping is going to be mirrored, there must be room + * for the guard page to be mapped so that the address spaces + * match up. 
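For example, a 64 KB allocation carrying a guard page reports a 68 KB mmap size (size + SZ_4K); without the guard page it reports 64 KB.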
+ */ +static inline uint64_t +kgsl_memdesc_mmapsize(const struct kgsl_memdesc *memdesc) +{ + uint64_t size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += SZ_4K; + return size; +} + +static inline int +kgsl_allocate_contiguous(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, size_t size) +{ + int ret; + + size = ALIGN(size, PAGE_SIZE); + + ret = kgsl_cma_alloc_coherent(device, memdesc, NULL, size); + if (!ret && (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)) + memdesc->gpuaddr = memdesc->physaddr; + + return ret; +} + +/* + * kgsl_allocate_global() - Allocate GPU accessible memory that will be global + * across all processes + * @device: The device pointer to which the memdesc belongs + * @memdesc: Pointer to a KGSL memory descriptor for the memory allocation + * @size: size of the allocation + * @flags: Allocation flags that control how the memory is mapped + * @priv: Priv flags that controls memory attributes + * + * Allocate contiguous memory for internal use and add the allocation to the + * list of global pagetable entries that will be mapped at the same address in + * all pagetables. This is for use for device wide GPU allocations such as + * ringbuffers. + */ +static inline int kgsl_allocate_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t size, uint64_t flags, + unsigned int priv) +{ + int ret; + + BUG_ON(size > SIZE_MAX); + + if (size == 0) + return -EINVAL; + + memdesc->flags = flags; + memdesc->priv = priv; + + ret = kgsl_allocate_contiguous(device, memdesc, (size_t) size); + + if (!ret) { + ret = kgsl_add_global_pt_entry(device, memdesc); + if (ret) + kgsl_sharedmem_free(memdesc); + } + + return ret; +} + +/** + * kgsl_free_global() - Free a device wide GPU allocation and remove it from the + * global pagetable entry list + * + * @memdesc: Pointer to the GPU memory descriptor to free + * + * Remove the specific memory descriptor from the global pagetable entry list + * and free it + */ +static inline void kgsl_free_global(struct kgsl_memdesc *memdesc) +{ + kgsl_remove_global_pt_entry(memdesc); + kgsl_sharedmem_free(memdesc); +} + +void kgsl_sharedmem_set_noretry(bool val); +bool kgsl_sharedmem_get_noretry(void); + +#endif /* __KGSL_SHAREDMEM_H */ diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c new file mode 100644 index 000000000000..8116ccaa96bd --- /dev/null +++ b/drivers/gpu/msm/kgsl_snapshot.c @@ -0,0 +1,1051 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/export.h> +#include <linux/time.h> +#include <linux/sysfs.h> +#include <linux/utsname.h> +#include <linux/sched.h> +#include <linux/idr.h> + +#include "kgsl.h" +#include "kgsl_log.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" +#include "adreno_cp_parser.h" + +/* Placeholder for list of ib objects that contain all objects in that IB */ + +struct kgsl_snapshot_cp_obj { + struct adreno_ib_object_list *ib_obj_list; + struct list_head node; +}; + +struct snapshot_obj_itr { + u8 *buf; /* Buffer pointer to write to */ + int pos; /* Current position in the sequence */ + loff_t offset; /* file offset to start writing from */ + size_t remain; /* Bytes remaining in buffer */ + size_t write; /* Bytes written so far */ +}; + +static void obj_itr_init(struct snapshot_obj_itr *itr, u8 *buf, + loff_t offset, size_t remain) +{ + itr->buf = buf; + itr->offset = offset; + itr->remain = remain; + itr->pos = 0; + itr->write = 0; +} + +static int obj_itr_out(struct snapshot_obj_itr *itr, void *src, int size) +{ + if (itr->remain == 0) + return 0; + + if ((itr->pos + size) <= itr->offset) + goto done; + + /* Handle the case that offset is in the middle of the buffer */ + + if (itr->offset > itr->pos) { + src += (itr->offset - itr->pos); + size -= (itr->offset - itr->pos); + + /* Advance pos to the offset start */ + itr->pos = itr->offset; + } + + if (size > itr->remain) + size = itr->remain; + + memcpy(itr->buf, src, size); + + itr->buf += size; + itr->write += size; + itr->remain -= size; + +done: + itr->pos += size; + return size; +} + +/* idr_for_each function to count the number of contexts */ + +static int snapshot_context_count(int id, void *ptr, void *data) +{ + int *count = data; + *count = *count + 1; + + return 0; +} + +/* + * To simplify the iterator loop use a global pointer instead of trying + * to pass around double star references to the snapshot data + */ + +static u8 *_ctxtptr; + +static int snapshot_context_info(int id, void *ptr, void *data) +{ + struct kgsl_snapshot_linux_context *header = + (struct kgsl_snapshot_linux_context *)_ctxtptr; + struct kgsl_context *context = ptr; + struct kgsl_device *device; + + device = context->device; + + header->id = id; + + /* Future-proof for per-context timestamps - for now, just + * return the global timestamp for all contexts + */ + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &header->timestamp_queued); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &header->timestamp_retired); + + _ctxtptr += sizeof(struct kgsl_snapshot_linux_context); + + return 0; +} + +/* Snapshot the Linux specific information */ +static size_t snapshot_os(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_linux *header = (struct kgsl_snapshot_linux *)buf; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct task_struct *task; + pid_t pid; + int ctxtcount = 0; + size_t size = sizeof(*header); + u64 temp_ptbase; + + /* Figure out how many active contexts there are - these will + * be appended on the end of the structure */ + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount); + read_unlock(&device->context_lock); + + size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context); + + /* Make sure there is enough room for the data */ + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "OS"); + return 0; + } + + memset(header, 0, sizeof(*header)); + + header->osid = 
KGSL_SNAPSHOT_OS_LINUX; + + header->state = SNAPSHOT_STATE_HUNG; + + /* Get the kernel build information */ + strlcpy(header->release, utsname()->release, sizeof(header->release)); + strlcpy(header->version, utsname()->version, sizeof(header->version)); + + /* Get the Unix time for the timestamp */ + header->seconds = get_seconds(); + + /* Remember the power information */ + header->power_flags = pwr->power_flags; + header->power_level = pwr->active_pwrlevel; + header->power_interval_timeout = pwr->interval_timeout; + header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]); + + /* + * Save the last active context from global index since its more + * reliable than currrent RB index + */ + kgsl_sharedmem_readl(&device->memstore, &header->current_context, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context)); + + + /* Get the current PT base */ + temp_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu); + /* Truncate to 32 bits in case LPAE is used */ + header->ptbase = (__u32)temp_ptbase; + /* And the PID for the task leader */ + pid = header->pid = kgsl_mmu_get_ptname_from_ptbase(&device->mmu, + temp_ptbase); + + task = find_task_by_vpid(pid); + + if (task) + get_task_comm(header->comm, task); + + header->ctxtcount = ctxtcount; + + _ctxtptr = buf + sizeof(*header); + /* append information for each context */ + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, snapshot_context_info, NULL); + read_unlock(&device->context_lock); + + /* Return the size of the data segment */ + return size; +} + +static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj) +{ + list_del(&obj->node); + + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN; + kgsl_mem_entry_put(obj->entry); + + kfree(obj); +} + +/** + * kgsl_snapshot_have_object() - return 1 if the object has been processed + * @snapshot: the snapshot data + * @process: The process that owns the the object to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * + * Return 1 if the object is already in the list - this can save us from + * having to parse the same thing over again. 
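Illustrative addresses: if an object at gpuaddr 0x1000 with size 0x2000 is already tracked, a query for gpuaddr 0x1800 with size 0x400 falls entirely inside it and returns 1.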
There are 2 lists that are + * tracking objects so check for the object in both lists +*/ +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size) +{ + struct kgsl_snapshot_object *obj; + struct kgsl_snapshot_cp_obj *obj_cp; + struct adreno_ib_object *ib_obj; + int i; + + /* Check whether the object is tracked already in ib list */ + list_for_each_entry(obj_cp, &snapshot->cp_list, node) { + if (obj_cp->ib_obj_list == NULL + || obj_cp->ib_obj_list->num_objs == 0) + continue; + + ib_obj = &(obj_cp->ib_obj_list->obj_list[0]); + if (ib_obj->entry == NULL || ib_obj->entry->priv != process) + continue; + + for (i = 0; i < obj_cp->ib_obj_list->num_objs; i++) { + ib_obj = &(obj_cp->ib_obj_list->obj_list[i]); + if ((gpuaddr >= ib_obj->gpuaddr) && + ((gpuaddr + size) <= + (ib_obj->gpuaddr + ib_obj->size))) + return 1; + } + } + + list_for_each_entry(obj, &snapshot->obj_list, node) { + if (obj->entry == NULL || obj->entry->priv != process) + continue; + + if ((gpuaddr >= obj->gpuaddr) && + ((gpuaddr + size) <= (obj->gpuaddr + obj->size))) + return 1; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_snapshot_have_object); + +/** + * kgsl_snapshot_get_object() - Mark a GPU buffer to be frozen + * @snapshot: The snapshot data + * @process: The process that owns the object we want to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * @type: the type of object being saved (shader, vbo, etc) + * + * Mark and freeze a GPU buffer object. This will prevent it from being + * freed until it can be copied out as part of the snapshot dump. Returns the + * size of the object being frozen + */ +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type) +{ + struct kgsl_mem_entry *entry; + struct kgsl_snapshot_object *obj; + uint64_t offset; + int ret = -EINVAL; + unsigned int mem_type; + + if (!gpuaddr) + return 0; + + entry = kgsl_sharedmem_find(process, gpuaddr); + + if (entry == NULL) { + KGSL_CORE_ERR("Unable to find GPU buffer 0x%016llX\n", gpuaddr); + return -EINVAL; + } + + /* We can't freeze external memory, because we don't own it */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_MASK) + goto err_put; + /* + * Do not save texture and render targets in snapshot, + * they can be just too big + */ + + mem_type = kgsl_memdesc_get_memtype(&entry->memdesc); + if (KGSL_MEMTYPE_TEXTURE == mem_type || + KGSL_MEMTYPE_EGL_SURFACE == mem_type || + KGSL_MEMTYPE_EGL_IMAGE == mem_type) { + ret = 0; + goto err_put; + } + + /* + * size indicates the number of bytes in the region to save. This might + * not always be the entire size of the region because some buffers are + * sub-allocated from a larger region. 
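 * As a rough example: if a mem entry spans [memdesc.gpuaddr,
 * memdesc.gpuaddr + memdesc.size) and the caller asks for (gpuaddr, size)
 * inside it, the object is recorded with
 *
 *   offset = gpuaddr - memdesc.gpuaddr
 *
 * and rejected if offset + size would run past memdesc.size.
 *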
However, if size 0 was passed + * thats a flag that the caller wants to capture the entire buffer + */ + + if (size == 0) { + size = entry->memdesc.size; + offset = 0; + + /* Adjust the gpuaddr to the start of the object */ + gpuaddr = entry->memdesc.gpuaddr; + } else { + offset = gpuaddr - entry->memdesc.gpuaddr; + } + + if (size + offset > entry->memdesc.size) { + KGSL_CORE_ERR("Invalid size for GPU buffer 0x%016llX\n", + gpuaddr); + goto err_put; + } + + /* If the buffer is already on the list, skip it */ + list_for_each_entry(obj, &snapshot->obj_list, node) { + /* combine the range with existing object if they overlap */ + if (obj->entry->priv == process && obj->type == type && + kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + gpuaddr, size)) { + uint64_t end1 = obj->gpuaddr + obj->size; + uint64_t end2 = gpuaddr + size; + if (obj->gpuaddr > gpuaddr) + obj->gpuaddr = gpuaddr; + if (end1 > end2) + obj->size = end1 - obj->gpuaddr; + else + obj->size = end2 - obj->gpuaddr; + obj->offset = obj->gpuaddr - entry->memdesc.gpuaddr; + ret = 0; + goto err_put; + } + } + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + + if (obj == NULL) + goto err_put; + + obj->type = type; + obj->entry = entry; + obj->gpuaddr = gpuaddr; + obj->size = size; + obj->offset = offset; + + list_add(&obj->node, &snapshot->obj_list); + + /* + * Return the size of the entire mem entry that was frozen - this gets + * used for tracking how much memory is frozen for a hang. Also, mark + * the memory entry as frozen. If the entry was already marked as + * frozen, then another buffer already got to it. In that case, return + * 0 so it doesn't get counted twice + */ + + ret = (entry->memdesc.priv & KGSL_MEMDESC_FROZEN) ? 0 + : entry->memdesc.size; + + entry->memdesc.priv |= KGSL_MEMDESC_FROZEN; + + return ret; +err_put: + kgsl_mem_entry_put(entry); + return ret; +} +EXPORT_SYMBOL(kgsl_snapshot_get_object); + +/** + * kgsl_snapshot_dump_registers - helper function to dump device registers + * @device - the device to dump registers from + * @snapshot - pointer to the start of the region of memory for the snapshot + * @remain - a pointer to the number of bytes remaining in the snapshot + * @priv - A pointer to the kgsl_snapshot_registers data + * + * Given an array of register ranges pairs (start,end [inclusive]), dump the + * registers into a snapshot register section. The snapshot region stores a + * part of dwords for each register - the word address of the register, and + * the value. 
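 * (i.e. a pair of dwords per register). For a purely hypothetical ranges
 * array such as
 *
 *   regs->regs  = { 0x0000, 0x000f, 0x0400, 0x0403 };
 *   regs->count = 2;
 *
 * the section body would hold 16 + 4 = 20 address/value pairs at 8 bytes
 * each, which is exactly what the remain check below accounts for.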
+ */ +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + struct kgsl_snapshot_registers *regs = priv; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, j, k; + + /* Figure out how many registers we are going to dump */ + + for (j = 0; j < regs->count; j++) { + int start = regs->regs[j * 2]; + int end = regs->regs[j * 2 + 1]; + + count += (end - start + 1); + } + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (j = 0; j < regs->count; j++) { + unsigned int start = regs->regs[j * 2]; + unsigned int end = regs->regs[j * 2 + 1]; + + for (k = start; k <= end; k++) { + unsigned int val; + + kgsl_regread(device, k, &val); + *data++ = k; + *data++ = val; + } + } + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} +EXPORT_SYMBOL(kgsl_snapshot_dump_registers); + +struct kgsl_snapshot_indexed_registers { + unsigned int index; + unsigned int data; + unsigned int start; + unsigned int count; +}; + +static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_indexed_registers *iregs = priv; + struct kgsl_snapshot_indexed_regs *header = + (struct kgsl_snapshot_indexed_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int i; + + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (remain < (iregs->count * 4) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS"); + return 0; + } + + header->index_reg = iregs->index; + header->data_reg = iregs->data; + header->count = iregs->count; + header->start = iregs->start; + + for (i = 0; i < iregs->count; i++) { + kgsl_regwrite(device, iregs->index, iregs->start + i); + kgsl_regread(device, iregs->data, &data[i]); + } + + return (iregs->count * 4) + sizeof(*header); +} + +/** + * kgsl_snapshot_indexed_registers - Add a set of indexed registers to the + * snapshot + * @device: Pointer to the KGSL device being snapshotted + * @snapshot: Snapshot instance + * @index: Offset for the index register + * @data: Offset for the data register + * @start: Index to start reading + * @count: Number of entries to read + * + * Dump the values from an indexed register group into the snapshot + */ +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + unsigned int index, unsigned int data, + unsigned int start, + unsigned int count) +{ + struct kgsl_snapshot_indexed_registers iregs; + iregs.index = index; + iregs.data = data; + iregs.start = start; + iregs.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS, + snapshot, kgsl_snapshot_dump_indexed_regs, &iregs); +} +EXPORT_SYMBOL(kgsl_snapshot_indexed_registers); + +/** + * kgsl_snapshot_add_section() - Add a new section to the GPU snapshot + * @device: the KGSL device being snapshotted + * @id: the section id + * @snapshot: pointer to the snapshot instance + * @func: Function pointer to fill the section + * @priv: Private pointer to pass to the function + * + * Set up a KGSL snapshot header by filling the memory with the callback + * function and adding the standard section header + */ +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void 
*priv) +{ + struct kgsl_snapshot_section_header *header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *data = snapshot->ptr + sizeof(*header); + size_t ret = 0; + + /* + * Sanity check to make sure there is enough for the header. The + * callback will check to make sure there is enough for the rest + * of the data. If there isn't enough room then don't advance the + * pointer. + */ + + if (snapshot->remain < sizeof(*header)) + return; + + /* It is legal to have no function (i.e. - make an empty section) */ + if (func) { + ret = func(device, data, snapshot->remain - sizeof(*header), + priv); + + /* + * If there wasn't enough room for the data then don't bother + * setting up the header. + */ + + if (ret == 0) + return; + } + + header->magic = SNAPSHOT_SECTION_MAGIC; + header->id = id; + header->size = ret + sizeof(*header); + + snapshot->ptr += header->size; + snapshot->remain -= header->size; + snapshot->size += header->size; +} + +/** + * kgsl_snapshot() - construct a device snapshot + * @device: device to snapshot + * @context: the context that is hung, might be NULL if unknown. + * + * Given a device, construct a binary snapshot dump of the current device state + * and store it in the device snapshot memory. + */ +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct kgsl_snapshot_header *header = device->snapshot_memory.ptr; + struct kgsl_snapshot *snapshot; + struct timespec boot; + phys_addr_t pa; + + if (device->snapshot_memory.ptr == NULL) { + KGSL_DRV_ERR(device, + "snapshot: no snapshot memory available\n"); + return; + } + + BUG_ON(!kgsl_state_is_awake(device)); + /* increment the hang count for good book keeping */ + device->snapshot_faultcount++; + + /* + * The first hang is always the one we are interested in. 
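 * Later faults still increment snapshot_faultcount above, but the data
 * captured for that first fault is preserved until user space reads and
 * releases it through the snapshot sysfs node.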
Don't capture + * a new snapshot instance if the old one hasn't been grabbed yet + */ + if (device->snapshot != NULL) + return; + + /* Allocate memory for the snapshot instance */ + snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL); + if (snapshot == NULL) + return; + + init_completion(&snapshot->dump_gate); + INIT_LIST_HEAD(&snapshot->obj_list); + INIT_LIST_HEAD(&snapshot->cp_list); + INIT_WORK(&snapshot->work, kgsl_snapshot_save_frozen_objs); + + snapshot->start = device->snapshot_memory.ptr; + snapshot->ptr = device->snapshot_memory.ptr; + snapshot->remain = device->snapshot_memory.size; + + header = (struct kgsl_snapshot_header *) snapshot->ptr; + + header->magic = SNAPSHOT_MAGIC; + header->gpuid = kgsl_gpuid(device, &header->chipid); + + snapshot->ptr += sizeof(*header); + snapshot->remain -= sizeof(*header); + snapshot->size += sizeof(*header); + + /* Build the Linux specific header */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS, + snapshot, snapshot_os, NULL); + + /* Get the device specific sections */ + if (device->ftbl->snapshot) + device->ftbl->snapshot(device, snapshot, context); + + /* + * The timestamp is the seconds since boot so it is easier to match to + * the kernel log + */ + + getboottime(&boot); + snapshot->timestamp = get_seconds() - boot.tv_sec; + + /* Store the instance in the device until it gets dumped */ + device->snapshot = snapshot; + + /* log buffer info to aid in ramdump fault tolerance */ + pa = __pa(device->snapshot_memory.ptr); + KGSL_DRV_ERR(device, "snapshot created at pa %pa size %zd\n", + &pa, snapshot->size); + + sysfs_notify(&device->snapshot_kobj, NULL, "timestamp"); + + /* + * Queue a work item that will save the IB data in snapshot into + * static memory to prevent loss of data due to overwriting of + * memory. 
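 * The sysfs read side waits on snapshot->dump_gate, which the worker
 * (kgsl_snapshot_save_frozen_objs) completes once every frozen object has
 * been copied into the mempool, so a reader never sees half-saved data.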
+ * + */ + kgsl_schedule_work(&snapshot->work); +} +EXPORT_SYMBOL(kgsl_device_snapshot); + +/* An attribute for showing snapshot details */ +struct kgsl_snapshot_attribute { + struct attribute attr; + ssize_t (*show)(struct kgsl_device *device, char *buf); + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +/** + * kgsl_snapshot_process_ib_obj_list() - Go through the list of IB's which need + * to be dumped for snapshot and move them to the global snapshot list so + * they will get dumped when the global list is dumped + * @device: device being snapshotted + */ +static void kgsl_snapshot_process_ib_obj_list(struct kgsl_snapshot *snapshot) +{ + struct kgsl_snapshot_cp_obj *obj, *obj_temp; + struct adreno_ib_object *ib_obj; + int i; + + list_for_each_entry_safe(obj, obj_temp, &snapshot->cp_list, + node) { + for (i = 0; i < obj->ib_obj_list->num_objs; i++) { + ib_obj = &(obj->ib_obj_list->obj_list[i]); + kgsl_snapshot_get_object(snapshot, ib_obj->entry->priv, + ib_obj->gpuaddr, ib_obj->size, + ib_obj->snapshot_obj_type); + } + list_del(&obj->node); + adreno_ib_destroy_obj_list(obj->ib_obj_list); + kfree(obj); + } +} + +#define to_snapshot_attr(a) \ +container_of(a, struct kgsl_snapshot_attribute, attr) + +#define kobj_to_device(a) \ +container_of(a, struct kgsl_device, snapshot_kobj) + +/* Dump the sysfs binary data to the user */ +static ssize_t snapshot_show(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct kgsl_device *device = kobj_to_device(kobj); + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj, *tmp; + struct kgsl_snapshot_section_header head; + struct snapshot_obj_itr itr; + int ret; + + if (device == NULL) + return 0; + + mutex_lock(&device->mutex); + snapshot = device->snapshot; + mutex_unlock(&device->mutex); + + /* Return nothing if we haven't taken a snapshot yet */ + if (snapshot == NULL) + return 0; + + /* + * Wait for the dump worker to finish. This is interruptible + * to allow userspace to bail if things go horribly wrong. + */ + ret = wait_for_completion_interruptible(&snapshot->dump_gate); + if (ret) + return ret; + + obj_itr_init(&itr, buf, off, count); + + ret = obj_itr_out(&itr, snapshot->start, snapshot->size); + if (ret == 0) + goto done; + + /* Dump the memory pool if it exists */ + if (device->snapshot->mempool) { + ret = obj_itr_out(&itr, snapshot->mempool, + snapshot->mempool_size); + if (ret == 0) + goto done; + } + + { + head.magic = SNAPSHOT_SECTION_MAGIC; + head.id = KGSL_SNAPSHOT_SECTION_END; + head.size = sizeof(head); + + obj_itr_out(&itr, &head, sizeof(head)); + } + + /* + * Make sure everything has been written out before destroying things. 
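 * User space typically reads the dump node in chunks, so this function runs
 * once per read() with a new offset; a final read that lands past the end of
 * the data copies nothing and returns 0.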
+ * The best way to confirm this is to go all the way through without + * writing any bytes - so only release if we get this far and + * itr->write is 0 + */ + + if (itr.write == 0) { + mutex_lock(&device->mutex); + device->snapshot = NULL; + mutex_unlock(&device->mutex); + + list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) + kgsl_snapshot_put_object(obj); + + if (snapshot->mempool) + vfree(snapshot->mempool); + + kfree(snapshot); + KGSL_CORE_ERR("snapshot: objects released\n"); + } + +done: + + return itr.write; +} + +/* Show the total number of hangs since device boot */ +static ssize_t faultcount_show(struct kgsl_device *device, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_faultcount); +} + +/* Reset the total number of hangs since device boot */ +static ssize_t faultcount_store(struct kgsl_device *device, const char *buf, + size_t count) +{ + if (device && count > 0) + device->snapshot_faultcount = 0; + + return count; +} + +/* Show the timestamp of the last collected snapshot */ +static ssize_t timestamp_show(struct kgsl_device *device, char *buf) +{ + unsigned long timestamp = + device->snapshot ? device->snapshot->timestamp : 0; + + return snprintf(buf, PAGE_SIZE, "%lu\n", timestamp); +} + +static struct bin_attribute snapshot_attr = { + .attr.name = "dump", + .attr.mode = 0444, + .size = 0, + .read = snapshot_show +}; + +#define SNAPSHOT_ATTR(_name, _mode, _show, _store) \ +struct kgsl_snapshot_attribute attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +static SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL); +static SNAPSHOT_ATTR(faultcount, 0644, faultcount_show, faultcount_store); + +static ssize_t snapshot_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret; + + if (device && pattr->show) + ret = pattr->show(device, buf); + else + ret = -EIO; + + return ret; +} + +static ssize_t snapshot_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret; + + if (device && pattr->store) + ret = pattr->store(device, buf, count); + else + ret = -EIO; + + return ret; +} + +static const struct sysfs_ops snapshot_sysfs_ops = { + .show = snapshot_sysfs_show, + .store = snapshot_sysfs_store, +}; + +static struct kobj_type ktype_snapshot = { + .sysfs_ops = &snapshot_sysfs_ops, +}; + +/** + * kgsl_device_snapshot_init() - add resources for the device GPU snapshot + * @device: The device to initalize + * + * Allocate memory for a GPU snapshot for the specified device, + * and create the sysfs files to manage it + */ +int kgsl_device_snapshot_init(struct kgsl_device *device) +{ + int ret; + + if (kgsl_property_read_u32(device, "qcom,snapshot-size", + (unsigned int *) &(device->snapshot_memory.size))) + device->snapshot_memory.size = KGSL_SNAPSHOT_MEMSIZE; + + /* + * Choosing a memory size of 0 is essentially the same as disabling + * snapshotting + */ + if (device->snapshot_memory.size == 0) + return 0; + + /* + * I'm not sure why anybody would choose to do so but make sure + * that we can at least fit the snapshot header in the requested + * region + */ + if (device->snapshot_memory.size < sizeof(struct kgsl_snapshot_header)) + device->snapshot_memory.size = + 
sizeof(struct kgsl_snapshot_header); + + device->snapshot_memory.ptr = kzalloc(device->snapshot_memory.size, + GFP_KERNEL); + + if (device->snapshot_memory.ptr == NULL) + return -ENOMEM; + + device->snapshot = NULL; + device->snapshot_faultcount = 0; + + ret = kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot, + &device->dev->kobj, "snapshot"); + if (ret) + goto done; + + ret = sysfs_create_bin_file(&device->snapshot_kobj, &snapshot_attr); + if (ret) + goto done; + + ret = sysfs_create_file(&device->snapshot_kobj, &attr_timestamp.attr); + if (ret) + goto done; + + ret = sysfs_create_file(&device->snapshot_kobj, &attr_faultcount.attr); + +done: + return ret; +} +EXPORT_SYMBOL(kgsl_device_snapshot_init); + +/** + * kgsl_device_snapshot_close() - take down snapshot memory for a device + * @device: Pointer to the kgsl_device + * + * Remove the sysfs files and free the memory allocated for the GPU + * snapshot + */ +void kgsl_device_snapshot_close(struct kgsl_device *device) +{ + sysfs_remove_bin_file(&device->snapshot_kobj, &snapshot_attr); + sysfs_remove_file(&device->snapshot_kobj, &attr_timestamp.attr); + + kobject_put(&device->snapshot_kobj); + + kfree(device->snapshot_memory.ptr); + + device->snapshot_memory.ptr = NULL; + device->snapshot_memory.size = 0; + device->snapshot_faultcount = 0; +} +EXPORT_SYMBOL(kgsl_device_snapshot_close); + +/** + * kgsl_snapshot_add_ib_obj_list() - Add a IB object list to the snapshot + * object list + * @device: the device that is being snapshotted + * @ib_obj_list: The IB list that has objects required to execute an IB + * @num_objs: Number of IB objects + * @ptbase: The pagetable base in which the IB is mapped + * + * Adds a new IB to the list of IB objects maintained when getting snapshot + * Returns 0 on success else -ENOMEM on error + */ +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list) +{ + struct kgsl_snapshot_cp_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + obj->ib_obj_list = ib_obj_list; + list_add(&obj->node, &snapshot->cp_list); + return 0; +} + +static size_t _mempool_add_object(u8 *data, struct kgsl_snapshot_object *obj) +{ + struct kgsl_snapshot_section_header *section = + (struct kgsl_snapshot_section_header *)data; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)(data + sizeof(*section)); + u8 *dest = data + sizeof(*section) + sizeof(*header); + uint64_t size; + + size = obj->size; + + if (!kgsl_memdesc_map(&obj->entry->memdesc)) { + KGSL_CORE_ERR("snapshot: failed to map GPU object\n"); + return 0; + } + + section->magic = SNAPSHOT_SECTION_MAGIC; + section->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section->size = size + sizeof(*header) + sizeof(*section); + + header->size = size >> 2; + header->gpuaddr = obj->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable); + header->type = obj->type; + + memcpy(dest, obj->entry->memdesc.hostptr + obj->offset, size); + kgsl_memdesc_unmap(&obj->entry->memdesc); + + return section->size; +} + +/** + * kgsl_snapshot_save_frozen_objs() - Save the objects frozen in snapshot into + * memory so that the data reported in these objects is correct when snapshot + * is taken + * @work: The work item that scheduled this work + */ +void kgsl_snapshot_save_frozen_objs(struct work_struct *work) +{ + struct kgsl_snapshot *snapshot = container_of(work, + struct kgsl_snapshot, work); + struct kgsl_snapshot_object *obj, *tmp; + 
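	/*
	 * Sketch of what follows: walk obj_list, total up each object's
	 * (4-byte aligned) size plus its per-object and section headers,
	 * vmalloc a mempool of that size, then copy every frozen object
	 * into it as a GPU_OBJECT_V2 section so the data survives until
	 * the snapshot is finally read from sysfs.
	 */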
size_t size = 0; + void *ptr; + + kgsl_snapshot_process_ib_obj_list(snapshot); + + list_for_each_entry(obj, &snapshot->obj_list, node) { + obj->size = ALIGN(obj->size, 4); + + size += ((size_t) obj->size + + sizeof(struct kgsl_snapshot_gpu_object) + + sizeof(struct kgsl_snapshot_section_header)); + } + + if (size == 0) + goto done; + + snapshot->mempool = vmalloc(size); + if (snapshot->mempool != NULL) + KGSL_CORE_ERR("snapshot: mempool address %p, size %zx\n", + snapshot->mempool, size); + + ptr = snapshot->mempool; + snapshot->mempool_size = 0; + + /* even if vmalloc fails, make sure we clean up the obj_list */ + list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) { + if (snapshot->mempool) { + size_t ret = _mempool_add_object(ptr, obj); + ptr += ret; + snapshot->mempool_size += ret; + } + + kgsl_snapshot_put_object(obj); + } +done: + /* + * Get rid of the process struct here, so that it doesn't sit + * around until someone bothers to read the snapshot file. + */ + kgsl_process_private_put(snapshot->process); + snapshot->process = NULL; + + complete_all(&snapshot->dump_gate); + return; +} diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h new file mode 100644 index 000000000000..8167ff83a18b --- /dev/null +++ b/drivers/gpu/msm/kgsl_snapshot.h @@ -0,0 +1,238 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _KGSL_SNAPSHOT_H_ +#define _KGSL_SNAPSHOT_H_ + +#include <linux/types.h> + +/* Snapshot header */ + +/* High word is static, low word is snapshot version ID */ +#define SNAPSHOT_MAGIC 0x504D0002 + +/* GPU ID scheme: + * [16:31] - core identifer (0x0002 for 2D or 0x0003 for 3D) + * [00:16] - GPU specific identifier + */ + +struct kgsl_snapshot_header { + __u32 magic; /* Magic identifier */ + __u32 gpuid; /* GPU ID - see above */ + /* Added in snapshot version 2 */ + __u32 chipid; /* Chip ID from the GPU */ +} __packed; + +/* Section header */ +#define SNAPSHOT_SECTION_MAGIC 0xABCD + +struct kgsl_snapshot_section_header { + __u16 magic; /* Magic identifier */ + __u16 id; /* Type of section */ + __u32 size; /* Size of the section including this header */ +} __packed; + +/* Section identifiers */ +#define KGSL_SNAPSHOT_SECTION_OS 0x0101 +#define KGSL_SNAPSHOT_SECTION_REGS 0x0201 +#define KGSL_SNAPSHOT_SECTION_RB 0x0301 +#define KGSL_SNAPSHOT_SECTION_RB_V2 0x0302 +#define KGSL_SNAPSHOT_SECTION_IB 0x0401 +#define KGSL_SNAPSHOT_SECTION_IB_V2 0x0402 +#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501 +#define KGSL_SNAPSHOT_SECTION_ISTORE 0x0801 +#define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901 +#define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT 0x0B01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02 +#define KGSL_SNAPSHOT_SECTION_MEMLIST 0x0E01 +#define KGSL_SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 +#define KGSL_SNAPSHOT_SECTION_SHADER 0x1201 + +#define KGSL_SNAPSHOT_SECTION_END 0xFFFF + +/* OS sub-section header */ +#define KGSL_SNAPSHOT_OS_LINUX 0x0001 + +/* Linux OS specific information */ + +#define SNAPSHOT_STATE_HUNG 0 +#define SNAPSHOT_STATE_RUNNING 1 + +struct kgsl_snapshot_linux { + int osid; /* subsection OS identifier */ + int state; /* 1 if the thread is running, 0 for hung */ + __u32 seconds; /* Unix timestamp for the snapshot */ + __u32 power_flags; /* Current power flags */ + __u32 power_level; /* Current power level */ + __u32 power_interval_timeout; /* Power interval timeout */ + __u32 grpclk; /* Current GP clock value */ + __u32 busclk; /* Current busclk value */ + __u32 ptbase; /* Current ptbase */ + __u32 pid; /* PID of the process that owns the PT */ + __u32 current_context; /* ID of the current context */ + __u32 ctxtcount; /* Number of contexts appended to section */ + unsigned char release[32]; /* kernel release */ + unsigned char version[32]; /* kernel version */ + unsigned char comm[16]; /* Name of the process that owns the PT */ +} __packed; + +/* + * This structure contains a record of an active context. 
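 * One record exists for every context counted into ctxtcount; for
 * illustration, record i then begins
 * sizeof(struct kgsl_snapshot_linux) +
 * i * sizeof(struct kgsl_snapshot_linux_context) bytes into the OS section
 * payload.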
+ * These are appended one after another in the OS section below + * the header above + */ + +struct kgsl_snapshot_linux_context { + __u32 id; /* The context ID */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +}; + +/* Ringbuffer sub-section header */ +struct kgsl_snapshot_rb { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +} __packed; + +struct kgsl_snapshot_rb_v2 { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ + __u64 gpuaddr; /* The GPU address of the ringbuffer */ + __u32 id; /* Ringbuffer identifier */ +} __packed; + + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_replay_mem_list { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u32 ptbase; +} __packed; + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_mem_list_v2 { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u64 ptbase; +} __packed; + + +/* Indirect buffer sub-section header */ +struct kgsl_snapshot_ib { + __u32 gpuaddr; /* GPU address of the the IB */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the IB */ +} __packed; + +/* Indirect buffer sub-section header (v2) */ +struct kgsl_snapshot_ib_v2 { + __u64 gpuaddr; /* GPU address of the the IB */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the IB */ +} __packed; + + +/* Register sub-section header */ +struct kgsl_snapshot_regs { + __u32 count; /* Number of register pairs in the section */ +} __packed; + +/* Indexed register sub-section header */ +struct kgsl_snapshot_indexed_regs { + __u32 index_reg; /* Offset of the index register for this section */ + __u32 data_reg; /* Offset of the data register for this section */ + int start; /* Starting index */ + int count; /* Number of dwords in the data */ +} __packed; + +/* Istore sub-section header */ +struct kgsl_snapshot_istore { + int count; /* Number of instructions in the istore */ +} __packed; + +/* Debug data sub-section header */ + +/* A2XX debug sections */ +#define SNAPSHOT_DEBUG_SX 1 +#define SNAPSHOT_DEBUG_CP 2 +#define SNAPSHOT_DEBUG_SQ 3 +#define SNAPSHOT_DEBUG_SQTHREAD 4 +#define SNAPSHOT_DEBUG_MIU 5 + +/* A3XX debug sections */ +#define SNAPSHOT_DEBUG_VPC_MEMORY 6 +#define SNAPSHOT_DEBUG_CP_MEQ 7 +#define SNAPSHOT_DEBUG_CP_PM4_RAM 8 +#define SNAPSHOT_DEBUG_CP_PFP_RAM 9 
+#define SNAPSHOT_DEBUG_CP_ROQ 10 +#define SNAPSHOT_DEBUG_SHADER_MEMORY 11 +#define SNAPSHOT_DEBUG_CP_MERCIU 12 + +struct kgsl_snapshot_debug { + int type; /* Type identifier for the attached tata */ + int size; /* Size of the section in dwords */ +} __packed; + +struct kgsl_snapshot_debugbus { + int id; /* Debug bus ID */ + int count; /* Number of dwords in the dump */ +} __packed; + +struct kgsl_snapshot_shader { + int type; /* SP/TP statetype */ + int index; /* SP/TP index */ + int size; /* Number of dwords in the dump */ +} __packed; + +#define SNAPSHOT_GPU_OBJECT_SHADER 1 +#define SNAPSHOT_GPU_OBJECT_IB 2 +#define SNAPSHOT_GPU_OBJECT_GENERIC 3 +#define SNAPSHOT_GPU_OBJECT_DRAW 4 +#define SNAPSHOT_GPU_OBJECT_GLOBAL 5 + +struct kgsl_snapshot_gpu_object { + int type; /* Type of GPU object */ + __u32 gpuaddr; /* GPU address of the the object */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the object (in dwords) */ +}; + +struct kgsl_snapshot_gpu_object_v2 { + int type; /* Type of GPU object */ + __u64 gpuaddr; /* GPU address of the the object */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the object (in dwords) */ +} __packed; + +#endif diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c new file mode 100644 index 000000000000..401a6661f5a9 --- /dev/null +++ b/drivers/gpu/msm/kgsl_sync.c @@ -0,0 +1,646 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/err.h> +#include <linux/file.h> +#include <linux/oneshot_sync.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include <asm/current.h> + +#include "kgsl_sync.h" + +static void kgsl_sync_timeline_signal(struct sync_timeline *timeline, + unsigned int timestamp); + +static struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline, + struct kgsl_context *context, unsigned int timestamp) +{ + struct sync_pt *pt; + pt = sync_pt_create(timeline, (int) sizeof(struct kgsl_sync_pt)); + if (pt) { + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt; + kpt->context = context; + kpt->timestamp = timestamp; + } + return pt; +} + +/* + * This should only be called on sync_pts which have been created but + * not added to a fence. 
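 * Once sync_fence_create() succeeds the fence owns the pt and will free it
 * when the fence itself is released, so a typical error path looks roughly
 * like:
 *
 *   pt = kgsl_sync_pt_create(timeline, context, timestamp);
 *   fence = sync_fence_create(name, pt);
 *   if (fence == NULL)
 *           kgsl_sync_pt_destroy(pt);
 *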
+ */ +static void kgsl_sync_pt_destroy(struct sync_pt *pt) +{ + sync_pt_free(pt); +} + +static struct sync_pt *kgsl_sync_pt_dup(struct sync_pt *pt) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt; + return kgsl_sync_pt_create(sync_pt_parent(pt), kpt->context, kpt->timestamp); +} + +static int kgsl_sync_pt_has_signaled(struct sync_pt *pt) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt; + struct kgsl_sync_timeline *ktimeline = + (struct kgsl_sync_timeline *) sync_pt_parent(pt); + unsigned int ts = kpt->timestamp; + int ret = 0; + + spin_lock(&ktimeline->lock); + ret = (timestamp_cmp(ktimeline->last_timestamp, ts) >= 0); + spin_unlock(&ktimeline->lock); + + return ret; +} + +static int kgsl_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) +{ + struct kgsl_sync_pt *kpt_a = (struct kgsl_sync_pt *) a; + struct kgsl_sync_pt *kpt_b = (struct kgsl_sync_pt *) b; + unsigned int ts_a = kpt_a->timestamp; + unsigned int ts_b = kpt_b->timestamp; + return timestamp_cmp(ts_a, ts_b); +} + +struct kgsl_fence_event_priv { + struct kgsl_context *context; + unsigned int timestamp; +}; + +/** + * kgsl_fence_event_cb - Event callback for a fence timestamp event + * @device - The KGSL device that expired the timestamp + * @context- Pointer to the context that owns the event + * @priv: Private data for the callback + * @result - Result of the event (retired or canceled) + * + * Signal a fence following the expiration of a timestamp + */ + +static void kgsl_fence_event_cb(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_fence_event_priv *ev = priv; + kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp); + kgsl_context_put(ev->context); + kfree(ev); +} + +static int _add_fence_event(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + struct kgsl_fence_event_priv *event; + int ret; + + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* + * Increase the refcount for the context to keep it through the + * callback + */ + if (!_kgsl_context_get(context)) { + kfree(event); + return -ENOENT; + } + + event->context = context; + event->timestamp = timestamp; + event->context = context; + + ret = kgsl_add_event(device, &context->events, timestamp, + kgsl_fence_event_cb, event); + + if (ret) { + kgsl_context_put(context); + kfree(event); + } + + return ret; +} + +/** + * kgsl_add_fence_event - Create a new fence event + * @device - KGSL device to create the event on + * @timestamp - Timestamp to trigger the event + * @data - Return fence fd stored in struct kgsl_timestamp_event_fence + * @len - length of the fence event + * @owner - driver instance that owns this event + * @returns 0 on success or error code on error + * + * Create a fence and register an event to signal the fence when + * the timestamp expires + */ + +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + struct kgsl_timestamp_event_fence priv; + struct kgsl_context *context; + struct sync_pt *pt; + struct sync_fence *fence = NULL; + int ret = -EINVAL; + char fence_name[sizeof(fence->name)] = {}; + unsigned int cur; + + priv.fence_fd = -1; + + if (len != sizeof(priv)) + return -EINVAL; + + context = kgsl_context_get_owner(owner, context_id); + + if (context == NULL) + return -EINVAL; + + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) + goto out; + + pt = 
kgsl_sync_pt_create(context->timeline, context, timestamp); + if (pt == NULL) { + KGSL_DRV_CRIT_RATELIMIT(device, "kgsl_sync_pt_create failed\n"); + ret = -ENOMEM; + goto out; + } + snprintf(fence_name, sizeof(fence_name), + "%s-pid-%d-ctx-%d-ts-%d", + device->name, current->group_leader->pid, + context_id, timestamp); + + + fence = sync_fence_create(fence_name, pt); + if (fence == NULL) { + /* only destroy pt when not added to fence */ + kgsl_sync_pt_destroy(pt); + KGSL_DRV_CRIT_RATELIMIT(device, "sync_fence_create failed\n"); + ret = -ENOMEM; + goto out; + } + + priv.fence_fd = get_unused_fd_flags(0); + if (priv.fence_fd < 0) { + KGSL_DRV_CRIT_RATELIMIT(device, + "Unable to get a file descriptor: %d\n", + priv.fence_fd); + ret = priv.fence_fd; + goto out; + } + + /* + * If the timestamp hasn't expired yet create an event to trigger it. + * Otherwise, just signal the fence - there is no reason to go through + * the effort of creating a fence we don't need. + */ + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &cur); + + if (timestamp_cmp(cur, timestamp) >= 0) { + ret = 0; + kgsl_sync_timeline_signal(context->timeline, cur); + } else { + ret = _add_fence_event(device, context, timestamp); + if (ret) + goto out; + } + + if (copy_to_user(data, &priv, sizeof(priv))) { + ret = -EFAULT; + goto out; + } + sync_fence_install(fence, priv.fence_fd); +out: + kgsl_context_put(context); + if (ret) { + if (priv.fence_fd >= 0) + put_unused_fd(priv.fence_fd); + + if (fence) + sync_fence_put(fence); + } + return ret; +} + +static unsigned int kgsl_sync_get_timestamp( + struct kgsl_sync_timeline *ktimeline, enum kgsl_timestamp_type type) +{ + unsigned int ret = 0; + struct kgsl_context *context; + + if (ktimeline->device == NULL) + return 0; + + context = kgsl_context_get(ktimeline->device, + ktimeline->context_id); + + if (context) + kgsl_readtimestamp(ktimeline->device, context, type, &ret); + + kgsl_context_put(context); + return ret; +} + +static void kgsl_sync_timeline_value_str(struct sync_timeline *sync_timeline, + char *str, int size) +{ + struct kgsl_sync_timeline *ktimeline = + (struct kgsl_sync_timeline *) sync_timeline; + + /* + * This callback can be called before the device and spinlock are + * initialized in struct kgsl_sync_timeline. kgsl_sync_get_timestamp() + * will check if device is NULL and return 0. Queued and retired + * timestamp of the context will be reported as 0, which is correct + * because the context and timeline are just getting initialized. 
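 * With a live device the snprintf() below yields something along the lines
 * of "104 queued:106 retired:104" (last signalled timestamp, then the queued
 * and retired timestamps for the context); the numbers here are only
 * illustrative.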
+ */ + unsigned int timestamp_retired = kgsl_sync_get_timestamp(ktimeline, + KGSL_TIMESTAMP_RETIRED); + unsigned int timestamp_queued = kgsl_sync_get_timestamp(ktimeline, + KGSL_TIMESTAMP_QUEUED); + + snprintf(str, size, "%u queued:%u retired:%u", + ktimeline->last_timestamp, + timestamp_queued, timestamp_retired); +} + +static void kgsl_sync_pt_value_str(struct sync_pt *sync_pt, + char *str, int size) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt; + snprintf(str, size, "%u", kpt->timestamp); +} + +static int kgsl_sync_fill_driver_data(struct sync_pt *sync_pt, void *data, + int size) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt; + + if (size < sizeof(kpt->timestamp)) + return -ENOMEM; + + memcpy(data, &kpt->timestamp, sizeof(kpt->timestamp)); + return sizeof(kpt->timestamp); +} + +static void kgsl_sync_pt_log(struct sync_pt *sync_pt) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt; + pr_info("-----\n"); + kgsl_context_dump(kpt->context); + pr_info("-----\n"); +} + +static void kgsl_sync_timeline_release_obj(struct sync_timeline *sync_timeline) +{ + /* + * Make sure to free the timeline only after destroy flag is set. + * This is to avoid further accessing to the timeline from KGSL and + * also to catch any unbalanced kref of timeline. + */ + BUG_ON(sync_timeline && (sync_timeline->destroyed != true)); +} +static const struct sync_timeline_ops kgsl_sync_timeline_ops = { + .driver_name = "kgsl-timeline", + .dup = kgsl_sync_pt_dup, + .has_signaled = kgsl_sync_pt_has_signaled, + .compare = kgsl_sync_pt_compare, + .timeline_value_str = kgsl_sync_timeline_value_str, + .pt_value_str = kgsl_sync_pt_value_str, + .fill_driver_data = kgsl_sync_fill_driver_data, + .release_obj = kgsl_sync_timeline_release_obj, + .pt_log = kgsl_sync_pt_log, +}; + +int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + struct kgsl_sync_timeline *ktimeline; + + /* Generate a name which includes the thread name, thread id, process + * name, process id, and context id. This makes it possible to + * identify the context of a timeline in the sync dump. 
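 * With the format used below this produces names along the lines of
 * "kgsl-3d0_app(1234)-GLThread(1240)-7" (device, process, thread and
 * context id); the concrete values are only examples.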
*/ + char ktimeline_name[sizeof(context->timeline->name)] = {}; + snprintf(ktimeline_name, sizeof(ktimeline_name), + "%s_%.15s(%d)-%.15s(%d)-%d", + context->device->name, + current->group_leader->comm, current->group_leader->pid, + current->comm, current->pid, context->id); + + context->timeline = sync_timeline_create(&kgsl_sync_timeline_ops, + (int) sizeof(struct kgsl_sync_timeline), ktimeline_name); + if (context->timeline == NULL) + return -EINVAL; + + ktimeline = (struct kgsl_sync_timeline *) context->timeline; + ktimeline->last_timestamp = 0; + ktimeline->device = context->device; + ktimeline->context_id = context->id; + + spin_lock_init(&ktimeline->lock); + return 0; +} + +static void kgsl_sync_timeline_signal(struct sync_timeline *timeline, + unsigned int timestamp) +{ + struct kgsl_sync_timeline *ktimeline = + (struct kgsl_sync_timeline *) timeline; + + spin_lock(&ktimeline->lock); + if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0) + ktimeline->last_timestamp = timestamp; + spin_unlock(&ktimeline->lock); + + sync_timeline_signal(timeline); +} + +void kgsl_sync_timeline_destroy(struct kgsl_context *context) +{ + sync_timeline_destroy(context->timeline); +} + +static void kgsl_sync_callback(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct kgsl_sync_fence_waiter *kwaiter = + (struct kgsl_sync_fence_waiter *) waiter; + kwaiter->func(kwaiter->priv); + sync_fence_put(kwaiter->fence); + kfree(kwaiter); +} + +struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv) +{ + struct kgsl_sync_fence_waiter *kwaiter; + struct sync_fence *fence; + int status; + + fence = sync_fence_fdget(fd); + if (fence == NULL) + return ERR_PTR(-EINVAL); + + /* create the waiter */ + kwaiter = kzalloc(sizeof(*kwaiter), GFP_ATOMIC); + if (kwaiter == NULL) { + sync_fence_put(fence); + return ERR_PTR(-ENOMEM); + } + + kwaiter->fence = fence; + kwaiter->priv = priv; + kwaiter->func = func; + + strlcpy(kwaiter->name, fence->name, sizeof(kwaiter->name)); + + sync_fence_waiter_init((struct sync_fence_waiter *) kwaiter, + kgsl_sync_callback); + + /* if status then error or signaled */ + status = sync_fence_wait_async(fence, + (struct sync_fence_waiter *) kwaiter); + if (status) { + kfree(kwaiter); + sync_fence_put(fence); + if (status < 0) + kwaiter = ERR_PTR(status); + else + kwaiter = NULL; + } + + return kwaiter; +} + +int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *kwaiter) +{ + if (kwaiter == NULL) + return 0; + + if (sync_fence_cancel_async(kwaiter->fence, + (struct sync_fence_waiter *) kwaiter) == 0) { + sync_fence_put(kwaiter->fence); + kfree(kwaiter); + return 1; + } + return 0; +} + +#ifdef CONFIG_ONESHOT_SYNC + +struct kgsl_syncsource { + struct kref refcount; + int id; + struct kgsl_process_private *private; + struct oneshot_sync_timeline *oneshot; +}; + +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_syncsource_create *param = data; + int ret = -EINVAL; + int id = 0; + struct kgsl_process_private *private = dev_priv->process_priv; + char name[32]; + + syncsource = kzalloc(sizeof(*syncsource), GFP_KERNEL); + if (syncsource == NULL) { + ret = -ENOMEM; + goto out; + } + + snprintf(name, sizeof(name), "kgsl-syncsource-pid-%d", + current->group_leader->pid); + + syncsource->oneshot = oneshot_timeline_create(name); + if (syncsource->oneshot == NULL) { + ret = -ENOMEM; + goto out; + } + + 
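	/*
	 * idr_preload() pre-allocates idr memory with GFP_KERNEL so that
	 * idr_alloc() can safely run under the spinlock with GFP_NOWAIT.
	 * IDs start at 1, so a syncsource->id of 0 can later be used to
	 * mean "already removed from the idr"; a negative return value is
	 * an errno and is passed straight back to the caller below.
	 */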
idr_preload(GFP_KERNEL); + spin_lock(&private->syncsource_lock); + id = idr_alloc(&private->syncsource_idr, syncsource, 1, 0, GFP_NOWAIT); + spin_unlock(&private->syncsource_lock); + idr_preload_end(); + + if (id > 0) { + kref_init(&syncsource->refcount); + syncsource->id = id; + syncsource->private = private; + + param->id = id; + ret = 0; + } else { + ret = id; + } + +out: + if (ret) { + if (syncsource && syncsource->oneshot) + oneshot_timeline_destroy(syncsource->oneshot); + kfree(syncsource); + } + + return ret; +} + +static struct kgsl_syncsource * +kgsl_syncsource_get(struct kgsl_process_private *private, int id) +{ + int result = 0; + struct kgsl_syncsource *syncsource = NULL; + + spin_lock(&private->syncsource_lock); + + syncsource = idr_find(&private->syncsource_idr, id); + if (syncsource) + result = kref_get_unless_zero(&syncsource->refcount); + + spin_unlock(&private->syncsource_lock); + + return result ? syncsource : NULL; +} + +static void kgsl_syncsource_destroy(struct kref *kref) +{ + struct kgsl_syncsource *syncsource = container_of(kref, + struct kgsl_syncsource, + refcount); + + struct kgsl_process_private *private = syncsource->private; + + spin_lock(&private->syncsource_lock); + if (syncsource->id != 0) { + idr_remove(&private->syncsource_idr, syncsource->id); + syncsource->id = 0; + } + oneshot_timeline_destroy(syncsource->oneshot); + spin_unlock(&private->syncsource_lock); + + kfree(syncsource); +} + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + if (syncsource) + kref_put(&syncsource->refcount, kgsl_syncsource_destroy); +} + +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_destroy *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_process_private *private; + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + + if (syncsource == NULL) + return -EINVAL; + + private = syncsource->private; + + spin_lock(&private->syncsource_lock); + idr_remove(&private->syncsource_idr, param->id); + syncsource->id = 0; + spin_unlock(&private->syncsource_lock); + + /* put reference from syncsource creation */ + kgsl_syncsource_put(syncsource); + /* put reference from getting the syncsource above */ + kgsl_syncsource_put(syncsource); + return 0; +} + +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_create_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + int ret = -EINVAL; + struct sync_fence *fence = NULL; + int fd = -1; + char name[32]; + + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + snprintf(name, sizeof(name), "kgsl-syncsource-pid-%d-%d", + current->group_leader->pid, syncsource->id); + + fence = oneshot_fence_create(syncsource->oneshot, name); + if (fence == NULL) { + ret = -ENOMEM; + goto out; + } + + fd = get_unused_fd_flags(0); + if (fd < 0) { + ret = -EBADF; + goto out; + } + ret = 0; + + sync_fence_install(fence, fd); + + param->fence_fd = fd; +out: + if (ret) { + if (fence) + sync_fence_put(fence); + if (fd >= 0) + put_unused_fd(fd); + + } + kgsl_syncsource_put(syncsource); + return ret; +} + +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int ret = -EINVAL; + struct kgsl_syncsource_signal_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct sync_fence *fence = 
NULL; + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + fence = sync_fence_fdget(param->fence_fd); + if (fence == NULL) { + ret = -EBADF; + goto out; + } + + ret = oneshot_fence_signal(syncsource->oneshot, fence); +out: + if (fence) + sync_fence_put(fence); + kgsl_syncsource_put(syncsource); + return ret; +} +#endif diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h new file mode 100644 index 000000000000..bf69ad9657c3 --- /dev/null +++ b/drivers/gpu/msm/kgsl_sync.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_SYNC_H +#define __KGSL_SYNC_H + +#include <linux/sync.h> +#include "kgsl_device.h" + +struct kgsl_sync_timeline { + struct sync_timeline timeline; + unsigned int last_timestamp; + struct kgsl_device *device; + u32 context_id; + spinlock_t lock; +}; + +struct kgsl_sync_pt { + struct sync_pt pt; + struct kgsl_context *context; + unsigned int timestamp; +}; + +struct kgsl_sync_fence_waiter { + struct sync_fence_waiter waiter; + struct sync_fence *fence; + char name[32]; + void (*func)(void *priv); + void *priv; +}; + +struct kgsl_syncsource; + +#if defined(CONFIG_SYNC) +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner); +int kgsl_sync_timeline_create(struct kgsl_context *context); +void kgsl_sync_timeline_destroy(struct kgsl_context *context); +struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv); +int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter); +static inline void kgsl_sync_fence_log(struct sync_fence *fence) +{ +} +#else +static inline int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + return -EINVAL; +} + +static inline int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + context->timeline = NULL; + return 0; +} + +static inline void kgsl_sync_timeline_destroy(struct kgsl_context *context) +{ +} + +static inline struct +kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv) +{ + return NULL; +} + +static inline int +kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter) +{ + return 1; +} + +static inline void kgsl_sync_fence_log(struct sync_fence *fence) +{ +} + +#endif + +#ifdef CONFIG_ONESHOT_SYNC +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource); + +#else +static inline long 
+kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + +} +#endif + +#endif /* __KGSL_SYNC_H */ diff --git a/drivers/gpu/msm/kgsl_trace.c b/drivers/gpu/msm/kgsl_trace.c new file mode 100644 index 000000000000..3541425ff643 --- /dev/null +++ b/drivers/gpu/msm/kgsl_trace.c @@ -0,0 +1,26 @@ +/* Copyright (c) 2011, 2013, 2015 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> + +#include "kgsl.h" +#include "kgsl_device.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "kgsl_trace.h" + +EXPORT_TRACEPOINT_SYMBOL(kgsl_regwrite); +EXPORT_TRACEPOINT_SYMBOL(kgsl_issueibcmds); +EXPORT_TRACEPOINT_SYMBOL(kgsl_user_pwrlevel_constraint); +EXPORT_TRACEPOINT_SYMBOL(kgsl_constraint); diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h new file mode 100644 index 000000000000..da7a282acc62 --- /dev/null +++ b/drivers/gpu/msm/kgsl_trace.h @@ -0,0 +1,1107 @@ +/* Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _KGSL_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
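/*
 * TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE tell include/trace/define_trace.h
 * where to re-include this header when kgsl_trace.c defines
 * CREATE_TRACE_POINTS before including it; that single pass expands the
 * TRACE_EVENT() macros below into real tracepoint definitions, while every
 * other inclusion only sees the declarations.
 */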
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE kgsl_trace + +#include <linux/tracepoint.h> +#include "kgsl_device.h" +#include "adreno_drawctxt.h" + +struct kgsl_device; +struct kgsl_ringbuffer_issueibcmds; +struct kgsl_device_waittimestamp; + +/* + * Tracepoint for kgsl issue ib commands + */ +TRACE_EVENT(kgsl_issueibcmds, + + TP_PROTO(struct kgsl_device *device, + int drawctxt_id, + struct kgsl_cmdbatch *cmdbatch, + unsigned int numibs, + int timestamp, + int flags, + int result, + unsigned int type), + + TP_ARGS(device, drawctxt_id, cmdbatch, numibs, timestamp, + flags, result, type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, drawctxt_id) + __field(unsigned int, numibs) + __field(unsigned int, timestamp) + __field(unsigned int, flags) + __field(int, result) + __field(unsigned int, drawctxt_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->drawctxt_id = drawctxt_id; + __entry->numibs = numibs; + __entry->timestamp = timestamp; + __entry->flags = flags; + __entry->result = result; + __entry->drawctxt_type = type; + ), + + TP_printk( + "d_name=%s ctx=%u ib=0x0 numibs=%u ts=%u " + "flags=%s result=%d type=%s", + __get_str(device_name), + __entry->drawctxt_id, + __entry->numibs, + __entry->timestamp, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "None", + __entry->result, + __print_symbolic(__entry->drawctxt_type, KGSL_CONTEXT_TYPES) + ) +); + +/* + * Tracepoint for kgsl readtimestamp + */ +TRACE_EVENT(kgsl_readtimestamp, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int type, + unsigned int timestamp), + + TP_ARGS(device, context_id, type, timestamp), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, type) + __field(unsigned int, timestamp) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->type = type; + __entry->timestamp = timestamp; + ), + + TP_printk( + "d_name=%s context_id=%u type=%u ts=%u", + __get_str(device_name), + __entry->context_id, + __entry->type, + __entry->timestamp + ) +); + +/* + * Tracepoint for kgsl waittimestamp entry + */ +TRACE_EVENT(kgsl_waittimestamp_entry, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int curr_ts, + unsigned int wait_ts, + unsigned int timeout), + + TP_ARGS(device, context_id, curr_ts, wait_ts, timeout), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, curr_ts) + __field(unsigned int, wait_ts) + __field(unsigned int, timeout) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->curr_ts = curr_ts; + __entry->wait_ts = wait_ts; + __entry->timeout = timeout; + ), + + TP_printk( + "d_name=%s ctx=%u curr_ts=%u ts=%u timeout=%u", + __get_str(device_name), + __entry->context_id, + __entry->curr_ts, + __entry->wait_ts, + __entry->timeout + ) +); + +/* + * Tracepoint for kgsl waittimestamp exit + */ +TRACE_EVENT(kgsl_waittimestamp_exit, + + TP_PROTO(struct kgsl_device *device, unsigned int curr_ts, + int result), + + TP_ARGS(device, curr_ts, result), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, curr_ts) + __field(int, result) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->curr_ts = curr_ts; + __entry->result = result; + ), + + TP_printk( + 
"d_name=%s curr_ts=%u result=%d", + __get_str(device_name), + __entry->curr_ts, + __entry->result + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwr_template, + TP_PROTO(struct kgsl_device *device, int on), + + TP_ARGS(device, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + ), + + TP_printk( + "d_name=%s flag=%s", + __get_str(device_name), + __entry->on ? "on" : "off" + ) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_irq, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_bus, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_rail, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_retention_clk, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +TRACE_EVENT(kgsl_clk, + + TP_PROTO(struct kgsl_device *device, unsigned int on, + unsigned int freq), + + TP_ARGS(device, on, freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, on) + __field(unsigned int, freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + __entry->freq = freq; + ), + + TP_printk( + "d_name=%s flag=%s active_freq=%d", + __get_str(device_name), + __entry->on ? "on" : "off", + __entry->freq + ) +); + +TRACE_EVENT(kgsl_pwrlevel, + + TP_PROTO(struct kgsl_device *device, + unsigned int pwrlevel, + unsigned int freq, + unsigned int prev_pwrlevel, + unsigned int prev_freq), + + TP_ARGS(device, pwrlevel, freq, prev_pwrlevel, prev_freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, freq) + __field(unsigned int, prev_pwrlevel) + __field(unsigned int, prev_freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->freq = freq; + __entry->prev_pwrlevel = prev_pwrlevel; + __entry->prev_freq = prev_freq; + ), + + TP_printk( + "d_name=%s pwrlevel=%d freq=%d prev_pwrlevel=%d prev_freq=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->freq, + __entry->prev_pwrlevel, + __entry->prev_freq + ) +); + +TRACE_EVENT(kgsl_buslevel, + + TP_PROTO(struct kgsl_device *device, unsigned int pwrlevel, + unsigned int bus), + + TP_ARGS(device, pwrlevel, bus), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, bus) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->bus = bus; + ), + + TP_printk( + "d_name=%s pwrlevel=%d bus=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->bus + ) +); + +TRACE_EVENT(kgsl_gpubusy, + TP_PROTO(struct kgsl_device *device, unsigned int busy, + unsigned int elapsed), + + TP_ARGS(device, busy, elapsed), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, busy) + __field(unsigned int, elapsed) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->busy = busy; + __entry->elapsed = elapsed; + ), + + TP_printk( + "d_name=%s busy=%u elapsed=%d", + __get_str(device_name), + __entry->busy, + __entry->elapsed + ) +); + +TRACE_EVENT(kgsl_pwrstats, + TP_PROTO(struct kgsl_device *device, s64 time, + struct kgsl_power_stats *pstats), + + TP_ARGS(device, time, pstats), + + TP_STRUCT__entry( + __string(device_name, 
device->name) + __field(s64, total_time) + __field(u64, busy_time) + __field(u64, ram_time) + __field(u64, ram_wait) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->total_time = time; + __entry->busy_time = pstats->busy_time; + __entry->ram_time = pstats->ram_time; + __entry->ram_wait = pstats->ram_wait; + ), + + TP_printk( + "d_name=%s total=%lld busy=%lld ram_time=%lld ram_wait=%lld", + __get_str(device_name), __entry->total_time, __entry->busy_time, + __entry->ram_time, __entry->ram_wait + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwrstate_template, + TP_PROTO(struct kgsl_device *device, unsigned int state), + + TP_ARGS(device, state), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, state) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->state = state; + ), + + TP_printk( + "d_name=%s state=%s", + __get_str(device_name), + kgsl_pwrstate_to_str(__entry->state) + ) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_set_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_request_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +TRACE_EVENT(kgsl_mem_alloc, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->tgid = mem_entry->priv->pid; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u flags=0x%llx", + __entry->gpuaddr, __entry->size, __entry->tgid, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_mmap, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(unsigned long, useraddr) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->useraddr = mem_entry->memdesc.useraddr; + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "useraddr=0x%lx gpuaddr=0x%llx size=%llu usage=%s id=%u flags=0x%llx", + __entry->useraddr, __entry->gpuaddr, __entry->size, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_unmapped_area_collision, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, + unsigned long addr, + unsigned long len), + + TP_ARGS(mem_entry, addr, len), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned long, addr) + __field(unsigned long, len) + ), + + TP_fast_assign( + __entry->id = mem_entry->id; + __entry->len = len; + __entry->addr = addr; + ), + + TP_printk( + "id=%u len=%lu addr=0x%lx", + __entry->id, __entry->len, __entry->addr + ) +); + +TRACE_EVENT(kgsl_mem_map, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, int fd), + + TP_ARGS(mem_entry, fd), + + TP_STRUCT__entry( + 
__field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, fd) + __field(int, type) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->fd = fd; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = mem_entry->priv->pid; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s fd=%d tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + __print_symbolic(__entry->type, KGSL_MEM_TYPES), + __entry->fd, __entry->tgid, + __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_free, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __field(int, fd) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = mem_entry->priv->pid; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + __print_symbolic(__entry->type, KGSL_MEM_TYPES), + __entry->tgid, __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_sync_cache, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, uint64_t offset, + uint64_t length, unsigned int op), + + TP_ARGS(mem_entry, offset, length, op), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __array(char, usage, 16) + __field(unsigned int, tgid) + __field(unsigned int, id) + __field(unsigned int, op) + __field(uint64_t, offset) + __field(uint64_t, length) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->tgid = mem_entry->priv->pid; + __entry->id = mem_entry->id; + __entry->op = op; + __entry->offset = offset; + __entry->length = (length == 0) ? + mem_entry->memdesc.size : length; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u op=%c%c offset=%llu", + __entry->gpuaddr, __entry->length, + __entry->tgid, __entry->usage, __entry->id, + (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.', + (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 
'i' : '.', + __entry->offset + ) +); + +TRACE_EVENT(kgsl_mem_sync_full_cache, + + TP_PROTO(unsigned int num_bufs, uint64_t bulk_size), + TP_ARGS(num_bufs, bulk_size), + + TP_STRUCT__entry( + __field(unsigned int, num_bufs) + __field(uint64_t, bulk_size) + ), + + TP_fast_assign( + __entry->num_bufs = num_bufs; + __entry->bulk_size = bulk_size; + ), + + TP_printk( + "num_bufs=%u bulk_size=%llu op=ci", + __entry->num_bufs, __entry->bulk_size + ) +); + +DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template, + + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + + TP_ARGS(device, mem_entry, id, curr_ts, free_ts), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __array(char, usage, 16) + __field(unsigned int, id) + __field(unsigned int, drawctxt_id) + __field(unsigned int, curr_ts) + __field(unsigned int, free_ts) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->drawctxt_id = id; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->curr_ts = curr_ts; + __entry->free_ts = free_ts; + ), + + TP_printk( + "d_name=%s gpuaddr=0x%llx size=%llu type=%s usage=%s id=%u ctx=%u" + " curr_ts=%u free_ts=%u", + __get_str(device_name), + __entry->gpuaddr, + __entry->size, + __print_symbolic(__entry->type, KGSL_MEM_TYPES), + __entry->usage, + __entry->id, + __entry->drawctxt_id, + __entry->curr_ts, + __entry->free_ts + ) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_queue, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_free, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +TRACE_EVENT(kgsl_context_create, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context, + unsigned int flags), + + TP_ARGS(device, context, flags), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, flags) + __field(unsigned int, priority) + __field(unsigned int, type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + __entry->flags = flags & ~(KGSL_CONTEXT_PRIORITY_MASK | + KGSL_CONTEXT_TYPE_MASK); + __entry->priority = + (flags & KGSL_CONTEXT_PRIORITY_MASK) + >> KGSL_CONTEXT_PRIORITY_SHIFT; + __entry->type = + (flags & KGSL_CONTEXT_TYPE_MASK) + >> KGSL_CONTEXT_TYPE_SHIFT; + ), + + TP_printk( + "d_name=%s ctx=%u flags=%s priority=%u type=%s", + __get_str(device_name), __entry->id, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_CONTEXT_FLAGS) : "None", + __entry->priority, + __print_symbolic(__entry->type, KGSL_CONTEXT_TYPES) + ) +); + +TRACE_EVENT(kgsl_context_detach, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_context_destroy, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_user_pwrlevel_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int id, unsigned int type, + unsigned int sub_type), + + TP_ARGS(device, id, type, sub_type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, type) + __field(unsigned int, sub_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = id; + __entry->type = type; + __entry->sub_type = sub_type; + ), + + TP_printk( + "d_name=%s ctx=%u constraint_type=%s constraint_subtype=%s", + __get_str(device_name), __entry->id, + __print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES), + __print_symbolic(__entry->sub_type, + KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES) + ) +); + +TRACE_EVENT(kgsl_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int type, + unsigned int value, unsigned int on), + + TP_ARGS(device, type, value, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, type) + __field(unsigned int, value) + __field(unsigned int, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->type = type; + __entry->value = value; + __entry->on = on; + ), + + TP_printk( + "d_name=%s constraint_type=%s constraint_value=%u status=%s", + __get_str(device_name), + __print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES), + __entry->value, + __entry->on ? 
"ON" : "OFF" + ) +); + +TRACE_EVENT(kgsl_mmu_pagefault, + + TP_PROTO(struct kgsl_device *device, unsigned int page, + unsigned int pt, const char *op), + + TP_ARGS(device, page, pt, op), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, page) + __field(unsigned int, pt) + __string(op, op) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->page = page; + __entry->pt = pt; + __assign_str(op, op); + ), + + TP_printk( + "d_name=%s page=0x%08x pt=%u op=%s", + __get_str(device_name), __entry->page, __entry->pt, + __get_str(op) + ) +); + +TRACE_EVENT(kgsl_regwrite, + + TP_PROTO(struct kgsl_device *device, unsigned int offset, + unsigned int value), + + TP_ARGS(device, offset, value), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, offset) + __field(unsigned int, value) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->offset = offset; + __entry->value = value; + ), + + TP_printk( + "d_name=%s reg=0x%x value=0x%x", + __get_str(device_name), __entry->offset, __entry->value + ) +); + +TRACE_EVENT(kgsl_popp_level, + + TP_PROTO(struct kgsl_device *device, int level1, int level2), + + TP_ARGS(device, level1, level2), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, level1) + __field(int, level2) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->level1 = level1; + __entry->level2 = level2; + ), + + TP_printk( + "d_name=%s old level=%d new level=%d", + __get_str(device_name), __entry->level1, __entry->level2) +); + +TRACE_EVENT(kgsl_popp_mod, + + TP_PROTO(struct kgsl_device *device, int x, int y), + + TP_ARGS(device, x, y), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, x) + __field(int, y) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->x = x; + __entry->y = y; + ), + + TP_printk( + "d_name=%s GPU busy mod=%d bus busy mod=%d", + __get_str(device_name), __entry->x, __entry->y) +); + +TRACE_EVENT(kgsl_popp_nap, + + TP_PROTO(struct kgsl_device *device, int t, int nap, int percent), + + TP_ARGS(device, t, nap, percent), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, t) + __field(int, nap) + __field(int, percent) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->t = t; + __entry->nap = nap; + __entry->percent = percent; + ), + + TP_printk( + "d_name=%s nap time=%d number of naps=%d percentage=%d", + __get_str(device_name), __entry->t, __entry->nap, + __entry->percent) +); + +TRACE_EVENT(kgsl_register_event, + TP_PROTO(unsigned int id, unsigned int timestamp, void *func), + TP_ARGS(id, timestamp, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->timestamp = timestamp; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u cb=%pF", + __entry->id, __entry->timestamp, __entry->func) +); + +TRACE_EVENT(kgsl_fire_event, + TP_PROTO(unsigned int id, unsigned int ts, + unsigned int type, unsigned int age, void *func), + TP_ARGS(id, ts, type, age, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, ts) + __field(unsigned int, type) + __field(unsigned int, age) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->ts = ts; + __entry->type = type; + __entry->age = age; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u type=%s age=%u cb=%pF", + 
__entry->id, __entry->ts, + __print_symbolic(__entry->type, KGSL_EVENT_TYPES), + __entry->age, __entry->func) +); + +TRACE_EVENT(kgsl_active_count, + + TP_PROTO(struct kgsl_device *device, unsigned long ip), + + TP_ARGS(device, ip), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, count) + __field(unsigned long, ip) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->count = atomic_read(&device->active_cnt); + __entry->ip = ip; + ), + + TP_printk( + "d_name=%s active_cnt=%u func=%pf", + __get_str(device_name), __entry->count, (void *) __entry->ip + ) +); + +TRACE_EVENT(kgsl_pagetable_destroy, + TP_PROTO(u64 ptbase, unsigned int name), + TP_ARGS(ptbase, name), + TP_STRUCT__entry( + __field(u64, ptbase) + __field(unsigned int, name) + ), + TP_fast_assign( + __entry->ptbase = ptbase; + __entry->name = name; + ), + TP_printk("ptbase=%llx name=%u", __entry->ptbase, __entry->name) +); + +DECLARE_EVENT_CLASS(syncpoint_timestamp_template, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(cmdbatch, context, timestamp), + TP_STRUCT__entry( + __field(unsigned int, cmdbatch_context_id) + __field(unsigned int, context_id) + __field(unsigned int, timestamp) + ), + TP_fast_assign( + __entry->cmdbatch_context_id = cmdbatch->context->id; + __entry->context_id = context->id; + __entry->timestamp = timestamp; + ), + TP_printk("ctx=%d sync ctx=%d ts=%d", + __entry->cmdbatch_context_id, __entry->context_id, + __entry->timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(cmdbatch, context, timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp_expire, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(cmdbatch, context, timestamp) +); + +DECLARE_EVENT_CLASS(syncpoint_fence_template, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, char *name), + TP_ARGS(cmdbatch, name), + TP_STRUCT__entry( + __string(fence_name, name) + __field(unsigned int, cmdbatch_context_id) + ), + TP_fast_assign( + __entry->cmdbatch_context_id = cmdbatch->context->id; + __assign_str(fence_name, name); + ), + TP_printk("ctx=%d fence=%s", + __entry->cmdbatch_context_id, __get_str(fence_name)) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, char *name), + TP_ARGS(cmdbatch, name) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence_expire, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, char *name), + TP_ARGS(cmdbatch, name) +); + +TRACE_EVENT(kgsl_msg, + TP_PROTO(const char *msg), + TP_ARGS(msg), + TP_STRUCT__entry( + __string(msg, msg) + ), + TP_fast_assign( + __assign_str(msg, msg); + ), + TP_printk( + "%s", __get_str(msg) + ) +); + + +#endif /* _KGSL_TRACE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h new file mode 100644 index 000000000000..68cfe76e8652 --- /dev/null +++ b/include/linux/msm_kgsl.h @@ -0,0 +1,13 @@ +#ifndef _MSM_KGSL_H +#define _MSM_KGSL_H + +#include <uapi/linux/msm_kgsl.h> + +/* Limits mitigations APIs */ +void *kgsl_pwr_limits_add(enum kgsl_deviceid id); +void kgsl_pwr_limits_del(void *limit); +int kgsl_pwr_limits_set_freq(void *limit, unsigned int freq); +void kgsl_pwr_limits_set_default(void *limit); +unsigned int 
kgsl_pwr_limits_get_freq(enum kgsl_deviceid id); + +#endif /* _MSM_KGSL_H */ diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h new file mode 100644 index 000000000000..51d6e99f0449 --- /dev/null +++ b/include/uapi/linux/msm_kgsl.h @@ -0,0 +1,1440 @@ +#ifndef _UAPI_MSM_KGSL_H +#define _UAPI_MSM_KGSL_H + +/* + * The KGSL version has proven not to be very useful in userspace if features + * are cherry picked into other trees out of order so it is frozen as of 3.14. + * It is left here for backwards compatabilty and as a reminder that + * software releases are never linear. Also, I like pie. + */ + +#define KGSL_VERSION_MAJOR 3 +#define KGSL_VERSION_MINOR 14 + +/* + * We have traditionally mixed context and issueibcmds / command batch flags + * together into a big flag stew. This worked fine until we started adding a + * lot more command batch flags and we started running out of bits. Turns out + * we have a bit of room in the context type / priority mask that we could use + * for command batches, but that means we need to split out the flags into two + * coherent sets. + * + * If any future definitions are for both context and cmdbatch add both defines + * and link the cmdbatch to the context define as we do below. Otherwise feel + * free to add exclusive bits to either set. + */ + +/* --- context flags --- */ +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 +#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 +#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SYNC 0x00000400 +#define KGSL_CONTEXT_PWR_CONSTRAINT 0x00000800 + +#define KGSL_CONTEXT_PRIORITY_MASK 0x0000F000 +#define KGSL_CONTEXT_PRIORITY_SHIFT 12 +#define KGSL_CONTEXT_PRIORITY_UNDEF 0 + +#define KGSL_CONTEXT_IFH_NOP 0x00010000 +#define KGSL_CONTEXT_SECURE 0x00020000 + +#define KGSL_CONTEXT_PREEMPT_STYLE_MASK 0x0E000000 +#define KGSL_CONTEXT_PREEMPT_STYLE_SHIFT 25 +#define KGSL_CONTEXT_PREEMPT_STYLE_DEFAULT 0x0 +#define KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER 0x1 +#define KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN 0x2 + +#define KGSL_CONTEXT_TYPE_MASK 0x01F00000 +#define KGSL_CONTEXT_TYPE_SHIFT 20 +#define KGSL_CONTEXT_TYPE_ANY 0 +#define KGSL_CONTEXT_TYPE_GL 1 +#define KGSL_CONTEXT_TYPE_CL 2 +#define KGSL_CONTEXT_TYPE_C2D 3 +#define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E + +#define KGSL_CONTEXT_INVALID 0xffffffff + +/* + * --- command batch flags --- + * The bits that are linked to a KGSL_CONTEXT equivalent are either legacy + * definitions or bits that are valid for both contexts and cmdbatches. 
To be + * safe the other 8 bits that are still available in the context field should be + * omitted here in case we need to share - the other bits are available for + * cmdbatch only flags as needed + */ +#define KGSL_CMDBATCH_MEMLIST 0x00000001 +#define KGSL_CMDBATCH_MARKER 0x00000002 +#define KGSL_CMDBATCH_SUBMIT_IB_LIST KGSL_CONTEXT_SUBMIT_IB_LIST /* 0x004 */ +#define KGSL_CMDBATCH_CTX_SWITCH KGSL_CONTEXT_CTX_SWITCH /* 0x008 */ +#define KGSL_CMDBATCH_PROFILING 0x00000010 +#define KGSL_CMDBATCH_END_OF_FRAME KGSL_CONTEXT_END_OF_FRAME /* 0x100 */ +#define KGSL_CMDBATCH_SYNC KGSL_CONTEXT_SYNC /* 0x400 */ +#define KGSL_CMDBATCH_PWR_CONSTRAINT KGSL_CONTEXT_PWR_CONSTRAINT /* 0x800 */ + +/* + * Reserve bits [16:19] and bits [28:31] for possible bits shared between + * contexts and command batches. Update this comment as new flags are added. + */ + +/* + * gpu_command_object flags - these flags communicate the type of command or + * memory object being submitted for a GPU command + */ + +/* Flags for GPU command objects */ +#define KGSL_CMDLIST_IB 0x00000001U +#define KGSL_CMDLIST_CTXTSWITCH_PREAMBLE 0x00000002U +#define KGSL_CMDLIST_IB_PREAMBLE 0x00000004U + +/* Flags for GPU command memory objects */ +#define KGSL_OBJLIST_MEMOBJ 0x00000008U +#define KGSL_OBJLIST_PROFILE 0x00000010U + +/* Flags for GPU command sync points */ +#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0 +#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1 + +/* --- Memory allocation flags --- */ + +/* General allocation hints */ +#define KGSL_MEMFLAGS_SECURE 0x00000008ULL +#define KGSL_MEMFLAGS_GPUREADONLY 0x01000000U +#define KGSL_MEMFLAGS_GPUWRITEONLY 0x02000000U +#define KGSL_MEMFLAGS_FORCE_32BIT 0x100000000ULL + +/* Memory caching hints */ +#define KGSL_CACHEMODE_MASK 0x0C000000U +#define KGSL_CACHEMODE_SHIFT 26 + +#define KGSL_CACHEMODE_WRITECOMBINE 0 +#define KGSL_CACHEMODE_UNCACHED 1 +#define KGSL_CACHEMODE_WRITETHROUGH 2 +#define KGSL_CACHEMODE_WRITEBACK 3 + +#define KGSL_MEMFLAGS_USE_CPU_MAP 0x10000000ULL + +/* Memory types for which allocations are made */ +#define KGSL_MEMTYPE_MASK 0x0000FF00 +#define KGSL_MEMTYPE_SHIFT 8 + +#define KGSL_MEMTYPE_OBJECTANY 0 +#define KGSL_MEMTYPE_FRAMEBUFFER 1 +#define KGSL_MEMTYPE_RENDERBUFFER 2 +#define KGSL_MEMTYPE_ARRAYBUFFER 3 +#define KGSL_MEMTYPE_ELEMENTARRAYBUFFER 4 +#define KGSL_MEMTYPE_VERTEXARRAYBUFFER 5 +#define KGSL_MEMTYPE_TEXTURE 6 +#define KGSL_MEMTYPE_SURFACE 7 +#define KGSL_MEMTYPE_EGL_SURFACE 8 +#define KGSL_MEMTYPE_GL 9 +#define KGSL_MEMTYPE_CL 10 +#define KGSL_MEMTYPE_CL_BUFFER_MAP 11 +#define KGSL_MEMTYPE_CL_BUFFER_NOMAP 12 +#define KGSL_MEMTYPE_CL_IMAGE_MAP 13 +#define KGSL_MEMTYPE_CL_IMAGE_NOMAP 14 +#define KGSL_MEMTYPE_CL_KERNEL_STACK 15 +#define KGSL_MEMTYPE_COMMAND 16 +#define KGSL_MEMTYPE_2D 17 +#define KGSL_MEMTYPE_EGL_IMAGE 18 +#define KGSL_MEMTYPE_EGL_SHADOW 19 +#define KGSL_MEMTYPE_MULTISAMPLE 20 +#define KGSL_MEMTYPE_KERNEL 255 + +/* + * Alignment hint, passed as the power of 2 exponent. + * i.e 4k (2^12) would be 12, 64k (2^16)would be 16. 
+ */ +#define KGSL_MEMALIGN_MASK 0x00FF0000 +#define KGSL_MEMALIGN_SHIFT 16 + +enum kgsl_user_mem_type { + KGSL_USER_MEM_TYPE_PMEM = 0x00000000, + KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + KGSL_USER_MEM_TYPE_ION = 0x00000003, + /* + * ION type is retained for backwards compatibilty but Ion buffers are + * dma-bufs so try to use that naming if we can + */ + KGSL_USER_MEM_TYPE_DMABUF = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000007, +}; +#define KGSL_MEMFLAGS_USERMEM_MASK 0x000000e0 +#define KGSL_MEMFLAGS_USERMEM_SHIFT 5 + +/* + * Unfortunately, enum kgsl_user_mem_type starts at 0 which does not + * leave a good value for allocated memory. In the flags we use + * 0 to indicate allocated memory and thus need to add 1 to the enum + * values. + */ +#define KGSL_USERMEM_FLAG(x) (((x) + 1) << KGSL_MEMFLAGS_USERMEM_SHIFT) + +#define KGSL_MEMFLAGS_NOT_USERMEM 0 +#define KGSL_MEMFLAGS_USERMEM_PMEM KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_PMEM) +#define KGSL_MEMFLAGS_USERMEM_ASHMEM \ + KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ASHMEM) +#define KGSL_MEMFLAGS_USERMEM_ADDR KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ADDR) +#define KGSL_MEMFLAGS_USERMEM_ION KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ION) + +/* --- generic KGSL flag values --- */ + +#define KGSL_FLAGS_NORMALMODE 0x00000000 +#define KGSL_FLAGS_SAFEMODE 0x00000001 +#define KGSL_FLAGS_INITIALIZED0 0x00000002 +#define KGSL_FLAGS_INITIALIZED 0x00000004 +#define KGSL_FLAGS_STARTED 0x00000008 +#define KGSL_FLAGS_ACTIVE 0x00000010 +#define KGSL_FLAGS_RESERVED0 0x00000020 +#define KGSL_FLAGS_RESERVED1 0x00000040 +#define KGSL_FLAGS_RESERVED2 0x00000080 +#define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Server Side Sync Timeout in milliseconds */ +#define KGSL_SYNCOBJ_SERVER_TIMEOUT 2000 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; + +#define KGSL_CONVERT_TO_MBPS(val) \ + (val*1000*1000U) + +/* device id */ +enum kgsl_deviceid { + KGSL_DEVICE_3D0 = 0x00000000, + KGSL_DEVICE_MAX +}; + +struct kgsl_devinfo { + + unsigned int device_id; + /* chip revision id + * coreid:8 majorrev:8 minorrev:8 patch:8 + */ + unsigned int chip_id; + unsigned int mmu_enabled; + unsigned long gmem_gpubaseaddr; + /* + * This field contains the adreno revision + * number 200, 205, 220, etc... + */ + unsigned int gpu_id; + size_t gmem_sizebytes; +}; + +/* + * struct kgsl_devmemstore - this structure defines the region of memory + * that can be mmap()ed from this driver. The timestamp fields are volatile + * because they are written by the GPU + * @soptimestamp: Start of pipeline timestamp written by GPU before the + * commands in concern are processed + * @sbz: Unused, kept for 8 byte alignment + * @eoptimestamp: End of pipeline timestamp written by GPU after the + * commands in concern are processed + * @sbz2: Unused, kept for 8 byte alignment + * @preempted: Indicates if the context was preempted + * @sbz3: Unused, kept for 8 byte alignment + * @ref_wait_ts: Timestamp on which to generate interrupt, unused now. 
+ * @sbz4: Unused, kept for 8 byte alignment + * @current_context: The current context the GPU is working on + * @sbz5: Unused, kept for 8 byte alignment + */ +struct kgsl_devmemstore { + volatile unsigned int soptimestamp; + unsigned int sbz; + volatile unsigned int eoptimestamp; + unsigned int sbz2; + volatile unsigned int preempted; + unsigned int sbz3; + volatile unsigned int ref_wait_ts; + unsigned int sbz4; + unsigned int current_context; + unsigned int sbz5; +}; + +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) + +/* timestamp id*/ +enum kgsl_timestamp_type { + KGSL_TIMESTAMP_CONSUMED = 0x00000001, /* start-of-pipeline timestamp */ + KGSL_TIMESTAMP_RETIRED = 0x00000002, /* end-of-pipeline timestamp*/ + KGSL_TIMESTAMP_QUEUED = 0x00000003, +}; + +/* property types - used with kgsl_device_getproperty */ +#define KGSL_PROP_DEVICE_INFO 0x1 +#define KGSL_PROP_DEVICE_SHADOW 0x2 +#define KGSL_PROP_DEVICE_POWER 0x3 +#define KGSL_PROP_SHMEM 0x4 +#define KGSL_PROP_SHMEM_APERTURES 0x5 +#define KGSL_PROP_MMU_ENABLE 0x6 +#define KGSL_PROP_INTERRUPT_WAITS 0x7 +#define KGSL_PROP_VERSION 0x8 +#define KGSL_PROP_GPU_RESET_STAT 0x9 +#define KGSL_PROP_PWRCTRL 0xE +#define KGSL_PROP_PWR_CONSTRAINT 0x12 +#define KGSL_PROP_UCHE_GMEM_VADDR 0x13 +#define KGSL_PROP_SP_GENERIC_MEM 0x14 +#define KGSL_PROP_UCODE_VERSION 0x15 +#define KGSL_PROP_GPMU_VERSION 0x16 + +struct kgsl_shadowprop { + unsigned long gpuaddr; + size_t size; + unsigned int flags; /* contains KGSL_FLAGS_ values */ +}; + +struct kgsl_version { + unsigned int drv_major; + unsigned int drv_minor; + unsigned int dev_major; + unsigned int dev_minor; +}; + +struct kgsl_sp_generic_mem { + uint64_t local; + uint64_t pvt; +}; + +struct kgsl_ucode_version { + unsigned int pfp; + unsigned int pm4; +}; + +struct kgsl_gpmu_version { + unsigned int major; + unsigned int minor; + unsigned int features; +}; + +/* Performance counter groups */ + +#define KGSL_PERFCOUNTER_GROUP_CP 0x0 +#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1 +#define KGSL_PERFCOUNTER_GROUP_PC 0x2 +#define KGSL_PERFCOUNTER_GROUP_VFD 0x3 +#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4 +#define KGSL_PERFCOUNTER_GROUP_VPC 0x5 +#define KGSL_PERFCOUNTER_GROUP_TSE 0x6 +#define KGSL_PERFCOUNTER_GROUP_RAS 0x7 +#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8 +#define KGSL_PERFCOUNTER_GROUP_TP 0x9 +#define KGSL_PERFCOUNTER_GROUP_SP 0xA +#define KGSL_PERFCOUNTER_GROUP_RB 0xB +#define KGSL_PERFCOUNTER_GROUP_PWR 0xC +#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD +#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE +#define KGSL_PERFCOUNTER_GROUP_MH 0xF +#define KGSL_PERFCOUNTER_GROUP_PA_SU 0x10 +#define KGSL_PERFCOUNTER_GROUP_SQ 0x11 +#define KGSL_PERFCOUNTER_GROUP_SX 0x12 +#define KGSL_PERFCOUNTER_GROUP_TCF 0x13 +#define KGSL_PERFCOUNTER_GROUP_TCM 0x14 +#define KGSL_PERFCOUNTER_GROUP_TCR 0x15 +#define KGSL_PERFCOUNTER_GROUP_L2 0x16 +#define KGSL_PERFCOUNTER_GROUP_VSC 0x17 +#define KGSL_PERFCOUNTER_GROUP_CCU 0x18 +#define KGSL_PERFCOUNTER_GROUP_LRZ 0x19 +#define KGSL_PERFCOUNTER_GROUP_CMP 0x1A +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON 0x1B +#define KGSL_PERFCOUNTER_GROUP_SP_PWR 0x1C +#define KGSL_PERFCOUNTER_GROUP_TP_PWR 0x1D +#define KGSL_PERFCOUNTER_GROUP_RB_PWR 0x1E +#define KGSL_PERFCOUNTER_GROUP_CCU_PWR 0x1F +#define KGSL_PERFCOUNTER_GROUP_UCHE_PWR 0x20 +#define KGSL_PERFCOUNTER_GROUP_CP_PWR 0x21 +#define KGSL_PERFCOUNTER_GROUP_GPMU_PWR 0x22 +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR 0x23 +#define KGSL_PERFCOUNTER_GROUP_MAX 0x24 + +#define 
KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF
+#define KGSL_PERFCOUNTER_BROKEN 0xFFFFFFFE
+
+/* structure holds list of ibs */
+struct kgsl_ibdesc {
+ unsigned long gpuaddr;
+ unsigned long __pad;
+ size_t sizedwords;
+ unsigned int ctrl;
+};
+
+/**
+ * struct kgsl_cmdbatch_profiling_buffer
+ * @wall_clock_s: Wall clock at ringbuffer submission time (seconds)
+ * @wall_clock_ns: Wall clock at ringbuffer submission time (nanoseconds)
+ * @gpu_ticks_queued: GPU ticks at ringbuffer submission
+ * @gpu_ticks_submitted: GPU ticks when starting cmdbatch execution
+ * @gpu_ticks_retired: GPU ticks when finishing cmdbatch execution
+ *
+ * This structure defines the profiling buffer used to measure cmdbatch
+ * execution time
+ */
+struct kgsl_cmdbatch_profiling_buffer {
+ uint64_t wall_clock_s;
+ uint64_t wall_clock_ns;
+ uint64_t gpu_ticks_queued;
+ uint64_t gpu_ticks_submitted;
+ uint64_t gpu_ticks_retired;
+};
+
+/* ioctls */
+#define KGSL_IOC_TYPE 0x09
+
+/* get misc info about the GPU
+ type should be a value from enum kgsl_property_type
+ value points to a structure that varies based on type
+ sizebytes is sizeof() that structure
+ for KGSL_PROP_DEVICE_INFO, use struct kgsl_devinfo
+ this structure contains hardware versioning info.
+ for KGSL_PROP_DEVICE_SHADOW, use struct kgsl_shadowprop
+ this is used to find mmap() offset and sizes for mapping
+ struct kgsl_memstore into userspace.
+*/
+struct kgsl_device_getproperty {
+ unsigned int type;
+ void __user *value;
+ size_t sizebytes;
+};
+
+#define IOCTL_KGSL_DEVICE_GETPROPERTY \
+ _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty)
+
+/* IOCTL_KGSL_DEVICE_READ (0x3) - removed 03/2012
+ */
+
+/* block until the GPU has executed past a given timestamp
+ * timeout is in milliseconds.
+ */
+struct kgsl_device_waittimestamp {
+ unsigned int timestamp;
+ unsigned int timeout;
+};
+
+#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \
+ _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp)
+
+struct kgsl_device_waittimestamp_ctxtid {
+ unsigned int context_id;
+ unsigned int timestamp;
+ unsigned int timeout;
+};
+
+#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \
+ _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid)
+
+/* DEPRECATED: issue indirect commands to the GPU.
+ * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE
+ * ibaddr and sizedwords must specify a subset of a buffer created
+ * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM
+ * flags may be a mask of KGSL_CONTEXT_ values
+ * timestamp is a returned counter value which can be passed to
+ * other ioctls to determine when the commands have been executed by
+ * the GPU.
+ *
+ * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS
+ * instead
+ */
+struct kgsl_ringbuffer_issueibcmds {
+ unsigned int drawctxt_id;
+ unsigned long ibdesc_addr;
+ unsigned int numibs;
+ unsigned int timestamp; /*output param */
+ unsigned int flags;
+};
+
+#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS \
+ _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds)
+
+/* read the most recently executed timestamp value
+ * type should be a value from enum kgsl_timestamp_type
+ */
+struct kgsl_cmdstream_readtimestamp {
+ unsigned int type;
+ unsigned int timestamp; /*output param */
+};
+
+#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_OLD \
+ _IOR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp)
+
+#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP \
+ _IOWR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp)
+
+/* free memory when the GPU reaches a given timestamp.
+ * gpuaddr specify a memory region created by a + * IOCTL_KGSL_SHAREDMEM_FROM_PMEM call + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_freememontimestamp { + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* Previous versions of this header had incorrectly defined + IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP as a read-only ioctl instead + of a write only ioctl. To ensure binary compatability, the following + #define will be used to intercept the incorrect ioctl +*/ + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* create a draw context, which is used to preserve GPU state. + * The flags field may contain a mask KGSL_CONTEXT_* values + */ +struct kgsl_drawctxt_create { + unsigned int flags; + unsigned int drawctxt_id; /*output param */ +}; + +#define IOCTL_KGSL_DRAWCTXT_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x13, struct kgsl_drawctxt_create) + +/* destroy a draw context */ +struct kgsl_drawctxt_destroy { + unsigned int drawctxt_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_DESTROY \ + _IOW(KGSL_IOC_TYPE, 0x14, struct kgsl_drawctxt_destroy) + +/* add a block of pmem, fb, ashmem or user allocated address + * into the GPU address space */ +struct kgsl_map_user_mem { + int fd; + unsigned long gpuaddr; /*output param */ + size_t len; + size_t offset; + unsigned long hostptr; /*input param */ + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) + +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + +/* add a block of pmem or fb into the GPU address space */ +struct kgsl_sharedmem_from_pmem { + int pmem_fd; + unsigned long gpuaddr; /*output param */ + unsigned int len; + unsigned int offset; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_PMEM \ + _IOWR(KGSL_IOC_TYPE, 0x20, struct kgsl_sharedmem_from_pmem) + +/* remove memory from the GPU's address space */ +struct kgsl_sharedmem_free { + unsigned long gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FREE \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free) + +struct kgsl_cff_user_event { + unsigned char cff_opcode; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_CFF_USER_EVENT \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_cff_user_event) + +struct kgsl_gmem_desc { + unsigned int x; + unsigned int y; + unsigned int width; + unsigned int height; + unsigned int pitch; +}; + +struct kgsl_buffer_desc { + void *hostptr; + unsigned long gpuaddr; + int size; + unsigned int format; + unsigned int pitch; + unsigned int enabled; +}; + +struct kgsl_bind_gmem_shadow { + unsigned int drawctxt_id; + struct kgsl_gmem_desc gmem_desc; + unsigned int shadow_x; + unsigned int shadow_y; + struct kgsl_buffer_desc 
shadow_buffer; + unsigned int buffer_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_BIND_GMEM_SHADOW \ + _IOW(KGSL_IOC_TYPE, 0x22, struct kgsl_bind_gmem_shadow) + +/* add a block of memory into the GPU address space */ + +/* + * IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC deprecated 09/2012 + * use IOCTL_KGSL_GPUMEM_ALLOC instead + */ + +struct kgsl_sharedmem_from_vmalloc { + unsigned long gpuaddr; /*output param */ + unsigned int hostptr; + unsigned int flags; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc) + +/* + * This is being deprecated in favor of IOCTL_KGSL_GPUMEM_CACHE_SYNC which + * supports both directions (flush and invalidate). This code will still + * work, but by definition it will do a flush of the cache which might not be + * what you want to have happen on a buffer following a GPU operation. It is + * safer to go with IOCTL_KGSL_GPUMEM_CACHE_SYNC + */ + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free) + +struct kgsl_drawctxt_set_bin_base_offset { + unsigned int drawctxt_id; + unsigned int offset; +}; + +#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET \ + _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset) + +enum kgsl_cmdwindow_type { + KGSL_CMDWINDOW_MIN = 0x00000000, + KGSL_CMDWINDOW_2D = 0x00000000, + KGSL_CMDWINDOW_3D = 0x00000001, /* legacy */ + KGSL_CMDWINDOW_MMU = 0x00000002, + KGSL_CMDWINDOW_ARBITER = 0x000000FF, + KGSL_CMDWINDOW_MAX = 0x000000FF, +}; + +/* write to the command window */ +struct kgsl_cmdwindow_write { + enum kgsl_cmdwindow_type target; + unsigned int addr; + unsigned int data; +}; + +#define IOCTL_KGSL_CMDWINDOW_WRITE \ + _IOW(KGSL_IOC_TYPE, 0x2e, struct kgsl_cmdwindow_write) + +struct kgsl_gpumem_alloc { + unsigned long gpuaddr; /* output param */ + size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc) + +struct kgsl_cff_syncmem { + unsigned long gpuaddr; + size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem) + +/* + * A timestamp event allows the user space to register an action following an + * expired timestamp. Note IOCTL_KGSL_TIMESTAMP_EVENT has been redefined to + * _IOWR to support fences which need to return a fd for the priv parameter. + */ + +struct kgsl_timestamp_event { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + void __user *priv; /* Pointer to the event specific blob */ + size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_OLD \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_timestamp_event) + +/* A genlock timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_GENLOCK 1 + +struct kgsl_timestamp_event_genlock { + int handle; /* Handle of the genlock lock to release */ +}; + +/* A fence timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_FENCE 2 + +struct kgsl_timestamp_event_fence { + int fence_fd; /* Fence to signal */ +}; + +/* + * Set a property within the kernel. 
Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + +#define IOCTL_KGSL_TIMESTAMP_EVENT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event) + +/** + * struct kgsl_gpumem_alloc_id - argument to IOCTL_KGSL_GPUMEM_ALLOC_ID + * @id: returned id value for this allocation. + * @flags: mask of KGSL_MEM* values requested and actual flags on return. + * @size: requested size of the allocation and actual size on return. + * @mmapsize: returned size to pass to mmap() which may be larger than 'size' + * @gpuaddr: returned GPU address for the allocation + * + * Allocate memory for access by the GPU. The flags and size fields are echoed + * back by the kernel, so that the caller can know if the request was + * adjusted. + * + * Supported flags: + * KGSL_MEMFLAGS_GPUREADONLY: the GPU will be unable to write to the buffer + * KGSL_MEMTYPE*: usage hint for debugging aid + * KGSL_MEMALIGN*: alignment hint, may be ignored or adjusted by the kernel. + * KGSL_MEMFLAGS_USE_CPU_MAP: If set on call and return, the returned GPU + * address will be 0. Calling mmap() will set the GPU address. + */ +struct kgsl_gpumem_alloc_id { + unsigned int id; + unsigned int flags; + size_t size; + size_t mmapsize; + unsigned long gpuaddr; +/* private: reserved for future use*/ + unsigned long __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id) + +/** + * struct kgsl_gpumem_free_id - argument to IOCTL_KGSL_GPUMEM_FREE_ID + * @id: GPU allocation id to free + * + * Free an allocation by id, in case a GPU address has not been assigned or + * is unknown. Freeing an allocation by id with this ioctl or by GPU address + * with IOCTL_KGSL_SHAREDMEM_FREE are equivalent. + */ +struct kgsl_gpumem_free_id { + unsigned int id; +/* private: reserved for future use*/ + unsigned int __pad; +}; + +#define IOCTL_KGSL_GPUMEM_FREE_ID \ + _IOWR(KGSL_IOC_TYPE, 0x35, struct kgsl_gpumem_free_id) + +/** + * struct kgsl_gpumem_get_info - argument to IOCTL_KGSL_GPUMEM_GET_INFO + * @gpuaddr: GPU address to query. Also set on return. + * @id: GPU allocation id to query. Also set on return. + * @flags: returned mask of KGSL_MEM* values. + * @size: returned size of the allocation. + * @mmapsize: returned size to pass mmap(), which may be larger than 'size' + * @useraddr: returned address of the userspace mapping for this buffer + * + * This ioctl allows querying of all user visible attributes of an existing + * allocation, by either the GPU address or the id returned by a previous + * call to IOCTL_KGSL_GPUMEM_ALLOC_ID. Legacy allocation ioctls may not + * return all attributes so this ioctl can be used to look them up if needed. + * + */ +struct kgsl_gpumem_get_info { + unsigned long gpuaddr; + unsigned int id; + unsigned int flags; + size_t size; + size_t mmapsize; + unsigned long useraddr; +/* private: reserved for future use*/ + unsigned long __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO\ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info) + +/** + * struct kgsl_gpumem_sync_cache - argument to IOCTL_KGSL_GPUMEM_SYNC_CACHE + * @gpuaddr: GPU address of the buffer to sync. + * @id: id of the buffer to sync. Either gpuaddr or id is sufficient. 
+ * @op: a mask of KGSL_GPUMEM_CACHE_* values + * @offset: offset into the buffer + * @length: number of bytes starting from offset to perform + * the cache operation on + * + * Sync the L2 cache for memory headed to and from the GPU - this replaces + * KGSL_SHAREDMEM_FLUSH_CACHE since it can handle cache management for both + * directions + * + */ +struct kgsl_gpumem_sync_cache { + unsigned long gpuaddr; + unsigned int id; + unsigned int op; + size_t offset; + size_t length; +}; + +#define KGSL_GPUMEM_CACHE_CLEAN (1 << 0) +#define KGSL_GPUMEM_CACHE_TO_GPU KGSL_GPUMEM_CACHE_CLEAN + +#define KGSL_GPUMEM_CACHE_INV (1 << 1) +#define KGSL_GPUMEM_CACHE_FROM_GPU KGSL_GPUMEM_CACHE_INV + +#define KGSL_GPUMEM_CACHE_FLUSH \ + (KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV) + +/* Flag to ensure backwards compatibility of kgsl_gpumem_sync_cache struct */ +#define KGSL_GPUMEM_CACHE_RANGE (1 << 31U) + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache) + +/** + * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET + * @groupid: Performance counter group ID + * @countable: Countable to select within the group + * @offset: Return offset of the reserved LO counter + * @offset_hi: Return offset of the reserved HI counter + * + * Get an available performance counter from a specified groupid. The offset + * of the performance counter will be returned after successfully assigning + * the countable to the counter for the specified group. An error will be + * returned and an offset of 0 if the groupid is invalid or there are no + * more counters left. After successfully getting a perfcounter, the user + * must call kgsl_perfcounter_put(groupid, contable) when finished with + * the perfcounter to clear up perfcounter resources. + * + */ +struct kgsl_perfcounter_get { + unsigned int groupid; + unsigned int countable; + unsigned int offset; + unsigned int offset_hi; +/* private: reserved for future use */ + unsigned int __pad; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_GET \ + _IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get) + +/** + * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT + * @groupid: Performance counter group ID + * @countable: Countable to release within the group + * + * Put an allocated performance counter to allow others to have access to the + * resource that was previously taken. This is only to be called after + * successfully getting a performance counter from kgsl_perfcounter_get(). + * + */ +struct kgsl_perfcounter_put { + unsigned int groupid; + unsigned int countable; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_PUT \ + _IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group ID + * @countable: Return active countables array + * @size: Size of active countables array + * @max_counters: Return total number counters for the group ID + * + * Query the available performance counters given a groupid. The array + * *countables is used to return the current active countables in counters. + * The size of the array is passed in so the kernel will only write at most + * size or counter->size for the group id. The total number of available + * counters for the group ID is returned in max_counters. 
+ * If the array or size passed in are invalid, then only the maximum number + * of counters will be returned, no data will be written to *countables. + * If the groupid is invalid an error code will be returned. + * + */ +struct kgsl_perfcounter_query { + unsigned int groupid; + /* Array to return the current countable for up to size counters */ + unsigned int __user *countables; + unsigned int count; + unsigned int max_counters; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group IDs + * @countable: Performance counter countable IDs + * @value: Return performance counter reads + * @size: Size of all arrays (groupid/countable pair and return value) + * + * Read in the current value of a performance counter given by the groupid + * and countable. + * + */ + +struct kgsl_perfcounter_read_group { + unsigned int groupid; + unsigned int countable; + unsigned long long value; +}; + +struct kgsl_perfcounter_read { + struct kgsl_perfcounter_read_group __user *reads; + unsigned int count; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read) +/* + * struct kgsl_gpumem_sync_cache_bulk - argument to + * IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK + * @id_list: list of GPU buffer ids of the buffers to sync + * @count: number of GPU buffer ids in id_list + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * + * Sync the cache for memory headed to and from the GPU. Certain + * optimizations can be made on the cache operation based on the total + * size of the working set of memory to be managed. + */ +struct kgsl_gpumem_sync_cache_bulk { + unsigned int __user *id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk) + +/* + * struct kgsl_cmd_syncpoint_timestamp + * @context_id: ID of a KGSL context + * @timestamp: GPU timestamp + * + * This structure defines a syncpoint comprising a context/timestamp pair. A + * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define + * dependencies that must be met before the command can be submitted to the + * hardware + */ +struct kgsl_cmd_syncpoint_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +struct kgsl_cmd_syncpoint_fence { + int fd; +}; + +/** + * struct kgsl_cmd_syncpoint - Define a sync point for a command batch + * @type: type of sync point defined here + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * + * This structure contains pointers defining a specific command sync point. + * The pointer and size should point to a type appropriate structure. 
+ */ +struct kgsl_cmd_syncpoint { + int type; + void __user *priv; + size_t size; +}; + +/* Flag to indicate that the cmdlist may contain memlists */ +#define KGSL_IBDESC_MEMLIST 0x1 + +/* Flag to point out the cmdbatch profiling buffer in the memlist */ +#define KGSL_IBDESC_PROFILING_BUFFER 0x2 + +/** + * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS + * @context_id: KGSL context ID that owns the commands + * @flags: + * @cmdlist: User pointer to a list of kgsl_ibdesc structures + * @numcmds: Number of commands listed in cmdlist + * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures + * @numsyncs: Number of sync points listed in synclist + * @timestamp: On entry the a user defined timestamp, on exist the timestamp + * assigned to the command batch + * + * This structure specifies a command to send to the GPU hardware. This is + * similar to kgsl_issueibcmds expect that it doesn't support the legacy way to + * submit IB lists and it adds sync points to block the IB until the + * dependencies are satisified. This entry point is the new and preferred way + * to submit commands to the GPU. The memory list can be used to specify all + * memory that is referrenced in the current set of commands. + */ + +struct kgsl_submit_commands { + unsigned int context_id; + unsigned int flags; + struct kgsl_ibdesc __user *cmdlist; + unsigned int numcmds; + struct kgsl_cmd_syncpoint __user *synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands) + +/** + * struct kgsl_device_constraint - device constraint argument + * @context_id: KGSL context ID + * @type: type of constraint i.e pwrlevel/none + * @data: constraint data + * @size: size of the constraint data + */ +struct kgsl_device_constraint { + unsigned int type; + unsigned int context_id; + void __user *data; + size_t size; +}; + +/* Constraint Type*/ +#define KGSL_CONSTRAINT_NONE 0 +#define KGSL_CONSTRAINT_PWRLEVEL 1 + +/* PWRLEVEL constraint level*/ +/* set to min frequency */ +#define KGSL_CONSTRAINT_PWR_MIN 0 +/* set to max frequency */ +#define KGSL_CONSTRAINT_PWR_MAX 1 + +struct kgsl_device_constraint_pwrlevel { + unsigned int level; +}; + +/** + * struct kgsl_syncsource_create - Argument to IOCTL_KGSL_SYNCSOURCE_CREATE + * @id: returned id for the syncsource that was created. + * + * This ioctl creates a userspace sync timeline. + */ + +struct kgsl_syncsource_create { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x40, struct kgsl_syncsource_create) + +/** + * struct kgsl_syncsource_destroy - Argument to IOCTL_KGSL_SYNCSOURCE_DESTROY + * @id: syncsource id to destroy + * + * This ioctl creates a userspace sync timeline. + */ + +struct kgsl_syncsource_destroy { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_DESTROY \ + _IOWR(KGSL_IOC_TYPE, 0x41, struct kgsl_syncsource_destroy) + +/** + * struct kgsl_syncsource_create_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * @id: syncsource id + * @fence_fd: returned sync_fence fd + * + * Create a fence that may be signaled by userspace by calling + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between + * these fences. 
+/**
+ * struct kgsl_syncsource_create_fence - Argument to
+ * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE
+ * @id: syncsource id
+ * @fence_fd: returned sync_fence fd
+ *
+ * Create a fence that may be signaled by userspace by calling
+ * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between
+ * these fences.
+ */
+struct kgsl_syncsource_create_fence {
+ unsigned int id;
+ int fence_fd;
+/* private: reserved for future use */
+ unsigned int __pad[4];
+};
+
+/**
+ * struct kgsl_syncsource_signal_fence - Argument to
+ * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE
+ * @id: syncsource id
+ * @fence_fd: sync_fence fd to signal
+ *
+ * Signal a fence that was created by an IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE
+ * call using the same syncsource id. This allows a fence to be shared
+ * with other processes but only signaled by the process owning the fd
+ * used to create the fence.
+ */
+#define IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE \
+ _IOWR(KGSL_IOC_TYPE, 0x42, struct kgsl_syncsource_create_fence)
+
+struct kgsl_syncsource_signal_fence {
+ unsigned int id;
+ int fence_fd;
+/* private: reserved for future use */
+ unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE \
+ _IOWR(KGSL_IOC_TYPE, 0x43, struct kgsl_syncsource_signal_fence)
+
+/**
+ * struct kgsl_cff_sync_gpuobj - Argument to IOCTL_KGSL_CFF_SYNC_GPUOBJ
+ * @offset: Offset into the GPU object to sync
+ * @length: Number of bytes to sync
+ * @id: ID of the GPU object to sync
+ */
+struct kgsl_cff_sync_gpuobj {
+ uint64_t offset;
+ uint64_t length;
+ unsigned int id;
+};
+
+#define IOCTL_KGSL_CFF_SYNC_GPUOBJ \
+ _IOW(KGSL_IOC_TYPE, 0x44, struct kgsl_cff_sync_gpuobj)
+
+/**
+ * struct kgsl_gpuobj_alloc - Argument to IOCTL_KGSL_GPUOBJ_ALLOC
+ * @size: Size in bytes of the object to allocate
+ * @flags: mask of KGSL_MEMFLAG_* bits
+ * @va_len: Size in bytes of the virtual region to allocate
+ * @mmapsize: Returns the mmap() size of the object
+ * @id: Returns the GPU object ID of the new object
+ * @metadata_len: Length of the metadata to copy from the user
+ * @metadata: Pointer to the user specified metadata to store for the object
+ */
+struct kgsl_gpuobj_alloc {
+ uint64_t size;
+ uint64_t flags;
+ uint64_t va_len;
+ uint64_t mmapsize;
+ unsigned int id;
+ unsigned int metadata_len;
+ uint64_t metadata;
+};
+
+/* Let the user know that this header supports the gpuobj metadata */
+#define KGSL_GPUOBJ_ALLOC_METADATA_MAX 64
+
+#define IOCTL_KGSL_GPUOBJ_ALLOC \
+ _IOWR(KGSL_IOC_TYPE, 0x45, struct kgsl_gpuobj_alloc)
+
+/**
+ * struct kgsl_gpuobj_free - Argument to IOCTL_KGSL_GPUOBJ_FREE
+ * @flags: Mask of KGSL_GPUOBJ_FREE_ON_EVENT
+ * @priv: Pointer to the private object if KGSL_GPUOBJ_FREE_ON_EVENT is
+ * specified
+ * @id: ID of the GPU object to free
+ * @type: If KGSL_GPUOBJ_FREE_ON_EVENT is specified, the type of asynchronous
+ * event to free on
+ * @len: Length of the data passed in priv
+ */
+struct kgsl_gpuobj_free {
+ uint64_t flags;
+ uint64_t __user priv;
+ unsigned int id;
+ unsigned int type;
+ unsigned int len;
+};
+
+#define KGSL_GPUOBJ_FREE_ON_EVENT 1
+
+#define KGSL_GPU_EVENT_TIMESTAMP 1
+#define KGSL_GPU_EVENT_FENCE 2
+
+/**
+ * struct kgsl_gpu_event_timestamp - Specifies a timestamp event to free a GPU
+ * object on
+ * @context_id: ID of the context whose timestamp is waited on
+ * @timestamp: Timestamp of the timestamp event to wait for
+ */
+struct kgsl_gpu_event_timestamp {
+ unsigned int context_id;
+ unsigned int timestamp;
+};
+
+/**
+ * struct kgsl_gpu_event_fence - Specifies a fence ID to free a GPU object on
+ * @fd: File descriptor for the fence
+ */
+struct kgsl_gpu_event_fence {
+ int fd;
+};
+
+#define IOCTL_KGSL_GPUOBJ_FREE \
+ _IOW(KGSL_IOC_TYPE, 0x46, struct kgsl_gpuobj_free)
+
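
For illustration, a sketch that allocates a GPU object and then asks the driver to defer the free until a context/timestamp event retires, using KGSL_GPUOBJ_FREE_ON_EVENT and the event structures above. The include path, helper name and device fd are assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>   /* assumed install location of this header */

/* Allocate a buffer object, then queue a free tied to a timestamp event. */
static int alloc_then_free_on_ts(int fd, uint64_t size,
                                 unsigned int ctxt, unsigned int ts)
{
    struct kgsl_gpuobj_alloc alloc;
    struct kgsl_gpu_event_timestamp event;
    struct kgsl_gpuobj_free free_req;

    memset(&alloc, 0, sizeof(alloc));
    alloc.size = size;             /* size in bytes; flags left at 0 */

    if (ioctl(fd, IOCTL_KGSL_GPUOBJ_ALLOC, &alloc) < 0)
        return -1;

    /* ... use alloc.id in GPU submissions owned by context ctxt ... */

    event.context_id = ctxt;       /* context whose timestamp is watched */
    event.timestamp = ts;          /* free once this timestamp retires */

    memset(&free_req, 0, sizeof(free_req));
    free_req.flags = KGSL_GPUOBJ_FREE_ON_EVENT;
    free_req.priv = (uintptr_t)&event; /* u64 field carrying a user pointer */
    free_req.id = alloc.id;
    free_req.type = KGSL_GPU_EVENT_TIMESTAMP;
    free_req.len = sizeof(event);

    return ioctl(fd, IOCTL_KGSL_GPUOBJ_FREE, &free_req);
}
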
+/**
+ * struct kgsl_gpuobj_info - argument to IOCTL_KGSL_GPUOBJ_INFO
+ * @gpuaddr: GPU address of the object
+ * @flags: Current flags for the object
+ * @size: Size of the object
+ * @va_len: VA size of the object
+ * @va_addr: Virtual address of the object (if it is mapped)
+ * @id: GPU object ID of the object to query
+ */
+struct kgsl_gpuobj_info {
+ uint64_t gpuaddr;
+ uint64_t flags;
+ uint64_t size;
+ uint64_t va_len;
+ uint64_t va_addr;
+ unsigned int id;
+};
+
+#define IOCTL_KGSL_GPUOBJ_INFO \
+ _IOWR(KGSL_IOC_TYPE, 0x47, struct kgsl_gpuobj_info)
+
+/**
+ * struct kgsl_gpuobj_import - argument to IOCTL_KGSL_GPUOBJ_IMPORT
+ * @priv: Pointer to the private data for the import type
+ * @priv_len: Length of the private data
+ * @flags: Mask of KGSL_MEMFLAG_ flags
+ * @type: Type of the import (KGSL_USER_MEM_TYPE_*)
+ * @id: Returns the ID of the new GPU object
+ */
+struct kgsl_gpuobj_import {
+ uint64_t __user priv;
+ uint64_t priv_len;
+ uint64_t flags;
+ unsigned int type;
+ unsigned int id;
+};
+
+/**
+ * struct kgsl_gpuobj_import_dma_buf - import a dmabuf object
+ * @fd: File descriptor for the dma-buf object
+ */
+struct kgsl_gpuobj_import_dma_buf {
+ int fd;
+};
+
+/**
+ * struct kgsl_gpuobj_import_useraddr - import an object based on a useraddr
+ * @virtaddr: Virtual address of the object to import
+ */
+struct kgsl_gpuobj_import_useraddr {
+ uint64_t virtaddr;
+};
+
+#define IOCTL_KGSL_GPUOBJ_IMPORT \
+ _IOWR(KGSL_IOC_TYPE, 0x48, struct kgsl_gpuobj_import)
+
+/**
+ * struct kgsl_gpuobj_sync_obj - Individual GPU object to sync
+ * @offset: Offset within the GPU object to sync
+ * @length: Number of bytes to sync
+ * @id: ID of the GPU object to sync
+ * @op: Cache operation to execute
+ */
+
+struct kgsl_gpuobj_sync_obj {
+ uint64_t offset;
+ uint64_t length;
+ unsigned int id;
+ unsigned int op;
+};
+
+/**
+ * struct kgsl_gpuobj_sync - Argument for IOCTL_KGSL_GPUOBJ_SYNC
+ * @objs: Pointer to an array of kgsl_gpuobj_sync_obj structs
+ * @obj_len: Size of each item in the array
+ * @count: Number of items in the array
+ */
+
+struct kgsl_gpuobj_sync {
+ uint64_t __user objs;
+ unsigned int obj_len;
+ unsigned int count;
+};
+
+#define IOCTL_KGSL_GPUOBJ_SYNC \
+ _IOW(KGSL_IOC_TYPE, 0x49, struct kgsl_gpuobj_sync)
+
+/**
+ * struct kgsl_command_object - GPU command object
+ * @offset: GPU address offset of the object
+ * @gpuaddr: GPU address of the object
+ * @size: Size of the object
+ * @flags: Current flags for the object
+ * @id: GPU command object ID
+ */
+struct kgsl_command_object {
+ uint64_t offset;
+ uint64_t gpuaddr;
+ uint64_t size;
+ unsigned int flags;
+ unsigned int id;
+};
+
+/**
+ * struct kgsl_command_syncpoint - GPU syncpoint object
+ * @priv: Pointer to the type specific buffer
+ * @size: Size of the type specific buffer
+ * @type: type of sync point defined here
+ */
+struct kgsl_command_syncpoint {
+ uint64_t __user priv;
+ uint64_t size;
+ unsigned int type;
+};
+
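
For illustration, a sketch of batching two cache maintenance operations through IOCTL_KGSL_GPUOBJ_SYNC with the structures above. The KGSL_GPUMEM_CACHE_FLUSH value is assumed to come from the KGSL_GPUMEM_CACHE_* family defined earlier in this header; the include path, helper name and device fd are likewise assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>   /* assumed install location of this header */

/* Flush CPU caches for two GPU objects in a single ioctl. */
static int flush_two_objects(int fd, unsigned int id_a, uint64_t len_a,
                             unsigned int id_b, uint64_t len_b)
{
    struct kgsl_gpuobj_sync_obj objs[2];
    struct kgsl_gpuobj_sync sync;

    memset(objs, 0, sizeof(objs));
    objs[0].id = id_a;
    objs[0].offset = 0;                   /* sync from the start of the object */
    objs[0].length = len_a;
    objs[0].op = KGSL_GPUMEM_CACHE_FLUSH; /* assumed cache op constant */

    objs[1].id = id_b;
    objs[1].offset = 0;
    objs[1].length = len_b;
    objs[1].op = KGSL_GPUMEM_CACHE_FLUSH;

    memset(&sync, 0, sizeof(sync));
    sync.objs = (uintptr_t)objs;          /* u64 field carrying a user pointer */
    sync.obj_len = sizeof(objs[0]);       /* size of each array element */
    sync.count = 2;

    return ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &sync);
}
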
+/**
+ * struct kgsl_gpu_command - Argument for IOCTL_KGSL_GPU_COMMAND
+ * @flags: Flags for the command submission
+ * @cmdlist: List of kgsl_command_objects for submission
+ * @cmdsize: Size of each kgsl_command_object structure in cmdlist
+ * @numcmds: Number of kgsl_command_objects in command list
+ * @objlist: List of kgsl_command_objects for tracking
+ * @objsize: Size of each kgsl_command_object structure in objlist
+ * @numobjs: Number of kgsl_command_objects in object list
+ * @synclist: List of kgsl_command_syncpoints
+ * @syncsize: Size of each kgsl_command_syncpoint structure
+ * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list
+ * @context_id: Context ID submitting the kgsl_gpu_command
+ * @timestamp: Timestamp for the submitted commands
+ */
+struct kgsl_gpu_command {
+ uint64_t flags;
+ uint64_t __user cmdlist;
+ unsigned int cmdsize;
+ unsigned int numcmds;
+ uint64_t __user objlist;
+ unsigned int objsize;
+ unsigned int numobjs;
+ uint64_t __user synclist;
+ unsigned int syncsize;
+ unsigned int numsyncs;
+ unsigned int context_id;
+ unsigned int timestamp;
+};
+
+#define IOCTL_KGSL_GPU_COMMAND \
+ _IOWR(KGSL_IOC_TYPE, 0x4A, struct kgsl_gpu_command)
+
+/**
+ * struct kgsl_preemption_counters_query - argument to
+ * IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY
+ * @counters: Return preemption counters array
+ * @size_user: Size allocated by userspace
+ * @size_priority_level: Size of preemption counters for each
+ * priority level
+ * @max_priority_level: Return max number of priority levels
+ *
+ * Query the available preemption counters. The array counters
+ * is used to return preemption counters. The size of the array
+ * is passed in so the kernel will only write at most size_user
+ * or max available preemption counters. The total number of
+ * preemption counters is returned in max_priority_level. If the
+ * array or size passed in are invalid, then an error is
+ * returned back.
+ */
+struct kgsl_preemption_counters_query {
+ uint64_t __user counters;
+ unsigned int size_user;
+ unsigned int size_priority_level;
+ unsigned int max_priority_level;
+};
+
+#define IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY \
+ _IOWR(KGSL_IOC_TYPE, 0x4B, struct kgsl_preemption_counters_query)
+
+/**
+ * struct kgsl_gpuobj_set_info - argument for IOCTL_KGSL_GPUOBJ_SET_INFO
+ * @flags: Flags to indicate which parameters to change
+ * @metadata: If KGSL_GPUOBJ_SET_INFO_METADATA is set, a pointer to the new
+ * metadata
+ * @id: GPU memory object ID to change
+ * @metadata_len: If KGSL_GPUOBJ_SET_INFO_METADATA is set, the length of the
+ * new metadata string
+ * @type: If KGSL_GPUOBJ_SET_INFO_TYPE is set, the new type of the memory object
+ */
+
+#define KGSL_GPUOBJ_SET_INFO_METADATA (1 << 0)
+#define KGSL_GPUOBJ_SET_INFO_TYPE (1 << 1)
+
+struct kgsl_gpuobj_set_info {
+ uint64_t flags;
+ uint64_t metadata;
+ unsigned int id;
+ unsigned int metadata_len;
+ unsigned int type;
+};
+
+#define IOCTL_KGSL_GPUOBJ_SET_INFO \
+ _IOW(KGSL_IOC_TYPE, 0x4C, struct kgsl_gpuobj_set_info)
+
+#endif /* _UAPI_MSM_KGSL_H */
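
For illustration, a sketch of the 64-bit friendly submission path: one kgsl_command_object wrapped in a kgsl_gpu_command and handed to IOCTL_KGSL_GPU_COMMAND. The KGSL_CMDLIST_IB flag is assumed to be defined earlier in this header; the include path, helper name and device fd are likewise assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>   /* assumed install location of this header */

/* Submit one command buffer on a context and return the assigned timestamp. */
static int gpu_command_submit(int fd, unsigned int ctxt,
                              uint64_t ib_gpuaddr, uint64_t ib_size,
                              unsigned int *out_ts)
{
    struct kgsl_command_object cmd_obj;
    struct kgsl_gpu_command cmd;

    memset(&cmd_obj, 0, sizeof(cmd_obj));
    cmd_obj.gpuaddr = ib_gpuaddr;      /* GPU address of the command buffer */
    cmd_obj.size = ib_size;            /* command buffer size in bytes */
    cmd_obj.flags = KGSL_CMDLIST_IB;   /* assumed flag marking an IB entry */

    memset(&cmd, 0, sizeof(cmd));
    cmd.cmdlist = (uintptr_t)&cmd_obj; /* u64 field carrying a user pointer */
    cmd.cmdsize = sizeof(cmd_obj);     /* size of each cmdlist element */
    cmd.numcmds = 1;
    cmd.context_id = ctxt;

    if (ioctl(fd, IOCTL_KGSL_GPU_COMMAND, &cmd) < 0)
        return -1;

    *out_ts = cmd.timestamp;           /* timestamp assigned by the driver */
    return 0;
}
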