author     Jordan Crouse <jcrouse@codeaurora.org>    2016-02-25 09:16:24 -0700
committer  David Keitel <dkeitel@codeaurora.org>     2016-03-22 11:15:49 -0700
commit     5103db813f92bdb6c064631674e4ae5726be03f3
tree       c12b0fec0575125e59639631c9cdc190cbe770d1
parent     e64e0d283a6be977af3bfba4f9a559630a7836ee
msm: kgsl: Add Qualcomm GPU driver
Snapshot of the Qualcomm Adreno GPU driver (KGSL) as of msm-3.18
commit e70ad0cd5efd ("Promotion of kernel.lnx.3.18-151201.").
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
77 files changed, 53632 insertions, 0 deletions
diff --git a/Documentation/devicetree/bindings/gpu/adreno-busmon.txt b/Documentation/devicetree/bindings/gpu/adreno-busmon.txt new file mode 100644 index 000000000000..7bf2fe8274d0 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno-busmon.txt @@ -0,0 +1,16 @@ +Adreno bus monitor device + +kgsl-busmon is a psedo device that represents a devfreq bus bandwidth +governor. If this device is present then two different governors are used +for GPU DCVS and bus DCVS. + +Required properties: +- compatible: Must be "qcom,kgsl-busmon" +- label: Device name used for sysfs entry. + +Example: + + qcom,kgsl-busmon { + compatible = "qcom,kgsl-busmon"; + label = "kgsl-busmon"; + }; diff --git a/Documentation/devicetree/bindings/gpu/adreno-iommu.txt b/Documentation/devicetree/bindings/gpu/adreno-iommu.txt new file mode 100644 index 000000000000..de88a6eba7a5 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno-iommu.txt @@ -0,0 +1,88 @@ +Qualcomm Technologies, Inc. GPU IOMMU + +Required properties: + +Required properties: +- compatible : one of: + - "qcom,kgsl-smmu-v1" + - "qcom,kgsl-smmu-v2" + +- reg : Base address and size of the SMMU. + +- clocks : List of clocks to be used during SMMU register access. See + Documentation/devicetree/bindings/clock/clock-bindings.txt + for information about the format. For each clock specified + here, there must be a corresponding entry in clock-names + (see below). + +- clock-names : List of clock names corresponding to the clocks specified in + the "clocks" property (above). See + Documentation/devicetree/bindings/clock/clock-bindings.txt + for more info. +- qcom,protect : The GPU register region which must be protected by a CP + protected mode. On some targets this region must cover + the entire SMMU register space, on others there + is a separate aperture for CP to program context banks. + +Optional properties: +- qcom,micro-mmu-control : Some targets provide an implementation defined + register for blocking translation requests during GPU side + programming. This property specifies the offset of this + register within the iommu register space. +- qcom,retention : A boolean specifying if retention is supported on this target +- qcom,global_pt : A boolean specifying if global pagetable should be used. + When not set we use per process pagetables +- qcom,hyp_secure_alloc : A bool specifying if the hypervisor is used on this target + for secure buffer allocation +- qcom,secure_align_mask: A mask for determining how secure buffers need to + be aligned +- qcom,coherent-htw: A boolean specifying if coherent hardware table walks should + be enabled. + +- List of sub nodes, one for each of the translation context banks supported. + The driver uses the names of these nodes to determine how they are used, + currently supported names are: + - gfx3d_user : Used for the 'normal' GPU address space. + - gfx3d_secure : Used for the content protection address space. + Each sub node has the following required properties: + + - compatible : "qcom,smmu-kgsl-cb" + - iommus : Specifies the SID's used by this context bank, this needs to be + <kgsl_smmu SID> pair, kgsl_smmu is the string parsed by iommu + driver to match this context bank with the kgsl_smmu device + defined in iommu device tree. On targets where the msm iommu + driver is used rather than the arm smmu driver, this property + may be absent. + - qcom,gpu-offset : Offset into the GPU register space for accessing + this context bank. 
On some targets the iommu registers are not + part of the GPU's register space, and a separate register aperture + is used. Otherwise the same register offsets may be used for CPU + or GPU side programming. + +Example: + + msm_iommu: qcom,kgsl-iommu { + compatible = "qcom,kgsl-smmu-v2"; + reg = <0xb40000 0x20000>; + qcom,protect = <0x40000 0x20000>; + clocks = <&clock_mmss clk_gpu_ahb_clk>, + <&clock_gcc clk_gcc_mmss_bimc_gfx_clk>, + <&clock_mmss clk_mmss_mmagic_ahb_clk>, + <&clock_mmss clk_mmss_mmagic_cfg_ahb_clk>; + clock-names = "gpu_ahb_clk", "bimc_gfx_clk", "mmagic_ahb_clk", "mmagic_cfg_ahb_clk"; + qcom,secure_align_mask = <0xfff>; + qcom,retention; + qcom,global_pt; + + gfx3d_user: gfx3d_user { + compatible = "qcom,smmu-kgsl-cb"; + iommus = <&kgsl_smmu 0>, + <&kgsl_smmu 1>; + qcom,gpu-offset = <0x48000>; + }; + + gfx3d_secure: gfx3d_secure { + compatible = "qcom,smmu-kgsl-cb"; + iommus = <&kgsl_smmu 2>; + }; + }; diff --git a/Documentation/devicetree/bindings/gpu/adreno-pwrlevels.txt b/Documentation/devicetree/bindings/gpu/adreno-pwrlevels.txt new file mode 100644 index 000000000000..e5617d15a821 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno-pwrlevels.txt @@ -0,0 +1,25 @@ +Qualcomm GPU powerlevels + +Powerlevels are defined in sets by qcom,gpu-pwrlevels. Multiple sets (bins) +can be defined within qcom,gpu-pwrelvel-bins. Each powerlevel defines a +voltage, bus, and bandwitdh level. + +- qcom,gpu-pwrlevel-bins: Contains one or more qcom,gpu-pwrlevels sets + +Properties: +- compatible: Must be qcom,gpu-pwrlevel-bins +- qcom,gpu-pwrlevels: Defines a set of powerlevels + +Properties: +- qcom,speed-bin: Speed bin identifier for the set - must match + the value read from the hardware + +- qcom,gpu-pwrlevel: A single powerlevel + +Properties: +- reg: Index of the powerlevel (0 = highest perf) +- qcom,gpu-freq GPU frequency for the powerlevel (in Hz) +- qcom,bus-freq Index to a bus level (defined by the bus + settings) +- qcom,bus-min Minimum bus level to set for the power level +- qcom,bus-max maximum bus level to set for the power level diff --git a/Documentation/devicetree/bindings/gpu/adreno.txt b/Documentation/devicetree/bindings/gpu/adreno.txt new file mode 100644 index 000000000000..84cbc21705e9 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/adreno.txt @@ -0,0 +1,222 @@ +Qualcomm GPU + +Qualcomm Adreno GPU + +Required properties: +- label: A string used as a descriptive name for the device. +- compatible: Must be "qcom,kgsl-3d0" and "qcom,kgsl-3d" +- reg: Specifies the register base address and size. The second interval + specifies the shader memory base address and size. +- reg-names: Resource names used for the physical address of device registers + and shader memory. "kgsl_3d0_reg_memory" gives the physical address + and length of device registers while "kgsl_3d0_shader_memory" gives + physical address and length of device shader memory. If + specified, "qfprom_memory" gives the range for the efuse + registers used for various configuration options. +- interrupts: Interrupt mapping for GPU IRQ. +- interrupt-names: String property to describe the name of the interrupt. +- qcom,id: An integer used as an identification number for the device. + +- clocks: List of phandle and clock specifier pairs, one pair + for each clock input to the device. +- clock-names: List of clock input name strings sorted in the same + order as the clocks property. 
+ Current values of clock-names are: + "src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk", + "alt_mem_iface_clk", "rbbmtimer_clk", "alwayson_clk" + "core_clk" and "iface_clk" are required and others are optional + +- qcom,base-leakage-coefficient: Dynamic leakage coefficient. +- qcom,lm-limit: Current limit for GPU limit management. + +Bus Scaling Data: +- qcom,msm-bus,name: String property to describe the name of the 3D graphics processor. +- qcom,msm-bus,num-cases: This is the the number of Bus Scaling use cases defined in the vectors property. +- qcom,msm-bus,active-only: A boolean flag indicating if it is active only. +- qcom,msm-bus,num-paths: This represents the number of paths in each Bus Scaling Usecase. +- qcom,msm-bus,vectors-KBps: A series of 4 cell properties, format of which is: + <src dst ab ib>, <src dst ab ib>, // For Bus Scaling Usecase 1 + <src dst ab ib>, <src dst ab ib>, // For Bus Scaling Usecase 2 + <.. .. .. ..>, <.. .. .. ..>; // For Bus Scaling Usecase n + This property is a series of all vectors for all Bus Scaling Usecases. + Each set of vectors for each usecase describes bandwidth votes for a combination + of src/dst ports. The driver will set the desired use case based on the selected + power level and the desired bandwidth vote will be registered for the port pairs. + Current values of src are: + 0 = MSM_BUS_MASTER_GRAPHICS_3D + 1 = MSM_BUS_MASTER_GRAPHICS_3D_PORT1 + 2 = MSM_BUS_MASTER_V_OCMEM_GFX3D + Current values of dst are: + 0 = MSM_BUS_SLAVE_EBI_CH0 + 1 = MSM_BUS_SLAVE_OCMEM + ab: Represents aggregated bandwidth. This value is 0 for Graphics. + ib: Represents instantaneous bandwidth. This value has a range <0 8000 MB/s> + +- qcom,ocmem-bus-client: Container for another set of bus scaling properties + qcom,msm-bus,name + qcom,msm-bus,num-cases + qcom,msm-bus,num-paths + qcom,msm-bus,vectors-KBps + to be used by ocmem msm bus scaling client. + +GDSC Oxili Regulators: +- regulator-names: List of regulator name strings sorted in power-on order +- vddcx-supply: Phandle for vddcx regulator device node. +- vdd-supply: Phandle for vdd regulator device node. + +IOMMU Data: +- iommu: Phandle for the KGSL IOMMU device node + +GPU Power levels: +- qcom,gpu-pwrlevel-bins: Container for sets of GPU power levels (see + adreno-pwrlevels.txt) + +DCVS Core info +- qcom,dcvs-core-info Container for the DCVS core info (see + dcvs-core-info.txt) + +Optional Properties: +- qcom,initial-powerlevel: This value indicates which qcom,gpu-pwrlevel should be used at start time + and when coming back out of resume +- qcom,bus-control: Boolean. Enables an independent bus vote from the gpu frequency +- qcom,bus-width: Bus width in number of bytes. This enables dynamic AB bus voting based on + bus width and actual bus transactions. +- qcom,gpubw-dev: a phandle to a device representing bus bandwidth requirements + (see devdw.txt) +- qcom,idle-timeout: This property represents the time in microseconds for idle timeout. +- qcom,deep-nap-timeout: This property represents the time in microseconds for entering deeper + power state. +- qcom,chipid: If it exists this property is used to replace + the chip identification read from the GPU hardware. + This is used to override faulty hardware readings. +- qcom,strtstp-sleepwake: Boolean. Enables use of GPU SLUMBER instead of SLEEP for power savings +- qcom,gx-retention: Boolean. 
Enables use of GX rail RETENTION voltage + +- qcom,pm-qos-active-latency: + Right after GPU wakes up from sleep, driver votes for + acceptable maximum latency to the pm-qos driver. This + voting demands that the system can not go into any + power save state *if* the latency to bring system back + into active state is more than this value. + Value is in microseconds. +- qcom,pm-qos-wakeup-latency: + Similar to the above. Driver votes against deep low + power modes right before GPU wakes up from sleep. +- qcom,force-32bit: + Force the GPU to use 32 bit data sizes even if + it is capable of doing 64 bit. + +- qcom,gpu-quirk-two-pass-use-wfi: + Signal the GPU to set Set TWOPASSUSEWFI bit in + A5XX_PC_DBG_ECO_CNTL (5XX only) + +The following properties are optional as collecting data via coresight might +not be supported for every chipset. The documentation for coresight +properties can be found in: +Documentation/devicetree/bindings/coresight/coresight.txt + +- coresight-id Unique integer identifier for the bus. +- coresight-name Unique descriptive name of the bus. +- coresight-nr-inports Number of input ports on the bus. +- coresight-outports List of output port numbers on the bus. +- coresight-child-list List of phandles pointing to the children of this + component. +- coresight-child-ports List of input port numbers of the children. + + +Example of A330 GPU in MSM8916: + +&soc { + msm_gpu: qcom,kgsl-3d0@01c00000 { + label = "kgsl-3d0"; + compatible = "qcom,kgsl-3d0", "qcom,kgsl-3d"; + reg = <0x01c00000 0x10000 + 0x01c20000 0x20000>; + reg-names = "kgsl_3d0_reg_memory" , "kgsl_3d0_shader_memory"; + interrupts = <0 33 0>; + interrupt-names = "kgsl_3d0_irq"; + qcom,id = <0>; + + qcom,chipid = <0x03000600>; + + qcom,initial-pwrlevel = <1>; + + /* Idle Timeout = HZ/12 */ + qcom,idle-timeout = <8>; + qcom,strtstp-sleepwake; + + clocks = <&clock_gcc clk_gcc_oxili_gfx3d_clk>, + <&clock_gcc clk_gcc_oxili_ahb_clk>, + <&clock_gcc clk_gcc_oxili_gmem_clk>, + <&clock_gcc clk_gcc_bimc_gfx_clk>, + <&clock_gcc clk_gcc_bimc_gpu_clk>; + clock-names = "core_clk", "iface_clk", "mem_clk", + "mem_iface_clk", "alt_mem_iface_clk"; + + /* Bus Scale Settings */ + qcom,msm-bus,name = "grp3d"; + qcom,msm-bus,num-cases = <4>; + qcom,msm-bus,num-paths = <1>; + qcom,msm-bus,vectors-KBps = + <26 512 0 0>, + <26 512 0 1600000>, + <26 512 0 3200000>, + <26 512 0 4264000>; + + /* GDSC oxili regulators */ + vdd-supply = <&gdsc_oxili_gx>; + + /* IOMMU Data */ + iommu = <&gfx_iommu>; + + /* Trace bus */ + coresight-id = <67>; + coresight-name = "coresight-gfx"; + coresight-nr-inports = <0>; + coresight-outports = <0>; + coresight-child-list = <&funnel_in0>; + coresight-child-ports = <5>; + + /* Power levels */ + qcom,gpu-pwrlevels-bins { + #address-cells = <1>; + #size-cells = <0>; + + qcom,gpu-pwrlevels-0 { + #address-cells = <1>; + #size-cells = <0>; + + qcom,speed-bin = <0>; + + qcom,gpu-pwrlevel@0 { + reg = <0>; + qcom,gpu-freq = <400000000>; + qcom,bus-freq = <3>; + qcom,io-fraction = <33>; + }; + + qcom,gpu-pwrlevel@1 { + reg = <1>; + qcom,gpu-freq = <310000000>; + qcom,bus-freq = <2>; + qcom,io-fraction = <66>; + }; + + qcom,gpu-pwrlevel@2 { + reg = <2>; + qcom,gpu-freq = <200000000>; + qcom,bus-freq = <1>; + qcom,io-fraction = <100>; + }; + + qcom,gpu-pwrlevel@3 { + reg = <3>; + qcom,gpu-freq = <27000000>; + qcom,bus-freq = <0>; + qcom,io-fraction = <0>; + }; + }; + }; + + }; +}; diff --git a/drivers/gpu/msm/Kconfig b/drivers/gpu/msm/Kconfig new file mode 100644 index 000000000000..aca3ab611214 --- /dev/null +++ 
b/drivers/gpu/msm/Kconfig @@ -0,0 +1,42 @@ +config MSM_KGSL + tristate "MSM 3D Graphics driver" + default n + depends on ARCH_MSM && !ARCH_MSM7X00A && !ARCH_MSM7X25 + select GENERIC_ALLOCATOR + select FW_LOADER + select PM_DEVFREQ + select DEVFREQ_GOV_SIMPLE_ONDEMAND + select DEVFREQ_GOV_PERFORMANCE + select DEVFREQ_GOV_MSM_ADRENO_TZ + select DEVFREQ_GOV_MSM_GPUBW_MON + select ONESHOT_SYNC if SYNC + ---help--- + 3D graphics driver. Required to use hardware accelerated + OpenGL ES 2.0 and 1.1. + +config MSM_KGSL_CFF_DUMP + bool "Enable KGSL Common File Format (CFF) Dump Feature [Use with caution]" + default n + depends on MSM_KGSL + select RELAY + ---help--- + This is an analysis and diagnostic feature only, and should only be + turned on during KGSL GPU diagnostics and will slow down the KGSL + performance sigificantly, hence *do not use in production builds*. + When enabled, CFF Dump is on at boot. It can be turned off at runtime + via 'echo 0 > /d/kgsl/cff_dump'. The log can be captured via + /d/kgsl-cff/cpu[0|1]. + +config MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP + bool "When selected will disable KGSL CFF Dump for context switches" + default n + depends on MSM_KGSL_CFF_DUMP + ---help--- + Dumping all the memory for every context switch can produce quite + huge log files, to reduce this, turn this feature on. + +config MSM_ADRENO_DEFAULT_GOVERNOR + string "devfreq governor for the adreno core" + default "msm-adreno-tz" if DEVFREQ_GOV_MSM_ADRENO_TZ + default "simple_ondemand" + depends on MSM_KGSL diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile new file mode 100644 index 000000000000..9ee90751b944 --- /dev/null +++ b/drivers/gpu/msm/Makefile @@ -0,0 +1,48 @@ +ccflags-y := -Iinclude/uapi/drm -Iinclude/drm -Idrivers/gpu/msm + +msm_kgsl_core-y = \ + kgsl.o \ + kgsl_trace.o \ + kgsl_cmdbatch.o \ + kgsl_ioctl.o \ + kgsl_sharedmem.o \ + kgsl_pwrctrl.o \ + kgsl_pwrscale.o \ + kgsl_mmu.o \ + kgsl_iommu.o \ + kgsl_snapshot.o \ + kgsl_events.o + +msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o +msm_kgsl_core-$(CONFIG_MSM_KGSL_CFF_DUMP) += kgsl_cffdump.o +msm_kgsl_core-$(CONFIG_SYNC) += kgsl_sync.o +msm_kgsl_core-$(CONFIG_COMPAT) += kgsl_compat.o + +msm_adreno-y += \ + adreno_ioctl.o \ + adreno_ringbuffer.o \ + adreno_drawctxt.o \ + adreno_dispatch.o \ + adreno_snapshot.o \ + adreno_coresight.o \ + adreno_trace.o \ + adreno_a3xx.o \ + adreno_a4xx.o \ + adreno_a5xx.o \ + adreno_a3xx_snapshot.o \ + adreno_a4xx_snapshot.o \ + adreno_a5xx_snapshot.o \ + adreno_sysfs.o \ + adreno.o \ + adreno_cp_parser.o \ + adreno_iommu.o \ + adreno_perfcounter.o + +msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o +msm_adreno-$(CONFIG_COMPAT) += adreno_compat.o + +msm_kgsl_core-objs = $(msm_kgsl_core-y) +msm_adreno-objs = $(msm_adreno-y) + +obj-$(CONFIG_MSM_KGSL) += msm_kgsl_core.o +obj-$(CONFIG_MSM_KGSL) += msm_adreno.o diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h new file mode 100644 index 000000000000..13d55bd4ae3b --- /dev/null +++ b/drivers/gpu/msm/a3xx_reg.h @@ -0,0 +1,891 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ + +#ifndef _A300_REG_H +#define _A300_REG_H + +/* Interrupt bit positions within RBBM_INT_0 */ + +#define A3XX_INT_RBBM_GPU_IDLE 0 +#define A3XX_INT_RBBM_AHB_ERROR 1 +#define A3XX_INT_RBBM_REG_TIMEOUT 2 +#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5 +#define A3XX_INT_VFD_ERROR 6 +#define A3XX_INT_CP_SW_INT 7 +#define A3XX_INT_CP_T0_PACKET_IN_IB 8 +#define A3XX_INT_CP_OPCODE_ERROR 9 +#define A3XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A3XX_INT_CP_HW_FAULT 11 +#define A3XX_INT_CP_DMA 12 +#define A3XX_INT_CP_IB2_INT 13 +#define A3XX_INT_CP_IB1_INT 14 +#define A3XX_INT_CP_RB_INT 15 +#define A3XX_INT_CP_REG_PROTECT_FAULT 16 +#define A3XX_INT_CP_RB_DONE_TS 17 +#define A3XX_INT_CP_VS_DONE_TS 18 +#define A3XX_INT_CP_PS_DONE_TS 19 +#define A3XX_INT_CACHE_FLUSH_TS 20 +#define A3XX_INT_CP_AHB_ERROR_HALT 21 +#define A3XX_INT_MISC_HANG_DETECT 24 +#define A3XX_INT_UCHE_OOB_ACCESS 25 + +/* CP_EVENT_WRITE events */ +#define CACHE_FLUSH_TS 4 + +/* CP_INTERRUPT masks */ + +#define CP_INTERRUPT_IB2 0x20000000 +#define CP_INTERRUPT_IB1 0x40000000 +#define CP_INTERRUPT_RB 0x80000000 + +/* Register definitions */ + +#define A3XX_RBBM_HW_VERSION 0x000 +#define A3XX_RBBM_HW_RELEASE 0x001 +#define A3XX_RBBM_HW_CONFIGURATION 0x002 +#define A3XX_RBBM_CLOCK_CTL 0x010 +#define A3XX_RBBM_SP_HYST_CNT 0x012 +#define A3XX_RBBM_SW_RESET_CMD 0x018 +#define A3XX_RBBM_AHB_CTL0 0x020 +#define A3XX_RBBM_AHB_CTL1 0x021 +#define A3XX_RBBM_AHB_CMD 0x022 +#define A3XX_RBBM_AHB_ME_SPLIT_STATUS 0x25 +#define A3XX_RBBM_AHB_PFP_SPLIT_STATUS 0x26 +#define A3XX_RBBM_AHB_ERROR_STATUS 0x027 +#define A3XX_RBBM_GPR0_CTL 0x02E +/* This the same register as on A2XX, just in a different place */ +#define A3XX_RBBM_STATUS 0x030 +#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33 +#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x51 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x54 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x57 +#define A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x5A +#define A3XX_RBBM_INT_CLEAR_CMD 0x061 +#define A3XX_RBBM_INT_0_MASK 0x063 +#define A3XX_RBBM_INT_0_STATUS 0x064 +#define A3XX_RBBM_PERFCTR_CTL 0x80 +#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81 +#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84 +#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85 +#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86 +#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87 +#define A3XX_RBBM_GPU_BUSY_MASKED 0x88 +#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90 +#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91 +#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92 +#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93 +#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94 +#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95 +#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96 +#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97 +#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98 +#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99 +#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A +#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B +#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C +#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D +#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E +#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F +#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0 +#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1 +#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2 +#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3 +#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4 +#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5 +#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6 +#define 
A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7 +#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8 +#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9 +#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA +#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB +#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC +#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD +#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE +#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF +#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0 +#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1 +#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2 +#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3 +#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4 +#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5 +#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6 +#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7 +#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8 +#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9 +#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA +#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB +#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC +#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD +#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE +#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF +#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0 +#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1 +#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2 +#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3 +#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4 +#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5 +#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6 +#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7 +#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8 +#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9 +#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA +#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB +#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC +#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD +#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE +#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF +#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0 +#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1 +#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2 +#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3 +#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4 +#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5 +#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6 +#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7 +#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8 +#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9 +#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA +#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB +#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC +#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD +#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE +#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF +#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0 +#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1 +#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2 +#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3 +#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4 +#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5 + +#define A3XX_RBBM_RBBM_CTL 0x100 +#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA +#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB +#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC +#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED +#define A3XX_RBBM_DEBUG_BUS_CTL 0x111 +#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112 +#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B +#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C +#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D +#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E +#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F +#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120 +#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121 +#define A3XX_RBBM_EXT_TRACE_CMD 0x122 +#define A3XX_CP_RB_BASE 0x01C0 +#define A3XX_CP_RB_CNTL 0x01C1 +#define A3XX_CP_RB_RPTR_ADDR 0x01C3 +#define A3XX_CP_RB_RPTR 0x01C4 +#define A3XX_CP_RB_WPTR 0x01C5 +#define A3XX_CP_RB_RPTR_WR 0x01C7 +/* Following two are same as on A2XX, just in a different place */ +#define A3XX_CP_PFP_UCODE_ADDR 0x1C9 +#define A3XX_CP_PFP_UCODE_DATA 0x1CA +#define 
A3XX_CP_ROQ_ADDR 0x1CC +#define A3XX_CP_ROQ_DATA 0x1CD +#define A3XX_CP_MERCIU_ADDR 0x1D1 +#define A3XX_CP_MERCIU_DATA 0x1D2 +#define A3XX_CP_MERCIU_DATA2 0x1D3 +#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5 +#define A3XX_CP_MEQ_ADDR 0x1DA +#define A3XX_CP_MEQ_DATA 0x1DB +#define A3XX_CP_SCRATCH_UMSK 0x01DC +#define A3XX_CP_SCRATCH_ADDR 0x01DD +#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC +#define A3XX_CP_STATE_DEBUG_DATA 0x01ED +#define A3XX_CP_CNTL 0x01F4 +#define A3XX_CP_WFI_PEND_CTR 0x01F5 +#define A3XX_CP_ME_CNTL 0x01F6 +#define A3XX_CP_ME_STATUS 0x01F7 +#define A3XX_CP_ME_RAM_WADDR 0x01F8 +#define A3XX_CP_ME_RAM_RADDR 0x01F9 +#define A3XX_CP_ME_RAM_DATA 0x01FA +#define A3XX_CP_DEBUG 0x01FC + +#define A3XX_RBBM_PM_OVERRIDE2 0x039D + +#define A3XX_CP_PERFCOUNTER_SELECT 0x445 +#define A3XX_CP_IB1_BASE 0x0458 +#define A3XX_CP_IB1_BUFSZ 0x0459 +#define A3XX_CP_IB2_BASE 0x045A +#define A3XX_CP_IB2_BUFSZ 0x045B + +#define A3XX_CP_HW_FAULT 0x45C +#define A3XX_CP_AHB_FAULT 0x54D +#define A3XX_CP_PROTECT_CTRL 0x45E +#define A3XX_CP_PROTECT_STATUS 0x45F +#define A3XX_CP_PROTECT_REG_0 0x460 +#define A3XX_CP_PROTECT_REG_1 0x461 +#define A3XX_CP_PROTECT_REG_2 0x462 +#define A3XX_CP_PROTECT_REG_3 0x463 +#define A3XX_CP_PROTECT_REG_4 0x464 +#define A3XX_CP_PROTECT_REG_5 0x465 +#define A3XX_CP_PROTECT_REG_6 0x466 +#define A3XX_CP_PROTECT_REG_7 0x467 +#define A3XX_CP_PROTECT_REG_8 0x468 +#define A3XX_CP_PROTECT_REG_9 0x469 +#define A3XX_CP_PROTECT_REG_A 0x46A +#define A3XX_CP_PROTECT_REG_B 0x46B +#define A3XX_CP_PROTECT_REG_C 0x46C +#define A3XX_CP_PROTECT_REG_D 0x46D +#define A3XX_CP_PROTECT_REG_E 0x46E +#define A3XX_CP_PROTECT_REG_F 0x46F +#define A3XX_CP_STAT 0x047F +#define A3XX_CP_SCRATCH_REG0 0x578 +#define A3XX_CP_SCRATCH_REG6 0x57E +#define A3XX_CP_SCRATCH_REG7 0x57F +#define A3XX_VSC_BIN_SIZE 0xC01 +#define A3XX_VSC_SIZE_ADDRESS 0xC02 +#define A3XX_VSC_PIPE_CONFIG_0 0xC06 +#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07 +#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08 +#define A3XX_VSC_PIPE_CONFIG_1 0xC09 +#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A +#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B +#define A3XX_VSC_PIPE_CONFIG_2 0xC0C +#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D +#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E +#define A3XX_VSC_PIPE_CONFIG_3 0xC0F +#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10 +#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11 +#define A3XX_VSC_PIPE_CONFIG_4 0xC12 +#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13 +#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14 +#define A3XX_VSC_PIPE_CONFIG_5 0xC15 +#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16 +#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17 +#define A3XX_VSC_PIPE_CONFIG_6 0xC18 +#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19 +#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A +#define A3XX_VSC_PIPE_CONFIG_7 0xC1B +#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C +#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D +#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48 +#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49 +#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A +#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B +#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81 +#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88 +#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89 +#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A +#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B +#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0 +#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1 +#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2 +#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3 +#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4 +#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5 +#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6 +#define 
A3XX_GRAS_CL_USER_PLANE_W1 0xCA7 +#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8 +#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9 +#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA +#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB +#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC +#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD +#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE +#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF +#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0 +#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1 +#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2 +#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3 +#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4 +#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5 +#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6 +#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7 +#define A3XX_RB_GMEM_BASE_ADDR 0xCC0 +#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1 +#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6 +#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7 +#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0 +#define A3XX_SQ_GPR_MANAGEMENT 0x0D00 +#define A3XX_SQ_INST_STORE_MANAGMENT 0x0D02 +#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00 +#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01 +#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02 +#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03 +#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04 +#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05 +#define A3XX_TP0_CHICKEN 0x0E1E +#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44 +#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45 +#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61 +#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62 +#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64 +#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65 +#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82 +#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84 +#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85 +#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86 +#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87 +#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88 +#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1 +#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6 +#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4 +#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5 +#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6 +#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7 +#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8 +#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9 +#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA +#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB +#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04 +#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05 +#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06 +#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07 +#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08 +#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09 +#define A3XX_GRAS_CL_CLIP_CNTL 0x2040 +#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044 +#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048 +#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049 +#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A +#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B +#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C +#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D +#define A3XX_GRAS_SU_POINT_MINMAX 0x2068 +#define A3XX_GRAS_SU_POINT_SIZE 0x2069 +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D +#define A3XX_GRAS_SU_MODE_CONTROL 0x2070 +#define A3XX_GRAS_SC_CONTROL 0x2072 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A +#define A3XX_RB_MODE_CONTROL 0x20C0 +#define A3XX_RB_RENDER_CONTROL 0x20C1 +#define A3XX_RB_MSAA_CONTROL 0x20C2 +#define A3XX_RB_ALPHA_REFERENCE 
0x20C3 +#define A3XX_RB_MRT_CONTROL0 0x20C4 +#define A3XX_RB_MRT_BUF_INFO0 0x20C5 +#define A3XX_RB_MRT_BUF_BASE0 0x20C6 +#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7 +#define A3XX_RB_MRT_CONTROL1 0x20C8 +#define A3XX_RB_MRT_BUF_INFO1 0x20C9 +#define A3XX_RB_MRT_BUF_BASE1 0x20CA +#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB +#define A3XX_RB_MRT_CONTROL2 0x20CC +#define A3XX_RB_MRT_BUF_INFO2 0x20CD +#define A3XX_RB_MRT_BUF_BASE2 0x20CE +#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF +#define A3XX_RB_MRT_CONTROL3 0x20D0 +#define A3XX_RB_MRT_BUF_INFO3 0x20D1 +#define A3XX_RB_MRT_BUF_BASE3 0x20D2 +#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3 +#define A3XX_RB_BLEND_RED 0x20E4 +#define A3XX_RB_BLEND_GREEN 0x20E5 +#define A3XX_RB_BLEND_BLUE 0x20E6 +#define A3XX_RB_BLEND_ALPHA 0x20E7 +#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8 +#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9 +#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA +#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB +#define A3XX_RB_COPY_CONTROL 0x20EC +#define A3XX_RB_COPY_DEST_BASE 0x20ED +#define A3XX_RB_COPY_DEST_PITCH 0x20EE +#define A3XX_RB_COPY_DEST_INFO 0x20EF +#define A3XX_RB_DEPTH_CONTROL 0x2100 +#define A3XX_RB_DEPTH_CLEAR 0x2101 +#define A3XX_RB_DEPTH_BUF_INFO 0x2102 +#define A3XX_RB_DEPTH_BUF_PITCH 0x2103 +#define A3XX_RB_STENCIL_CONTROL 0x2104 +#define A3XX_RB_STENCIL_CLEAR 0x2105 +#define A3XX_RB_STENCIL_BUF_INFO 0x2106 +#define A3XX_RB_STENCIL_BUF_PITCH 0x2107 +#define A3XX_RB_STENCIL_REF_MASK 0x2108 +#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109 +#define A3XX_RB_LRZ_VSC_CONTROL 0x210C +#define A3XX_RB_WINDOW_OFFSET 0x210E +#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110 +#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111 +#define A3XX_RB_Z_CLAMP_MIN 0x2114 +#define A3XX_RB_Z_CLAMP_MAX 0x2115 +#define A3XX_PC_VSTREAM_CONTROL 0x21E4 +#define A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x21EA +#define A3XX_PC_PRIM_VTX_CNTL 0x21EC +#define A3XX_PC_RESTART_INDEX 0x21ED +#define A3XX_HLSQ_CONTROL_0_REG 0x2200 +#define A3XX_HLSQ_CONTROL_1_REG 0x2201 +#define A3XX_HLSQ_CONTROL_2_REG 0x2202 +#define A3XX_HLSQ_CONTROL_3_REG 0x2203 +#define A3XX_HLSQ_VS_CONTROL_REG 0x2204 +#define A3XX_HLSQ_FS_CONTROL_REG 0x2205 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207 +#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A +#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B +#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C +#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D +#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E +#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F +#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210 +#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211 +#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212 +#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214 +#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216 +#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217 +#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A +#define A3XX_VFD_CONTROL_0 0x2240 +#define A3XX_VFD_INDEX_MIN 0x2242 +#define A3XX_VFD_INDEX_MAX 0x2243 +#define A3XX_VFD_FETCH_INSTR_0_0 0x2246 +#define A3XX_VFD_FETCH_INSTR_0_4 0x224E +#define A3XX_VFD_FETCH_INSTR_1_0 0x2247 +#define A3XX_VFD_FETCH_INSTR_1_1 0x2249 +#define A3XX_VFD_FETCH_INSTR_1_2 0x224B +#define A3XX_VFD_FETCH_INSTR_1_3 0x224D +#define A3XX_VFD_FETCH_INSTR_1_4 0x224F +#define A3XX_VFD_FETCH_INSTR_1_5 0x2251 +#define A3XX_VFD_FETCH_INSTR_1_6 0x2253 +#define A3XX_VFD_FETCH_INSTR_1_7 0x2255 +#define A3XX_VFD_FETCH_INSTR_1_8 0x2257 +#define A3XX_VFD_FETCH_INSTR_1_9 0x2259 +#define A3XX_VFD_FETCH_INSTR_1_A 0x225B +#define A3XX_VFD_FETCH_INSTR_1_B 0x225D +#define 
A3XX_VFD_FETCH_INSTR_1_C 0x225F +#define A3XX_VFD_FETCH_INSTR_1_D 0x2261 +#define A3XX_VFD_FETCH_INSTR_1_E 0x2263 +#define A3XX_VFD_FETCH_INSTR_1_F 0x2265 +#define A3XX_VFD_DECODE_INSTR_0 0x2266 +#define A3XX_VFD_VS_THREADING_THRESHOLD 0x227E +#define A3XX_VPC_ATTR 0x2280 +#define A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x228B +#define A3XX_SP_SP_CTRL_REG 0x22C0 +#define A3XX_SP_VS_CTRL_REG0 0x22C4 +#define A3XX_SP_VS_CTRL_REG1 0x22C5 +#define A3XX_SP_VS_PARAM_REG 0x22C6 +#define A3XX_SP_VS_OUT_REG_0 0x22C7 +#define A3XX_SP_VS_OUT_REG_1 0x22C8 +#define A3XX_SP_VS_OUT_REG_2 0x22C9 +#define A3XX_SP_VS_OUT_REG_3 0x22CA +#define A3XX_SP_VS_OUT_REG_4 0x22CB +#define A3XX_SP_VS_OUT_REG_5 0x22CC +#define A3XX_SP_VS_OUT_REG_6 0x22CD +#define A3XX_SP_VS_OUT_REG_7 0x22CE +#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0 +#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1 +#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2 +#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3 +#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4 +#define A3XX_SP_VS_OBJ_START_REG 0x22D5 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7 +#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8 +#define A3XX_SP_VS_LENGTH_REG 0x22DF +#define A3XX_SP_FS_CTRL_REG0 0x22E0 +#define A3XX_SP_FS_CTRL_REG1 0x22E1 +#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2 +#define A3XX_SP_FS_OBJ_START_REG 0x22E3 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5 +#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8 +#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9 +#define A3XX_SP_FS_OUTPUT_REG 0x22EC +#define A3XX_SP_FS_MRT_REG_0 0x22F0 +#define A3XX_SP_FS_MRT_REG_1 0x22F1 +#define A3XX_SP_FS_MRT_REG_2 0x22F2 +#define A3XX_SP_FS_MRT_REG_3 0x22F3 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6 +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7 +#define A3XX_SP_FS_LENGTH_REG 0x22FF +#define A3XX_PA_SC_AA_CONFIG 0x2301 +#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340 +#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342 +#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343 +#define A3XX_VBIF_CLKON 0x3001 +#define A3XX_VBIF_FIXED_SORT_EN 0x300C +#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D +#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E +#define A3XX_VBIF_ABIT_SORT 0x301C +#define A3XX_VBIF_ABIT_SORT_CONF 0x301D +#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C +#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D +#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030 +#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031 +#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034 +#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035 +#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036 +#define A3XX_VBIF_ARB_CTL 0x303C +#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x3058 +#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E +#define A3XX_VBIF_OUT_AXI_AOOO 0x305F +#define A3XX_VBIF_PERF_CNT_EN 0x3070 +#define A3XX_VBIF_PERF_CNT_CLR 0x3071 +#define A3XX_VBIF_PERF_CNT_SEL 0x3072 +#define A3XX_VBIF_PERF_CNT0_LO 0x3073 +#define A3XX_VBIF_PERF_CNT0_HI 0x3074 +#define A3XX_VBIF_PERF_CNT1_LO 0x3075 +#define A3XX_VBIF_PERF_CNT1_HI 0x3076 +#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077 +#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078 +#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079 +#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a +#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b +#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c + +#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F 
+ +#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081 + +/* VBIF register offsets for A306 */ +#define A3XX_VBIF2_XIN_HALT_CTRL0 0x3081 +#define A3XX_VBIF2_XIN_HALT_CTRL0_MASK 0x7 + +#define A3XX_VBIF2_XIN_HALT_CTRL1 0x3082 + +#define A3XX_VBIF2_PERF_CNT_EN0 0x30c0 +#define A3XX_VBIF2_PERF_CNT_EN1 0x30c1 +#define A3XX_VBIF2_PERF_CNT_EN2 0x30c2 +#define A3XX_VBIF2_PERF_CNT_EN3 0x30c3 +#define A3XX_VBIF2_PERF_CNT_CLR0 0x30c8 +#define A3XX_VBIF2_PERF_CNT_CLR1 0x30c9 +#define A3XX_VBIF2_PERF_CNT_CLR2 0x30ca +#define A3XX_VBIF2_PERF_CNT_CLR3 0x30cb +#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0 +#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1 +#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2 +#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3 +#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8 +#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9 +#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da +#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db +#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0 +#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1 +#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2 +#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3 + +#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100 +#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101 +#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102 +#define A3XX_VBIF2_PERF_PWR_CNT_CLR0 0x3108 +#define A3XX_VBIF2_PERF_PWR_CNT_CLR1 0x3109 +#define A3XX_VBIF2_PERF_PWR_CNT_CLR2 0x310A +#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111 +#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119 +#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a + +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800 +#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801 + +/* Various flags used by the context switch code */ + +#define SP_MULTI 0 +#define SP_BUFFER_MODE 1 +#define SP_TWO_VTX_QUADS 0 +#define SP_PIXEL_BASED 0 +#define SP_R8G8B8A8_UNORM 8 +#define SP_FOUR_PIX_QUADS 1 + +#define HLSQ_DIRECT 0 +#define HLSQ_BLOCK_ID_SP_VS 4 +#define HLSQ_SP_VS_INSTR 0 +#define HLSQ_SP_FS_INSTR 0 +#define HLSQ_BLOCK_ID_SP_FS 6 +#define HLSQ_TWO_PIX_QUADS 0 +#define HLSQ_TWO_VTX_QUADS 0 +#define HLSQ_BLOCK_ID_TP_TEX 2 +#define HLSQ_TP_TEX_SAMPLERS 0 +#define HLSQ_TP_TEX_MEMOBJ 1 +#define HLSQ_BLOCK_ID_TP_MIPMAP 3 +#define HLSQ_TP_MIPMAP_BASE 1 +#define HLSQ_FOUR_PIX_QUADS 1 + +#define RB_FACTOR_ONE 1 +#define RB_BLEND_OP_ADD 0 +#define RB_FACTOR_ZERO 0 +#define RB_DITHER_DISABLE 0 +#define RB_DITHER_ALWAYS 1 +#define RB_FRAG_NEVER 0 +#define RB_ENDIAN_NONE 0 +#define RB_R8G8B8A8_UNORM 8 +#define RB_RESOLVE_PASS 2 +#define RB_CLEAR_MODE_RESOLVE 1 +#define RB_TILINGMODE_LINEAR 0 +#define RB_REF_NEVER 0 +#define RB_FRAG_LESS 1 +#define RB_REF_ALWAYS 7 +#define RB_STENCIL_KEEP 0 +#define RB_RENDERING_PASS 0 +#define RB_TILINGMODE_32X32 2 + +#define PC_DRAW_TRIANGLES 2 +#define PC_DI_PT_RECTLIST 8 +#define PC_DI_SRC_SEL_AUTO_INDEX 2 +#define PC_DI_INDEX_SIZE_16_BIT 0 +#define PC_DI_IGNORE_VISIBILITY 0 +#define PC_DI_PT_TRILIST 4 +#define PC_DI_SRC_SEL_IMMEDIATE 1 +#define PC_DI_INDEX_SIZE_32_BIT 1 + +#define UCHE_ENTIRE_CACHE 1 +#define UCHE_OP_INVALIDATE 1 + +/* + * The following are bit field shifts within some of the registers defined + * above. 
These are used in the context switch code in conjunction with the + * _SET macro + */ + +#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16 +#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12 +#define GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 21 +#define GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 19 +#define GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 20 +#define GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 17 +#define GRAS_CL_VPORT_XSCALE_VPORT_XSCALE 0 +#define GRAS_CL_VPORT_YSCALE_VPORT_YSCALE 0 +#define GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE 0 +#define GRAS_SC_CONTROL_RASTER_MODE 12 +#define GRAS_SC_CONTROL_RENDER_MODE 4 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_X 0 +#define GRAS_SC_SCREEN_SCISSOR_BR_BR_Y 16 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_X 0 +#define GRAS_SC_WINDOW_SCISSOR_BR_BR_Y 16 +#define GRAS_SU_CTRLMODE_LINEHALFWIDTH 03 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY 16 +#define HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY 0 +#define HLSQ_CTRL0REG_CHUNKDISABLE 26 +#define HLSQ_CTRL0REG_CONSTSWITCHMODE 27 +#define HLSQ_CTRL0REG_FSSUPERTHREADENABLE 6 +#define HLSQ_CTRL0REG_FSTHREADSIZE 4 +#define HLSQ_CTRL0REG_LAZYUPDATEDISABLE 28 +#define HLSQ_CTRL0REG_RESERVED2 10 +#define HLSQ_CTRL0REG_SPCONSTFULLUPDATE 29 +#define HLSQ_CTRL0REG_SPSHADERRESTART 9 +#define HLSQ_CTRL0REG_TPFULLUPDATE 30 +#define HLSQ_CTRL1REG_RESERVED1 9 +#define HLSQ_CTRL1REG_VSSUPERTHREADENABLE 8 +#define HLSQ_CTRL1REG_VSTHREADSIZE 6 +#define HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD 26 +#define HLSQ_FSCTRLREG_FSCONSTLENGTH 0 +#define HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET 12 +#define HLSQ_FSCTRLREG_FSINSTRLENGTH 24 +#define HLSQ_VSCTRLREG_VSINSTRLENGTH 24 +#define PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE 8 +#define PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE 5 +#define PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST 25 +#define PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC 0 +#define PC_DRAW_INITIATOR_PRIM_TYPE 0 +#define PC_DRAW_INITIATOR_SOURCE_SELECT 6 +#define PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE 9 +#define PC_DRAW_INITIATOR_INDEX_SIZE 0x0B +#define PC_DRAW_INITIATOR_SMALL_INDEX 0x0D +#define PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x0E +#define RB_COPYCONTROL_COPY_GMEM_BASE 14 +#define RB_COPYCONTROL_RESOLVE_CLEAR_MODE 4 +#define RB_COPYDESTBASE_COPY_DEST_BASE 4 +#define RB_COPYDESTINFO_COPY_COMPONENT_ENABLE 14 +#define RB_COPYDESTINFO_COPY_DEST_ENDIAN 18 +#define RB_COPYDESTINFO_COPY_DEST_FORMAT 2 +#define RB_COPYDESTINFO_COPY_DEST_TILE 0 +#define RB_COPYDESTPITCH_COPY_DEST_PITCH 0 +#define RB_DEPTHCONTROL_Z_TEST_FUNC 4 +#define RB_MODECONTROL_RENDER_MODE 8 +#define RB_MODECONTROL_MARB_CACHE_SPLIT_MODE 15 +#define RB_MODECONTROL_PACKER_TIMER_ENABLE 16 +#define RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE 21 +#define RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR 24 +#define RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR 16 +#define RB_MRTBLENDCONTROL_CLAMP_ENABLE 29 +#define RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE 5 +#define RB_MRTBLENDCONTROL_RGB_DEST_FACTOR 8 +#define RB_MRTBLENDCONTROL_RGB_SRC_FACTOR 0 +#define RB_MRTBUFBASE_COLOR_BUF_BASE 4 +#define RB_MRTBUFINFO_COLOR_BUF_PITCH 17 +#define RB_MRTBUFINFO_COLOR_FORMAT 0 +#define RB_MRTBUFINFO_COLOR_TILE_MODE 6 +#define RB_MRTCONTROL_COMPONENT_ENABLE 24 +#define RB_MRTCONTROL_DITHER_MODE 12 +#define RB_MRTCONTROL_READ_DEST_ENABLE 3 +#define RB_MRTCONTROL_ROP_CODE 8 +#define RB_MSAACONTROL_MSAA_DISABLE 10 +#define RB_MSAACONTROL_SAMPLE_MASK 16 +#define RB_RENDERCONTROL_ALPHA_TEST_FUNC 24 +#define RB_RENDERCONTROL_BIN_WIDTH 4 +#define RB_RENDERCONTROL_DISABLE_COLOR_PIPE 12 +#define RB_STENCILCONTROL_STENCIL_FAIL 11 +#define RB_STENCILCONTROL_STENCIL_FAIL_BF 
23 +#define RB_STENCILCONTROL_STENCIL_FUNC 8 +#define RB_STENCILCONTROL_STENCIL_FUNC_BF 20 +#define RB_STENCILCONTROL_STENCIL_ZFAIL 17 +#define RB_STENCILCONTROL_STENCIL_ZFAIL_BF 29 +#define RB_STENCILCONTROL_STENCIL_ZPASS 14 +#define RB_STENCILCONTROL_STENCIL_ZPASS_BF 26 +#define SP_FSCTRLREG0_FSFULLREGFOOTPRINT 10 +#define SP_FSCTRLREG0_FSHALFREGFOOTPRINT 4 +#define SP_FSCTRLREG0_FSICACHEINVALID 2 +#define SP_FSCTRLREG0_FSINOUTREGOVERLAP 18 +#define SP_FSCTRLREG0_FSINSTRBUFFERMODE 1 +#define SP_FSCTRLREG0_FSLENGTH 24 +#define SP_FSCTRLREG0_FSSUPERTHREADMODE 21 +#define SP_FSCTRLREG0_FSTHREADMODE 0 +#define SP_FSCTRLREG0_FSTHREADSIZE 20 +#define SP_FSCTRLREG0_PIXLODENABLE 22 +#define SP_FSCTRLREG1_FSCONSTLENGTH 0 +#define SP_FSCTRLREG1_FSINITIALOUTSTANDING 20 +#define SP_FSCTRLREG1_HALFPRECVAROFFSET 24 +#define SP_FSMRTREG_REGID 0 +#define SP_FSMRTREG_PRECISION 8 +#define SP_FSOUTREG_PAD0 2 +#define SP_IMAGEOUTPUTREG_MRTFORMAT 0 +#define SP_IMAGEOUTPUTREG_DEPTHOUTMODE 3 +#define SP_IMAGEOUTPUTREG_PAD0 6 +#define SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET 16 +#define SP_OBJOFFSETREG_SHADEROBJOFFSETINIC 25 +#define SP_SHADERLENGTH_LEN 0 +#define SP_SPCTRLREG_CONSTMODE 18 +#define SP_SPCTRLREG_LOMODE 22 +#define SP_SPCTRLREG_SLEEPMODE 20 +#define SP_VSCTRLREG0_VSFULLREGFOOTPRINT 10 +#define SP_VSCTRLREG0_VSICACHEINVALID 2 +#define SP_VSCTRLREG0_VSINSTRBUFFERMODE 1 +#define SP_VSCTRLREG0_VSLENGTH 24 +#define SP_VSCTRLREG0_VSSUPERTHREADMODE 21 +#define SP_VSCTRLREG0_VSTHREADMODE 0 +#define SP_VSCTRLREG0_VSTHREADSIZE 20 +#define SP_VSCTRLREG1_VSINITIALOUTSTANDING 24 +#define SP_VSOUTREG_COMPMASK0 9 +#define SP_VSPARAMREG_POSREGID 0 +#define SP_VSPARAMREG_PSIZEREGID 8 +#define SP_VSPARAMREG_TOTALVSOUTVAR 20 +#define SP_VSVPCDSTREG_OUTLOC0 0 +#define TPL1_TPTEXOFFSETREG_BASETABLEPTR 16 +#define TPL1_TPTEXOFFSETREG_MEMOBJOFFSET 8 +#define TPL1_TPTEXOFFSETREG_SAMPLEROFFSET 0 +#define UCHE_INVALIDATE1REG_OPCODE 0x1C +#define UCHE_INVALIDATE1REG_ALLORPORTION 0x1F +#define VFD_BASEADDR_BASEADDR 0 +#define VFD_CTRLREG0_PACKETSIZE 18 +#define VFD_CTRLREG0_STRMDECINSTRCNT 22 +#define VFD_CTRLREG0_STRMFETCHINSTRCNT 27 +#define VFD_CTRLREG0_TOTALATTRTOVS 0 +#define VFD_CTRLREG1_MAXSTORAGE 0 +#define VFD_CTRLREG1_REGID4INST 24 +#define VFD_CTRLREG1_REGID4VTX 16 +#define VFD_DECODEINSTRUCTIONS_CONSTFILL 4 +#define VFD_DECODEINSTRUCTIONS_FORMAT 6 +#define VFD_DECODEINSTRUCTIONS_LASTCOMPVALID 29 +#define VFD_DECODEINSTRUCTIONS_REGID 12 +#define VFD_DECODEINSTRUCTIONS_SHIFTCNT 24 +#define VFD_DECODEINSTRUCTIONS_SWITCHNEXT 30 +#define VFD_DECODEINSTRUCTIONS_WRITEMASK 0 +#define VFD_FETCHINSTRUCTIONS_BUFSTRIDE 7 +#define VFD_FETCHINSTRUCTIONS_FETCHSIZE 0 +#define VFD_FETCHINSTRUCTIONS_INDEXDECODE 18 +#define VFD_FETCHINSTRUCTIONS_STEPRATE 24 +#define VFD_FETCHINSTRUCTIONS_SWITCHNEXT 17 +#define VFD_THREADINGTHRESHOLD_REGID_VTXCNT 8 +#define VFD_THREADINGTHRESHOLD_REGID_THRESHOLD 0 +#define VFD_THREADINGTHRESHOLD_RESERVED6 4 +#define VPC_VPCATTR_LMSIZE 28 +#define VPC_VPCATTR_THRHDASSIGN 12 +#define VPC_VPCATTR_TOTALATTR 0 +#define VPC_VPCPACK_NUMFPNONPOSVAR 8 +#define VPC_VPCPACK_NUMNONPOSVSVAR 16 +#define VPC_VPCVARPSREPLMODE_COMPONENT08 0 +#define VPC_VPCVARPSREPLMODE_COMPONENT09 2 +#define VPC_VPCVARPSREPLMODE_COMPONENT0A 4 +#define VPC_VPCVARPSREPLMODE_COMPONENT0B 6 +#define VPC_VPCVARPSREPLMODE_COMPONENT0C 8 +#define VPC_VPCVARPSREPLMODE_COMPONENT0D 10 +#define VPC_VPCVARPSREPLMODE_COMPONENT0E 12 +#define VPC_VPCVARPSREPLMODE_COMPONENT0F 14 +#define VPC_VPCVARPSREPLMODE_COMPONENT10 16 +#define 
VPC_VPCVARPSREPLMODE_COMPONENT11 18 +#define VPC_VPCVARPSREPLMODE_COMPONENT12 20 +#define VPC_VPCVARPSREPLMODE_COMPONENT13 22 +#define VPC_VPCVARPSREPLMODE_COMPONENT14 24 +#define VPC_VPCVARPSREPLMODE_COMPONENT15 26 +#define VPC_VPCVARPSREPLMODE_COMPONENT16 28 +#define VPC_VPCVARPSREPLMODE_COMPONENT17 30 + +/* RBBM Debug bus block IDs */ +#define RBBM_BLOCK_ID_NONE 0x0 +#define RBBM_BLOCK_ID_CP 0x1 +#define RBBM_BLOCK_ID_RBBM 0x2 +#define RBBM_BLOCK_ID_VBIF 0x3 +#define RBBM_BLOCK_ID_HLSQ 0x4 +#define RBBM_BLOCK_ID_UCHE 0x5 +#define RBBM_BLOCK_ID_PC 0x8 +#define RBBM_BLOCK_ID_VFD 0x9 +#define RBBM_BLOCK_ID_VPC 0xa +#define RBBM_BLOCK_ID_TSE 0xb +#define RBBM_BLOCK_ID_RAS 0xc +#define RBBM_BLOCK_ID_VSC 0xd +#define RBBM_BLOCK_ID_SP_0 0x10 +#define RBBM_BLOCK_ID_SP_1 0x11 +#define RBBM_BLOCK_ID_SP_2 0x12 +#define RBBM_BLOCK_ID_SP_3 0x13 +#define RBBM_BLOCK_ID_TPL1_0 0x18 +#define RBBM_BLOCK_ID_TPL1_1 0x19 +#define RBBM_BLOCK_ID_TPL1_2 0x1a +#define RBBM_BLOCK_ID_TPL1_3 0x1b +#define RBBM_BLOCK_ID_RB_0 0x20 +#define RBBM_BLOCK_ID_RB_1 0x21 +#define RBBM_BLOCK_ID_RB_2 0x22 +#define RBBM_BLOCK_ID_RB_3 0x23 +#define RBBM_BLOCK_ID_MARB_0 0x28 +#define RBBM_BLOCK_ID_MARB_1 0x29 +#define RBBM_BLOCK_ID_MARB_2 0x2a +#define RBBM_BLOCK_ID_MARB_3 0x2b + +/* RBBM_CLOCK_CTL default value */ +#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA +#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF +#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF + +#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000 +#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x05515455 +#define A310_RBBM_GPR0_CTL_DEFAULT 0x000000AA + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define SP_ALU_ACTIVE_CYCLES 0x1D +#define SP0_ICL1_MISSES 0x1A +#define SP_FS_CFLOW_INSTRUCTIONS 0x0C + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define TSE_INPUT_PRIM_NUM 0x0 + +/* VBIF countables */ +#define VBIF_AXI_TOTAL_BEATS 85 +#define VBIF_DDR_TOTAL_CYCLES 110 + +/* VBIF Recoverable HALT bit value */ +#define VBIF_RECOVERABLE_HALT_CTRL 0x1 + +/* + * CP DEBUG settings for A3XX core: + * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control + * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF + */ +#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25)) + + +#endif diff --git a/drivers/gpu/msm/a4xx_reg.h b/drivers/gpu/msm/a4xx_reg.h new file mode 100644 index 000000000000..78db8dd2da40 --- /dev/null +++ b/drivers/gpu/msm/a4xx_reg.h @@ -0,0 +1,924 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _A4XX_REG_H +#define _A4XX_REG_H + +/* A4XX interrupt bits */ +#define A4XX_INT_RBBM_GPU_IDLE 0 +#define A4XX_INT_RBBM_AHB_ERROR 1 +#define A4XX_INT_RBBM_REG_TIMEOUT 2 +#define A4XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A4XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A4XX_INT_RBBM_ETS_MS_TIMEOUT 5 +#define A4XX_INT_RBBM_ASYNC_OVERFLOW 6 +#define A4XX_INT_RBBM_GPC_ERR 7 +#define A4XX_INT_CP_SW 8 +#define A4XX_INT_CP_OPCODE_ERROR 9 +#define A4XX_INT_CP_RESERVED_BIT_ERROR 10 +#define A4XX_INT_CP_HW_FAULT 11 +#define A4XX_INT_CP_DMA 12 +#define A4XX_INT_CP_IB2_INT 13 +#define A4XX_INT_CP_IB1_INT 14 +#define A4XX_INT_CP_RB_INT 15 +#define A4XX_INT_CP_REG_PROTECT_FAULT 16 +#define A4XX_INT_CP_RB_DONE_TS 17 +#define A4XX_INT_CP_VS_DONE_TS 18 +#define A4XX_INT_CP_PS_DONE_TS 19 +#define A4XX_INT_CACHE_FLUSH_TS 20 +#define A4XX_INT_CP_AHB_ERROR_HALT 21 +#define A4XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A4XX_INT_MISC_HANG_DETECT 24 +#define A4XX_INT_UCHE_OOB_ACCESS 25 +#define A4XX_INT_RBBM_DPM_CALC_ERR 28 +#define A4XX_INT_RBBM_DPM_EPOCH_ERR 29 +#define A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR 30 +#define A4XX_INT_RBBM_DPM_THERMAL_RED_ERR 31 + +/* RB registers */ +#define A4XX_RB_GMEM_BASE_ADDR 0xcc0 + +#define A4XX_RB_PERFCTR_RB_SEL_0 0xcc7 +#define A4XX_RB_PERFCTR_RB_SEL_1 0xcc8 +#define A4XX_RB_PERFCTR_RB_SEL_2 0xcc9 +#define A4XX_RB_PERFCTR_RB_SEL_3 0xcca +#define A4XX_RB_PERFCTR_RB_SEL_4 0xccb +#define A4XX_RB_PERFCTR_RB_SEL_5 0xccc +#define A4XX_RB_PERFCTR_RB_SEL_6 0xccd +#define A4XX_RB_PERFCTR_RB_SEL_7 0xcce + +enum a4xx_rb_perfctr_rb_sel { + RB_VALID_SAMPLES = 0x25, + RB_Z_FAIL = 0x28, + RB_S_FAIL = 0x29, +}; + +/* RBBM registers */ +#define A4XX_RBBM_CLOCK_CTL_TP0 0x4 +#define A4XX_RBBM_CLOCK_CTL_TP1 0x5 +#define A4XX_RBBM_CLOCK_CTL_TP2 0x6 +#define A4XX_RBBM_CLOCK_CTL_TP3 0x7 +#define A4XX_RBBM_CLOCK_CTL2_TP0 0x8 +#define A4XX_RBBM_CLOCK_CTL2_TP1 0x9 +#define A4XX_RBBM_CLOCK_CTL2_TP2 0xA +#define A4XX_RBBM_CLOCK_CTL2_TP3 0xB +#define A4XX_RBBM_CLOCK_HYST_TP0 0xC +#define A4XX_RBBM_CLOCK_HYST_TP1 0xD +#define A4XX_RBBM_CLOCK_HYST_TP2 0xE +#define A4XX_RBBM_CLOCK_HYST_TP3 0xF +#define A4XX_RBBM_CLOCK_DELAY_TP0 0x10 +#define A4XX_RBBM_CLOCK_DELAY_TP1 0x11 +#define A4XX_RBBM_CLOCK_DELAY_TP2 0x12 +#define A4XX_RBBM_CLOCK_DELAY_TP3 0x13 +#define A4XX_RBBM_CLOCK_CTL_UCHE 0x14 +#define A4XX_RBBM_CLOCK_CTL2_UCHE 0x15 +#define A4XX_RBBM_CLOCK_CTL3_UCHE 0x16 +#define A4XX_RBBM_CLOCK_CTL4_UCHE 0x17 +#define A4XX_RBBM_CLOCK_HYST_UCHE 0x18 +#define A4XX_RBBM_CLOCK_DELAY_UCHE 0x19 +#define A4XX_RBBM_CLOCK_MODE_GPC 0x1a +#define A4XX_RBBM_CLOCK_DELAY_GPC 0x1b +#define A4XX_RBBM_CLOCK_HYST_GPC 0x1c +#define A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x1d +#define A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x1e +#define A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x1f +#define A4XX_RBBM_CLOCK_CTL 0x20 +#define A4XX_RBBM_SP_HYST_CNT 0x21 +#define A4XX_RBBM_SW_RESET_CMD 0x22 +#define A4XX_RBBM_AHB_CTL0 0x23 +#define A4XX_RBBM_AHB_CTL1 0x24 +#define A4XX_RBBM_AHB_CMD 0x25 +#define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x2b +#define A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x2f +#define A4XX_RBBM_INT_CLEAR_CMD 0x36 +#define A4XX_RBBM_INT_0_MASK 0x37 +#define A4XX_RBBM_ALWAYSON_COUNTER_CNTL 0x3d +#define A4XX_RBBM_RBBM_CTL 0x3e +#define A4XX_RBBM_CLOCK_CTL2 0x42 +#define A4XX_RBBM_BLOCK_SW_RESET_CMD 0x45 +#define A4XX_RBBM_EXT_TRACE_BUS_CTL 0x49 +#define A4XX_RBBM_CFG_DEBBUS_SEL_A 0x4a +#define A4XX_RBBM_CFG_DEBBUS_SEL_B 0x4b +#define A4XX_RBBM_CFG_DEBBUS_SEL_C 0x4c +#define A4XX_RBBM_CFG_DEBBUS_SEL_D 0x4d +#define 
A4XX_RBBM_CFG_DEBBUS_SEL_PING_INDEX_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_SEL_PING_BLK_SEL_SHIFT 8 +#define A4XX_RBBM_CFG_DEBBUS_SEL_PONG_INDEX_SHIFT 16 +#define A4XX_RBBM_CFG_DEBBUS_SEL_PONG_BLK_SEL_SHIFT 24 + +#define A4XX_RBBM_CFG_DEBBUS_CTLT 0x4e +#define A4XX_RBBM_CFG_DEBBUS_CTLT_ENT_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_CTLT_GRANU_SHIFT 12 +#define A4XX_RBBM_CFG_DEBBUS_CTLT_SEGT_SHIFT 28 + +#define A4XX_RBBM_CFG_DEBBUS_CTLM 0x4f +#define A4XX_RBBM_CFG_DEBBUS_CTLT_ENABLE_SHIFT 24 + +#define A4XX_RBBM_CFG_DEBBUS_OPL 0x50 +#define A4XX_RBBM_CFG_DEBBUS_OPE 0x51 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_0 0x52 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_1 0x53 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_2 0x54 +#define A4XX_RBBM_CFG_DEBBUS_IVTL_3 0x55 + +#define A4XX_RBBM_CFG_DEBBUS_MASKL_0 0x56 +#define A4XX_RBBM_CFG_DEBBUS_MASKL_1 0x57 +#define A4XX_RBBM_CFG_DEBBUS_MASKL_2 0x58 +#define A4XX_RBBM_CFG_DEBBUS_MASKL_3 0x59 + + +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0 0x5a +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL0_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL1_SHIFT 4 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL2_SHIFT 8 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL3_SHIFT 12 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL4_SHIFT 16 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL5_SHIFT 20 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL6_SHIFT 24 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0_BYTEL7_SHIFT 28 + +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1 0x5b +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL8_SHIFT 0 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL9_SHIFT 4 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL10_SHIFT 8 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL11_SHIFT 12 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL12_SHIFT 16 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL13_SHIFT 20 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL14_SHIFT 24 +#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1_BYTEL15_SHIFT 28 + +#define A4XX_RBBM_CFG_DEBBUS_IVTE_0 0x5c +#define A4XX_RBBM_CFG_DEBBUS_IVTE_1 0x5d +#define A4XX_RBBM_CFG_DEBBUS_IVTE_2 0x5e +#define A4XX_RBBM_CFG_DEBBUS_IVTE_3 0x5f +#define A4XX_RBBM_CFG_DEBBUS_MASKE_0 0x60 +#define A4XX_RBBM_CFG_DEBBUS_MASKE_1 0x61 +#define A4XX_RBBM_CFG_DEBBUS_MASKE_2 0x62 +#define A4XX_RBBM_CFG_DEBBUS_MASKE_3 0x63 +#define A4XX_RBBM_CFG_DEBBUS_NIBBLEE 0x64 +#define A4XX_RBBM_CFG_DEBBUS_PTRC0 0x65 +#define A4XX_RBBM_CFG_DEBBUS_PTRC1 0x66 +#define A4XX_RBBM_CFG_DEBBUS_LOADREG 0x67 +#define A4XX_RBBM_CLOCK_CTL_SP0 0x68 +#define A4XX_RBBM_CLOCK_CTL_SP1 0x69 +#define A4XX_RBBM_CLOCK_CTL_SP2 0x6A +#define A4XX_RBBM_CLOCK_CTL_SP3 0x6B +#define A4XX_RBBM_CLOCK_CTL2_SP0 0x6C +#define A4XX_RBBM_CLOCK_CTL2_SP1 0x6D +#define A4XX_RBBM_CLOCK_CTL2_SP2 0x6E +#define A4XX_RBBM_CLOCK_CTL2_SP3 0x6F +#define A4XX_RBBM_CLOCK_HYST_SP0 0x70 +#define A4XX_RBBM_CLOCK_HYST_SP1 0x71 +#define A4XX_RBBM_CLOCK_HYST_SP2 0x72 +#define A4XX_RBBM_CLOCK_HYST_SP3 0x73 +#define A4XX_RBBM_CLOCK_DELAY_SP0 0x74 +#define A4XX_RBBM_CLOCK_DELAY_SP1 0x75 +#define A4XX_RBBM_CLOCK_DELAY_SP2 0x76 +#define A4XX_RBBM_CLOCK_DELAY_SP3 0x77 +#define A4XX_RBBM_CLOCK_CTL_RB0 0x78 +#define A4XX_RBBM_CLOCK_CTL_RB1 0x79 +#define A4XX_RBBM_CLOCK_CTL_RB2 0x7A +#define A4XX_RBBM_CLOCK_CTL_RB3 0x7B +#define A4XX_RBBM_CLOCK_CTL2_RB0 0x7C +#define A4XX_RBBM_CLOCK_CTL2_RB1 0x7D +#define A4XX_RBBM_CLOCK_CTL2_RB2 0x7E +#define A4XX_RBBM_CLOCK_CTL2_RB3 0x7F +#define A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x80 +#define A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x81 +#define A4XX_RBBM_CLOCK_CTL_MARB_CCU0 0x82 +#define A4XX_RBBM_CLOCK_CTL_MARB_CCU1 0x83 +#define A4XX_RBBM_CLOCK_CTL_MARB_CCU2 0x84 +#define 
A4XX_RBBM_CLOCK_CTL_MARB_CCU3 0x85 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU0 0x86 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU1 0x87 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU2 0x88 +#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU3 0x89 +#define A4XX_RBBM_CLOCK_CTL_HLSQ 0x8a +#define A4XX_RBBM_CLOCK_HYST_HLSQ 0x8b + +#define A4XX_RBBM_CLOCK_DELAY_HLSQ 0x8c +#define A4XX_CGC_HLSQ_TP_EARLY_CYC_MASK 0x00700000 +#define A4XX_CGC_HLSQ_TP_EARLY_CYC_SHIFT 20 + +#define A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x8d +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_0 0x8e +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_1 0x8f +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_2 0x90 +#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_3 0x91 + +#define A4XX_RBBM_CFG_DEBBUS_IDX 0x93 +#define A4XX_RBBM_CFG_DEBBUS_CLRC 0x94 +#define A4XX_RBBM_CFG_DEBBUS_LOADIVT 0x95 + +#define A4XX_RBBM_POWER_CNTL_IP 0x98 +#define A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x99 +#define A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x9a +#define A4XX_RBBM_PERFCTR_CP_0_LO 0x9c +#define A4XX_RBBM_PERFCTR_CP_0_HI 0x9d +#define A4XX_RBBM_PERFCTR_CP_1_LO 0x9e +#define A4XX_RBBM_PERFCTR_CP_1_HI 0x9f +#define A4XX_RBBM_PERFCTR_CP_2_LO 0xa0 +#define A4XX_RBBM_PERFCTR_CP_2_HI 0xa1 +#define A4XX_RBBM_PERFCTR_CP_3_LO 0xa2 +#define A4XX_RBBM_PERFCTR_CP_3_HI 0xa3 +#define A4XX_RBBM_PERFCTR_CP_4_LO 0xa4 +#define A4XX_RBBM_PERFCTR_CP_4_HI 0xa5 +#define A4XX_RBBM_PERFCTR_CP_5_LO 0xa6 +#define A4XX_RBBM_PERFCTR_CP_5_HI 0xa7 +#define A4XX_RBBM_PERFCTR_CP_6_LO 0xa8 +#define A4XX_RBBM_PERFCTR_CP_6_HI 0xa9 +#define A4XX_RBBM_PERFCTR_CP_7_LO 0xaa +#define A4XX_RBBM_PERFCTR_CP_7_HI 0xab +#define A4XX_RBBM_PERFCTR_RBBM_0_LO 0xac +#define A4XX_RBBM_PERFCTR_RBBM_0_HI 0xad +#define A4XX_RBBM_PERFCTR_RBBM_1_LO 0xae +#define A4XX_RBBM_PERFCTR_RBBM_1_HI 0xaf +#define A4XX_RBBM_PERFCTR_RBBM_2_LO 0xb0 +#define A4XX_RBBM_PERFCTR_RBBM_2_HI 0xb1 +#define A4XX_RBBM_PERFCTR_RBBM_3_LO 0xb2 +#define A4XX_RBBM_PERFCTR_RBBM_3_HI 0xb3 +#define A4XX_RBBM_PERFCTR_PC_0_LO 0xb4 +#define A4XX_RBBM_PERFCTR_PC_0_HI 0xb5 +#define A4XX_RBBM_PERFCTR_PC_1_LO 0xb6 +#define A4XX_RBBM_PERFCTR_PC_1_HI 0xb7 +#define A4XX_RBBM_PERFCTR_PC_2_LO 0xb8 +#define A4XX_RBBM_PERFCTR_PC_2_HI 0xb9 +#define A4XX_RBBM_PERFCTR_PC_3_LO 0xba +#define A4XX_RBBM_PERFCTR_PC_3_HI 0xbb +#define A4XX_RBBM_PERFCTR_PC_4_LO 0xbc +#define A4XX_RBBM_PERFCTR_PC_4_HI 0xbd +#define A4XX_RBBM_PERFCTR_PC_5_LO 0xbe +#define A4XX_RBBM_PERFCTR_PC_5_HI 0xbf +#define A4XX_RBBM_PERFCTR_PC_6_LO 0xc0 +#define A4XX_RBBM_PERFCTR_PC_6_HI 0xc1 +#define A4XX_RBBM_PERFCTR_PC_7_LO 0xc2 +#define A4XX_RBBM_PERFCTR_PC_7_HI 0xc3 +#define A4XX_RBBM_PERFCTR_VFD_0_LO 0xc4 +#define A4XX_RBBM_PERFCTR_VFD_0_HI 0xc5 +#define A4XX_RBBM_PERFCTR_VFD_1_LO 0xc6 +#define A4XX_RBBM_PERFCTR_VFD_1_HI 0xc7 +#define A4XX_RBBM_PERFCTR_VFD_2_LO 0xc8 +#define A4XX_RBBM_PERFCTR_VFD_2_HI 0xc9 +#define A4XX_RBBM_PERFCTR_VFD_3_LO 0xca +#define A4XX_RBBM_PERFCTR_VFD_3_HI 0xcb +#define A4XX_RBBM_PERFCTR_VFD_4_LO 0xcc +#define A4XX_RBBM_PERFCTR_VFD_4_HI 0xcd +#define A4XX_RBBM_PERFCTR_VFD_5_LO 0xce +#define A4XX_RBBM_PERFCTR_VFD_5_HI 0xcf +#define A4XX_RBBM_PERFCTR_VFD_6_LO 0xd0 +#define A4XX_RBBM_PERFCTR_VFD_6_HI 0xd1 +#define A4XX_RBBM_PERFCTR_VFD_7_LO 0xd2 +#define A4XX_RBBM_PERFCTR_VFD_7_HI 0xd3 +#define A4XX_RBBM_PERFCTR_HLSQ_0_LO 0xd4 +#define A4XX_RBBM_PERFCTR_HLSQ_0_HI 0xd5 +#define A4XX_RBBM_PERFCTR_HLSQ_1_LO 0xd6 +#define A4XX_RBBM_PERFCTR_HLSQ_1_HI 0xd7 +#define A4XX_RBBM_PERFCTR_HLSQ_2_LO 0xd8 +#define A4XX_RBBM_PERFCTR_HLSQ_2_HI 0xd9 +#define A4XX_RBBM_PERFCTR_HLSQ_3_LO 0xda +#define 
A4XX_RBBM_PERFCTR_HLSQ_3_HI 0xdb +#define A4XX_RBBM_PERFCTR_HLSQ_4_LO 0xdc +#define A4XX_RBBM_PERFCTR_HLSQ_4_HI 0xdd +#define A4XX_RBBM_PERFCTR_HLSQ_5_LO 0xde +#define A4XX_RBBM_PERFCTR_HLSQ_5_HI 0xdf +#define A4XX_RBBM_PERFCTR_HLSQ_6_LO 0xe0 +#define A4XX_RBBM_PERFCTR_HLSQ_6_HI 0xe1 +#define A4XX_RBBM_PERFCTR_HLSQ_7_LO 0xe2 +#define A4XX_RBBM_PERFCTR_HLSQ_7_HI 0xe3 +#define A4XX_RBBM_PERFCTR_VPC_0_LO 0xe4 +#define A4XX_RBBM_PERFCTR_VPC_0_HI 0xe5 +#define A4XX_RBBM_PERFCTR_VPC_1_LO 0xe6 +#define A4XX_RBBM_PERFCTR_VPC_1_HI 0xe7 +#define A4XX_RBBM_PERFCTR_VPC_2_LO 0xe8 +#define A4XX_RBBM_PERFCTR_VPC_2_HI 0xe9 +#define A4XX_RBBM_PERFCTR_VPC_3_LO 0xea +#define A4XX_RBBM_PERFCTR_VPC_3_HI 0xeb +#define A4XX_RBBM_PERFCTR_CCU_0_LO 0xec +#define A4XX_RBBM_PERFCTR_CCU_0_HI 0xed +#define A4XX_RBBM_PERFCTR_CCU_1_LO 0xee +#define A4XX_RBBM_PERFCTR_CCU_1_HI 0xef +#define A4XX_RBBM_PERFCTR_CCU_2_LO 0xf0 +#define A4XX_RBBM_PERFCTR_CCU_2_HI 0xf1 +#define A4XX_RBBM_PERFCTR_CCU_3_LO 0xf2 +#define A4XX_RBBM_PERFCTR_CCU_3_HI 0xf3 +#define A4XX_RBBM_PERFCTR_TSE_0_LO 0xf4 +#define A4XX_RBBM_PERFCTR_TSE_0_HI 0xf5 +#define A4XX_RBBM_PERFCTR_TSE_1_LO 0xf6 +#define A4XX_RBBM_PERFCTR_TSE_1_HI 0xf7 +#define A4XX_RBBM_PERFCTR_TSE_2_LO 0xf8 +#define A4XX_RBBM_PERFCTR_TSE_2_HI 0xf9 +#define A4XX_RBBM_PERFCTR_TSE_3_LO 0xfa +#define A4XX_RBBM_PERFCTR_TSE_3_HI 0xfb +#define A4XX_RBBM_PERFCTR_RAS_0_LO 0xfc +#define A4XX_RBBM_PERFCTR_RAS_0_HI 0xfd +#define A4XX_RBBM_PERFCTR_RAS_1_LO 0xfe +#define A4XX_RBBM_PERFCTR_RAS_1_HI 0xff +#define A4XX_RBBM_PERFCTR_RAS_2_LO 0x100 +#define A4XX_RBBM_PERFCTR_RAS_2_HI 0x101 +#define A4XX_RBBM_PERFCTR_RAS_3_LO 0x102 +#define A4XX_RBBM_PERFCTR_RAS_3_HI 0x103 +#define A4XX_RBBM_PERFCTR_UCHE_0_LO 0x104 +#define A4XX_RBBM_PERFCTR_UCHE_0_HI 0x105 +#define A4XX_RBBM_PERFCTR_UCHE_1_LO 0x106 +#define A4XX_RBBM_PERFCTR_UCHE_1_HI 0x107 +#define A4XX_RBBM_PERFCTR_UCHE_2_LO 0x108 +#define A4XX_RBBM_PERFCTR_UCHE_2_HI 0x109 +#define A4XX_RBBM_PERFCTR_UCHE_3_LO 0x10a +#define A4XX_RBBM_PERFCTR_UCHE_3_HI 0x10b +#define A4XX_RBBM_PERFCTR_UCHE_4_LO 0x10c +#define A4XX_RBBM_PERFCTR_UCHE_4_HI 0x10d +#define A4XX_RBBM_PERFCTR_UCHE_5_LO 0x10e +#define A4XX_RBBM_PERFCTR_UCHE_5_HI 0x10f +#define A4XX_RBBM_PERFCTR_UCHE_6_LO 0x110 +#define A4XX_RBBM_PERFCTR_UCHE_6_HI 0x111 +#define A4XX_RBBM_PERFCTR_UCHE_7_LO 0x112 +#define A4XX_RBBM_PERFCTR_UCHE_7_HI 0x113 +#define A4XX_RBBM_PERFCTR_TP_0_LO 0x114 +#define A4XX_RBBM_PERFCTR_TP_0_HI 0x115 +#define A4XX_RBBM_PERFCTR_TP_1_LO 0x116 +#define A4XX_RBBM_PERFCTR_TP_1_HI 0x117 +#define A4XX_RBBM_PERFCTR_TP_2_LO 0x118 +#define A4XX_RBBM_PERFCTR_TP_2_HI 0x119 +#define A4XX_RBBM_PERFCTR_TP_3_LO 0x11a +#define A4XX_RBBM_PERFCTR_TP_3_HI 0x11b +#define A4XX_RBBM_PERFCTR_TP_4_LO 0x11c +#define A4XX_RBBM_PERFCTR_TP_4_HI 0x11d +#define A4XX_RBBM_PERFCTR_TP_5_LO 0x11e +#define A4XX_RBBM_PERFCTR_TP_5_HI 0x11f +#define A4XX_RBBM_PERFCTR_TP_6_LO 0x120 +#define A4XX_RBBM_PERFCTR_TP_6_HI 0x121 +#define A4XX_RBBM_PERFCTR_TP_7_LO 0x122 +#define A4XX_RBBM_PERFCTR_TP_7_HI 0x123 +#define A4XX_RBBM_PERFCTR_SP_0_LO 0x124 +#define A4XX_RBBM_PERFCTR_SP_0_HI 0x125 +#define A4XX_RBBM_PERFCTR_SP_1_LO 0x126 +#define A4XX_RBBM_PERFCTR_SP_1_HI 0x127 +#define A4XX_RBBM_PERFCTR_SP_2_LO 0x128 +#define A4XX_RBBM_PERFCTR_SP_2_HI 0x129 +#define A4XX_RBBM_PERFCTR_SP_3_LO 0x12a +#define A4XX_RBBM_PERFCTR_SP_3_HI 0x12b +#define A4XX_RBBM_PERFCTR_SP_4_LO 0x12c +#define A4XX_RBBM_PERFCTR_SP_4_HI 0x12d +#define A4XX_RBBM_PERFCTR_SP_5_LO 0x12e +#define A4XX_RBBM_PERFCTR_SP_5_HI 0x12f +#define 
A4XX_RBBM_PERFCTR_SP_6_LO 0x130 +#define A4XX_RBBM_PERFCTR_SP_6_HI 0x131 +#define A4XX_RBBM_PERFCTR_SP_7_LO 0x132 +#define A4XX_RBBM_PERFCTR_SP_7_HI 0x133 +#define A4XX_RBBM_PERFCTR_SP_8_LO 0x134 +#define A4XX_RBBM_PERFCTR_SP_8_HI 0x135 +#define A4XX_RBBM_PERFCTR_SP_9_LO 0x136 +#define A4XX_RBBM_PERFCTR_SP_9_HI 0x137 +#define A4XX_RBBM_PERFCTR_SP_10_LO 0x138 +#define A4XX_RBBM_PERFCTR_SP_10_HI 0x139 +#define A4XX_RBBM_PERFCTR_SP_11_LO 0x13a +#define A4XX_RBBM_PERFCTR_SP_11_HI 0x13b +#define A4XX_RBBM_PERFCTR_RB_0_LO 0x13c +#define A4XX_RBBM_PERFCTR_RB_0_HI 0x13d +#define A4XX_RBBM_PERFCTR_RB_1_LO 0x13e +#define A4XX_RBBM_PERFCTR_RB_1_HI 0x13f +#define A4XX_RBBM_PERFCTR_RB_2_LO 0x140 +#define A4XX_RBBM_PERFCTR_RB_2_HI 0x141 +#define A4XX_RBBM_PERFCTR_RB_3_LO 0x142 +#define A4XX_RBBM_PERFCTR_RB_3_HI 0x143 +#define A4XX_RBBM_PERFCTR_RB_4_LO 0x144 +#define A4XX_RBBM_PERFCTR_RB_4_HI 0x145 +#define A4XX_RBBM_PERFCTR_RB_5_LO 0x146 +#define A4XX_RBBM_PERFCTR_RB_5_HI 0x147 +#define A4XX_RBBM_PERFCTR_RB_6_LO 0x148 +#define A4XX_RBBM_PERFCTR_RB_6_HI 0x149 +#define A4XX_RBBM_PERFCTR_RB_7_LO 0x14a +#define A4XX_RBBM_PERFCTR_RB_7_HI 0x14b +#define A4XX_RBBM_PERFCTR_VSC_0_LO 0x14c +#define A4XX_RBBM_PERFCTR_VSC_0_HI 0x14d +#define A4XX_RBBM_PERFCTR_VSC_1_LO 0x14e +#define A4XX_RBBM_PERFCTR_VSC_1_HI 0x14f +#define A4XX_RBBM_PERFCTR_PWR_0_LO 0x166 +#define A4XX_RBBM_PERFCTR_PWR_0_HI 0x167 +#define A4XX_RBBM_PERFCTR_PWR_1_LO 0x168 +#define A4XX_RBBM_PERFCTR_PWR_1_HI 0x169 +#define A4XX_RBBM_ALWAYSON_COUNTER_LO 0x16e +#define A4XX_RBBM_ALWAYSON_COUNTER_HI 0x16f +#define A4XX_RBBM_PERFCTR_CTL 0x170 +#define A4XX_RBBM_PERFCTR_LOAD_CMD0 0x171 +#define A4XX_RBBM_PERFCTR_LOAD_CMD1 0x172 +#define A4XX_RBBM_PERFCTR_LOAD_CMD2 0x173 +#define A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x174 +#define A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x175 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x176 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x177 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x178 +#define A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x179 +#define A4XX_RBBM_GPU_BUSY_MASKED 0x17a +#define A4XX_RBBM_INT_0_STATUS 0x17d +#define A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x18c +#define A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x18d +#define A4XX_RBBM_AHB_ERROR_STATUS 0x18f +#define A4XX_RBBM_STATUS 0x191 +#define A4XX_RBBM_CFG_COUNTER0 0x1a2 +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF0 0x1a9 +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF1 0x1aa +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF2 0x1ab +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF3 0x1ac +#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4 0x1ad +#define A4XX_RBBM_CFG_DEBBUS_MISR0 0x1ae +#define A4XX_RBBM_CFG_DEBBUS_MISR1 0x1af +#define A4XX_RBBM_POWER_STATUS 0x1b0 +#define A4XX_RBBM_PPD_V2_SP_PWR_WEIGHTS 0x1b2 +#define A4XX_RBBM_PPD_V2_SP_RB_EPOCH_TH 0x1b3 +#define A4XX_RBBM_PPD_V2_TP_CONFIG 0x1b4 +#define A4XX_RBBM_PPD_RAMP_V2_CONTROL 0x1b5 +#define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x1b8 +#define A4XX_RBBM_PPD_CTRL 0x1b9 +#define A4XX_RBBM_PPD_EPOCH_INTRA_TH_1 0x1ba +#define A4XX_RBBM_PPD_EPOCH_INTRA_TH_2 0x1bb +#define A4XX_RBBM_PPD_EPOCH_INTER_TH_HIGH_CLEAR_THR 0x1bc +#define A4XX_RBBM_PPD_EPOCH_INTER_TH_LOW 0x1bd +/* SECVID registers */ +#define A4XX_RBBM_SECVID_TRUST_CONFIG 0xf000 +#define A4XX_RBBM_SECVID_TRUST_CONTROL 0xf400 +#define A4XX_RBBM_SECVID_TSB_TRUSTED_BASE 0xf800 +#define A4XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xf801 +#define A4XX_RBBM_SECVID_TSB_CONTROL 0xf802 + +/* CP registers */ +#define A4XX_CP_RB_BASE 0x200 +#define A4XX_CP_RB_CNTL 0x201 +#define A4XX_CP_RB_RPTR_ADDR 0x203 +#define A4XX_CP_RB_RPTR 0x204 +#define A4XX_CP_RB_WPTR 0x205 
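The RBBM blocks above expose each performance counter as a LO/HI register pair, with a per-group select register (for example A4XX_RBBM_PERFCTR_RBBM_SEL_0) choosing the countable. A minimal sketch of that pattern, assuming the driver's generic kgsl_regread()/kgsl_regwrite() helpers and a valid struct kgsl_device; the function name is only illustrative:

/*
 * Illustrative sketch, not part of the snapshot: route a countable into
 * RBBM counter 0 and read it back as a 64-bit value from its LO/HI pair.
 * Assumes the kgsl register helpers declared in the driver's own headers.
 */
static u64 example_read_rbbm_counter0(struct kgsl_device *device,
		unsigned int countable)
{
	unsigned int lo, hi;

	/* Select what RBBM counter 0 should count */
	kgsl_regwrite(device, A4XX_RBBM_PERFCTR_RBBM_SEL_0, countable);

	/* Each counter value is split across a LO/HI register pair */
	kgsl_regread(device, A4XX_RBBM_PERFCTR_RBBM_0_LO, &lo);
	kgsl_regread(device, A4XX_RBBM_PERFCTR_RBBM_0_HI, &hi);

	return ((u64)hi << 32) | lo;
}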
+#define A4XX_CP_IB1_BASE 0x206 +#define A4XX_CP_IB1_BUFSZ 0x207 +#define A4XX_CP_IB2_BASE 0x208 +#define A4XX_CP_IB2_BUFSZ 0x209 +#define A4XX_CP_ROQ_ADDR 0x21C +#define A4XX_CP_ROQ_DATA 0x21D +#define A4XX_CP_MEQ_ADDR 0x21E +#define A4XX_CP_MEQ_DATA 0x21F +#define A4XX_CP_MERCIU_ADDR 0x220 +#define A4XX_CP_MERCIU_DATA 0x221 +#define A4XX_CP_MERCIU_DATA2 0x222 +#define A4XX_CP_PFP_UCODE_ADDR 0x223 +#define A4XX_CP_PFP_UCODE_DATA 0x224 +#define A4XX_CP_ME_RAM_WADDR 0x225 +#define A4XX_CP_ME_RAM_RADDR 0x226 +#define A4XX_CP_ME_RAM_DATA 0x227 +#define A4XX_CP_SCRATCH_UMASK 0x228 +#define A4XX_CP_SCRATCH_ADDR 0x229 + +#define A4XX_CP_PREEMPT 0x22a +/* PREEMPT register bit shifts */ +#define A4XX_CP_PREEMPT_STOP_SHIFT 0 +#define A4XX_CP_PREEMPT_RESUME_SHIFT 1 + +#define A4XX_CP_PREEMPT_DISABLE 0x22b +#define A4XX_CP_CNTL 0x22c +#define A4XX_CP_ME_CNTL 0x22d +#define A4XX_CP_DEBUG 0x22e +#define A4XX_CP_STATE_DEBUG_INDEX 0x22f +#define A4XX_CP_STATE_DEBUG_DATA 0x230 +#define A4XX_CP_POWER_COLLAPSE_CNTL 0x234 +/* + * CP debug settings for A4xx cores + * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF + */ +#define A4XX_CP_DEBUG_DEFAULT (1 << 25) + +#define A4XX_CP_PROTECT_REG_0 0x240 +#define A4XX_CP_PROTECT_CTRL 0x250 +#define A4XX_CP_PROTECT_REG_10 0x251 + +#define A4XX_CP_ME_STATUS 0x4d1 +#define A4XX_CP_CNTL 0x22c +#define A4XX_CP_WFI_PEND_CTR 0x4d2 +#define A4XX_CP_PREEMPT_DEBUG 0x4d6 +#define A4XX_CP_HW_FAULT 0x4d8 +#define A4XX_CP_PROTECT_STATUS 0x4da +#define A4XX_CP_PERFCTR_CP_SEL_0 0x500 +#define A4XX_CP_PERFCTR_CP_SEL_1 0x501 +#define A4XX_CP_PERFCTR_CP_SEL_2 0x502 +#define A4XX_CP_PERFCTR_CP_SEL_3 0x503 +#define A4XX_CP_PERFCTR_CP_SEL_4 0x504 +#define A4XX_CP_PERFCTR_CP_SEL_5 0x505 +#define A4XX_CP_PERFCTR_CP_SEL_6 0x506 +#define A4XX_CP_PERFCTR_CP_SEL_7 0x507 + +#define A4XX_CP_SCRATCH_REG0 0x578 +#define A4XX_CP_SCRATCH_REG6 0x57e +#define A4XX_CP_SCRATCH_REG7 0x57f +#define A4XX_CP_SCRATCH_REG8 0x580 +#define A4XX_CP_SCRATCH_REG9 0x581 +#define A4XX_CP_SCRATCH_REG10 0x582 +#define A4XX_CP_SCRATCH_REG11 0x583 +#define A4XX_CP_SCRATCH_REG12 0x584 +#define A4XX_CP_SCRATCH_REG13 0x585 +#define A4XX_CP_SCRATCH_REG14 0x586 +#define A4XX_CP_SCRATCH_REG15 0x587 +#define A4XX_CP_SCRATCH_REG16 0x588 +#define A4XX_CP_SCRATCH_REG17 0x589 +#define A4XX_CP_SCRATCH_REG18 0x58a +#define A4XX_CP_SCRATCH_REG23 0x58f + +/* SP registers */ +#define A4XX_SP_SP_CTRL 0x22C0 +#define A4XX_SP_INSTR_CACHE_CTRL 0x22c1 +#define A4XX_SP_VS_OBJ_START 0x22e1 +#define A4XX_SP_VS_PVT_MEM_ADDR 0x22e3 +#define A4XX_SP_FS_CTRL_1 0x22e9 +#define A4XX_SP_FS_OBJ_START 0x22eb +#define A4XX_SP_FS_PVT_MEM_ADDR 0x22ed +#define A4XX_SP_CS_CTRL_0 0x2300 +#define A4XX_SP_CS_OBJ_OFFSET 0x2301 +#define A4XX_SP_CS_OBJ_START 0x2302 +#define A4XX_SP_CS_PVT_MEM_PARAM 0x2303 +#define A4XX_SP_CS_PVT_MEM_ADDR 0x2304 +#define A4XX_SP_CS_PVT_MEM_SIZE 0x2305 +#define A4XX_SP_CS_LENGTH 0x2306 +#define A4XX_SP_MODE_CONTROL 0xec3 +#define A4XX_SP_PERFCTR_SP_SEL_0 0xec4 +#define A4XX_SP_PERFCTR_SP_SEL_1 0xec5 +#define A4XX_SP_PERFCTR_SP_SEL_2 0xec6 +#define A4XX_SP_PERFCTR_SP_SEL_3 0xec7 +#define A4XX_SP_PERFCTR_SP_SEL_4 0xec8 +#define A4XX_SP_PERFCTR_SP_SEL_5 0xec9 +#define A4XX_SP_PERFCTR_SP_SEL_6 0xeca +#define A4XX_SP_PERFCTR_SP_SEL_7 0xecb +#define A4XX_SP_PERFCTR_SP_SEL_8 0xecc +#define A4XX_SP_PERFCTR_SP_SEL_9 0xecd +#define A4XX_SP_PERFCTR_SP_SEL_10 0xece +#define A4XX_SP_PERFCTR_SP_SEL_11 0xecf +#define A4XX_SP_VS_PVT_MEM_ADDR 0x22e3 +#define A4XX_SP_FS_PVT_MEM_ADDR 0x22ed +#define A4XX_SP_VS_OBJ_START 0x22e1 
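Several of the CP definitions above are bit positions or shifts rather than ready-made masks, for example the PREEMPT stop/resume shifts and the MIU_128BIT_WRITE_ENABLE bit folded into A4XX_CP_DEBUG_DEFAULT. A hedged sketch of how such values are composed and written, again assuming the kgsl_regwrite() helper; this is not the driver's actual start-up or preemption sequence:

/*
 * Illustrative only: A4XX_CP_DEBUG_DEFAULT already encodes bit 25
 * (MIU_128BIT_WRITE_ENABLE); the preempt write simply shows the shift idiom.
 */
static void example_cp_writes(struct kgsl_device *device)
{
	kgsl_regwrite(device, A4XX_CP_DEBUG, A4XX_CP_DEBUG_DEFAULT);
	kgsl_regwrite(device, A4XX_CP_PREEMPT,
			1 << A4XX_CP_PREEMPT_STOP_SHIFT);
}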
+#define A4XX_SP_FS_OBJ_START 0x22eb + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define A4XX_SP_ALU_ACTIVE_CYCLES 0x1D +#define A4XX_SP0_ICL1_MISSES 0x1A +#define A4XX_SP_FS_CFLOW_INSTRUCTIONS 0x0C + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define A4XX_TSE_INPUT_PRIM_NUM 0x0 + +enum a4xx_sp_perfctr_sp_sel { + SP_FS_STAGE_BARY_INSTRUCTIONS = 0x10, +}; + +/* VPC registers */ +#define A4XX_VPC_DEBUG_RAM_SEL 0xe60 +#define A4XX_VPC_DEBUG_RAM_READ 0xe61 +#define A4XX_VPC_PERFCTR_VPC_SEL_0 0xe65 +#define A4XX_VPC_PERFCTR_VPC_SEL_1 0xe66 +#define A4XX_VPC_PERFCTR_VPC_SEL_2 0xe67 +#define A4XX_VPC_PERFCTR_VPC_SEL_3 0xe68 + +/* UCHE register */ +#define UCHE_TRAP_BASE_LO 0xe83 +#define UCHE_TRAP_BASE_HI 0xe84 +#define A4XX_UCHE_INVALIDATE0 0xe8a +#define A4XX_UCHE_INVALIDATE1 0xe8b +#define A4XX_UCHE_CACHE_WAYS_VFD 0xe8c + +/* VSC registers */ +#define A4XX_VSC_SIZE_ADDRESS 0xc01 +#define A4XX_VSC_PIPE_DATA_ADDRESS_0 0xc10 +#define A4XX_VSC_PIPE_DATA_ADDRESS_1 0xc11 +#define A4XX_VSC_PIPE_DATA_ADDRESS_2 0xc12 +#define A4XX_VSC_PIPE_DATA_ADDRESS_3 0xc13 +#define A4XX_VSC_PIPE_DATA_ADDRESS_4 0xc14 +#define A4XX_VSC_PIPE_DATA_ADDRESS_5 0xc15 +#define A4XX_VSC_PIPE_DATA_ADDRESS_6 0xc16 +#define A4XX_VSC_PIPE_DATA_ADDRESS_7 0xc17 +#define A4XX_VSC_PIPE_DATA_LENGTH_0 0xc18 +#define A4XX_VSC_PIPE_DATA_LENGTH_1 0xc19 +#define A4XX_VSC_PIPE_DATA_LENGTH_2 0xc1a +#define A4XX_VSC_PIPE_DATA_LENGTH_3 0xc1b +#define A4XX_VSC_PIPE_DATA_LENGTH_4 0xc1c +#define A4XX_VSC_PIPE_DATA_LENGTH_5 0xc1d +#define A4XX_VSC_PIPE_DATA_LENGTH_6 0xc1e +#define A4XX_VSC_PIPE_DATA_LENGTH_7 0xc1f +#define A4XX_VSC_PERFCTR_VSC_SEL_0 0xc50 +#define A4XX_VSC_PERFCTR_VSC_SEL_1 0xc51 + +/* VFD registers */ +#define A4XX_VFD_CONTROL_0 0x2200 +#define A4XX_VFD_FETCH_INSTR_0_0 0x220a +#define A4XX_VFD_FETCH_INSTR_1_31 0x2287 +#define A4XX_VFD_PERFCTR_VFD_SEL_0 0xe43 +#define A4XX_VFD_PERFCTR_VFD_SEL_1 0xe44 +#define A4XX_VFD_PERFCTR_VFD_SEL_2 0xe45 +#define A4XX_VFD_PERFCTR_VFD_SEL_3 0xe46 +#define A4XX_VFD_PERFCTR_VFD_SEL_4 0xe47 +#define A4XX_VFD_PERFCTR_VFD_SEL_5 0xe48 +#define A4XX_VFD_PERFCTR_VFD_SEL_6 0xe49 +#define A4XX_VFD_PERFCTR_VFD_SEL_7 0xe4a +#define A4XX_VFD_FETCH_INSTR_1_0 0x220b +#define A4XX_VFD_FETCH_INSTR_1_1 0x220f +#define A4XX_VFD_FETCH_INSTR_1_2 0x2213 +#define A4XX_VFD_FETCH_INSTR_1_3 0x2217 +#define A4XX_VFD_FETCH_INSTR_1_4 0x221b +#define A4XX_VFD_FETCH_INSTR_1_5 0x221f +#define A4XX_VFD_FETCH_INSTR_1_6 0x2223 +#define A4XX_VFD_FETCH_INSTR_1_7 0x2227 +#define A4XX_VFD_FETCH_INSTR_1_8 0x222b +#define A4XX_VFD_FETCH_INSTR_1_9 0x222f +#define A4XX_VFD_FETCH_INSTR_1_10 0x2233 +#define A4XX_VFD_FETCH_INSTR_1_11 0x2237 +#define A4XX_VFD_FETCH_INSTR_1_12 0x223b +#define A4XX_VFD_FETCH_INSTR_1_13 0x223f +#define A4XX_VFD_FETCH_INSTR_1_14 0x2243 +#define A4XX_VFD_FETCH_INSTR_1_15 0x2247 +#define A4XX_VFD_FETCH_INSTR_1_16 0x224b +#define A4XX_VFD_FETCH_INSTR_1_17 0x224f +#define A4XX_VFD_FETCH_INSTR_1_18 0x2253 +#define A4XX_VFD_FETCH_INSTR_1_19 0x2257 +#define A4XX_VFD_FETCH_INSTR_1_20 0x225b +#define A4XX_VFD_FETCH_INSTR_1_21 0x225f +#define A4XX_VFD_FETCH_INSTR_1_22 0x2263 +#define A4XX_VFD_FETCH_INSTR_1_23 0x2267 +#define A4XX_VFD_FETCH_INSTR_1_24 0x226b +#define A4XX_VFD_FETCH_INSTR_1_25 0x226f +#define A4XX_VFD_FETCH_INSTR_1_26 0x2273 +#define A4XX_VFD_FETCH_INSTR_1_27 0x2277 +#define A4XX_VFD_FETCH_INSTR_1_28 0x227b +#define A4XX_VFD_FETCH_INSTR_1_29 0x227f +#define A4XX_VFD_FETCH_INSTR_1_30 0x2283 +#define A4XX_VFD_FETCH_INSTR_1_31 0x2287 + + +enum a4xx_vfd_perfctr_vfd_sel { + VFD_VPC_BYPASS_TRANS = 0x2, + 
VFD_UPPER_SHADER_FIBERS = 0xb, + VFD_LOWER_SHADER_FIBERS = 0xc, +}; + +/* VBIF registers */ +#define A4XX_VBIF_VERSION 0x3000 +#define A4XX_VBIF_CLKON 0x3001 +#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 + +#define A4XX_VBIF_ABIT_SORT 0x301c +#define A4XX_VBIF_ABIT_SORT_CONF 0x301d +#define A4XX_VBIF_GATE_OFF_WRREQ_EN 0x302a +#define A4XX_VBIF_IN_RD_LIM_CONF0 0x302c +#define A4XX_VBIF_IN_RD_LIM_CONF1 0x302d +#define A4XX_VBIF_IN_WR_LIM_CONF0 0x3030 +#define A4XX_VBIF_IN_WR_LIM_CONF1 0x3031 +#define A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 + +#define A4XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A4XX_VBIF_XIN_HALT_CTRL0_MASK 0x1F +#define A405_VBIF_XIN_HALT_CTRL0_MASK 0x3 + +#define A4XX_VBIF_XIN_HALT_CTRL1 0x3081 + +#define A4XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 + +#define A4XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A4XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0 + +#define A4XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A4XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0xF +#define A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A4XX_VBIF_TEST_BUS_OUT 0x308c + +#define A4XX_VBIF_PERF_CNT_EN0 0x30c0 +#define A4XX_VBIF_PERF_CNT_EN1 0x30c1 +#define A4XX_VBIF_PERF_CNT_EN2 0x30c2 +#define A4XX_VBIF_PERF_CNT_EN3 0x30c3 +#define A4XX_VBIF_PERF_CNT_CLR0 0x30c8 +#define A4XX_VBIF_PERF_CNT_CLR1 0x30c9 +#define A4XX_VBIF_PERF_CNT_CLR2 0x30ca +#define A4XX_VBIF_PERF_CNT_CLR3 0x30cb +#define A4XX_VBIF_PERF_CNT_SEL0 0x30d0 +#define A4XX_VBIF_PERF_CNT_SEL1 0x30d1 +#define A4XX_VBIF_PERF_CNT_SEL2 0x30d2 +#define A4XX_VBIF_PERF_CNT_SEL3 0x30d3 +#define A4XX_VBIF_PERF_CNT_LOW0 0x30d8 +#define A4XX_VBIF_PERF_CNT_LOW1 0x30d9 +#define A4XX_VBIF_PERF_CNT_LOW2 0x30da +#define A4XX_VBIF_PERF_CNT_LOW3 0x30db +#define A4XX_VBIF_PERF_CNT_HIGH0 0x30e0 +#define A4XX_VBIF_PERF_CNT_HIGH1 0x30e1 +#define A4XX_VBIF_PERF_CNT_HIGH2 0x30e2 +#define A4XX_VBIF_PERF_CNT_HIGH3 0x30e3 + +#define A4XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A4XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A4XX_VBIF_PERF_PWR_CNT_EN2 0x3102 +#define A4XX_VBIF_PERF_PWR_CNT_EN3 0x3103 +#define A4XX_VBIF_PERF_PWR_CNT_CLR0 0x3108 +#define A4XX_VBIF_PERF_PWR_CNT_CLR1 0x3109 +#define A4XX_VBIF_PERF_PWR_CNT_CLR2 0x310A +#define A4XX_VBIF_PERF_PWR_CNT_CLR3 0x310B +#define A4XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A4XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A4XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 +#define A4XX_VBIF_PERF_PWR_CNT_LOW3 0x3113 +#define A4XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A4XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A4XX_VBIF_PERF_PWR_CNT_HIGH2 0x311a +#define A4XX_VBIF_PERF_PWR_CNT_HIGH3 0x311b + +/* Bit flags for RBBM_CTL */ +#define A4XX_RBBM_RBBM_CTL_RESET_PWR_CTR0 0x00000001 +#define A4XX_RBBM_RBBM_CTL_RESET_PWR_CTR1 0x00000002 +#define A4XX_RBBM_RBBM_CTL_ENABLE_PWR_CTR0 0x00000010 +#define A4XX_RBBM_RBBM_CTL_ENABLE_PWR_CTR1 0x00000020 + +/* GRAS registers */ +#define A4XX_GRAS_PERFCTR_TSE_SEL_0 0xc88 +#define A4XX_GRAS_PERFCTR_TSE_SEL_1 0xc89 +#define A4XX_GRAS_PERFCTR_TSE_SEL_2 0xc8a +#define A4XX_GRAS_PERFCTR_TSE_SEL_3 0xc8b +#define A4XX_GRAS_PERFCTR_RAS_SEL_0 0xc8c +#define A4XX_GRAS_PERFCTR_RAS_SEL_1 0xc8d +#define A4XX_GRAS_PERFCTR_RAS_SEL_2 0xc8e +#define A4XX_GRAS_PERFCTR_RAS_SEL_3 0xc8f + +/* PC registers */ +#define A4XX_PC_PERFCTR_PC_SEL_0 0xd10 +#define 
A4XX_PC_PERFCTR_PC_SEL_1 0xd11 +#define A4XX_PC_PERFCTR_PC_SEL_2 0xd12 +#define A4XX_PC_PERFCTR_PC_SEL_3 0xd13 +#define A4XX_PC_PERFCTR_PC_SEL_4 0xd14 +#define A4XX_PC_PERFCTR_PC_SEL_5 0xd15 +#define A4XX_PC_PERFCTR_PC_SEL_6 0xd16 +#define A4XX_PC_PERFCTR_PC_SEL_7 0xd17 + +enum a4xx_pc_perfctr_pc_sel { + PC_INSTANCES = 0x1, + PC_VERTEX_HITS = 0x8, + PC_GENERATED_FIBERS = 0x12, + PC_GENERATED_WAVES = 0x13, +}; + +/* HLSQ registers */ +#define A4XX_HLSQ_TIMEOUT_THRESHOLD 0xe00 +#define A4XX_HLSQ_STATE_RESTORE_TRIGGER 0xe01 +#define A4XX_HLSQ_MODE_CONTROL 0xe05 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xe06 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xe07 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xe08 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xe09 +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xe0a +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xe0b +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xe0c +#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xe0d +#define A4XX_HLSQ_SPTP_RDSEL 0xe30 +#define A4xx_HLSQ_CONTROL_0 0x23c0 +#define A4xx_HLSQ_CONTROL_1 0x23c1 +#define A4xx_HLSQ_CONTROL_2 0x23c2 +#define A4xx_HLSQ_CONTROL_3 0x23c3 +#define A4xx_HLSQ_CONTROL_4 0x23c4 +#define A4XX_HLSQ_CS_CONTROL 0x23ca +#define A4XX_HLSQ_CL_NDRANGE_0 0x23cd +#define A4XX_HLSQ_CL_NDRANGE_1 0x23ce +#define A4XX_HLSQ_CL_NDRANGE_2 0x23cf +#define A4XX_HLSQ_CL_NDRANGE_3 0x23d0 +#define A4XX_HLSQ_CL_NDRANGE_4 0x23d1 +#define A4XX_HLSQ_CL_NDRANGE_5 0x23d2 +#define A4XX_HLSQ_CL_NDRANGE_6 0x23d3 +#define A4XX_HLSQ_CL_CONTROL_0 0x23d4 +#define A4XX_HLSQ_CL_CONTROL_1 0x23d5 +#define A4XX_HLSQ_CL_KERNEL_CONST 0x23d6 +#define A4XX_HLSQ_CL_KERNEL_GROUP_X 0x23d7 +#define A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x23d8 +#define A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x23d9 +#define A4XX_HLSQ_CL_WG_OFFSET 0x23da +#define A4XX_HLSQ_UPDATE_CONTROL 0x23db + +enum a4xx_hlsq_perfctr_hlsq_sel { + HLSQ_SP_VS_STAGE_CONSTANT = 0x0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 0x1, + HLSQ_SP_FS_STAGE_CONSTANT = 0x2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 0x3, + HLSQ_FS_STAGE_16_WAVES = 0x8, + HLSQ_FS_STAGE_32_WAVES = 0x9, + HLSQ_FS_STAGE_64_WAVES = 0xa, + HLSQ_VS_STAGE_16_WAVES = 0xb, + HLSQ_VS_STAGE_32_WAVES = 0xc, +}; + +/* CCU registers */ +#define A4XX_RB_PERFCTR_CCU_SEL_0 0xccf +#define A4XX_RB_PERFCTR_CCU_SEL_1 0xcd0 +#define A4XX_RB_PERFCTR_CCU_SEL_2 0xcd1 +#define A4XX_RB_PERFCTR_CCU_SEL_3 0xcd2 + +enum a4xx_cu_perfctr_ccu_sel { + CCU_VBIF_STALL = 0x1, + CCU_VBIF_LATENCY_CYCLES = 0x4, + CCU_VBIF_LATENCY_SAMPLES = 0x5, + CCU_Z_READ = 0x13, + CCU_Z_WRITE = 0x14, + CCU_C_READ = 0x15, + CCU_C_WRITE = 0x16, +}; + +/* UCHE registers */ +#define A4XX_UCHE_PERFCTR_UCHE_SEL_0 0xe8e +#define A4XX_UCHE_PERFCTR_UCHE_SEL_1 0xe8f +#define A4XX_UCHE_PERFCTR_UCHE_SEL_2 0xe90 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_3 0xe91 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_4 0xe92 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_5 0xe93 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_6 0xe94 +#define A4XX_UCHE_PERFCTR_UCHE_SEL_7 0xe95 + +/* TPL1 registers */ +enum a4xx_uche_perfctr_uche_sel { + UCHE_READ_REQUESTS_MARB = 0x8, + UCHE_READ_REQUESTS_SP = 0x9, + UCHE_WRITE_REQUESTS_MARB = 0xa, + UCHE_WRITE_REQUESTS_SP = 0xb, + UCHE_WRITE_REQUESTS_VPC = 0x14, +}; + +/* TPL1 registers */ +#define A4XX_TPL1_TP_MODE_CONTROL 0xf03 +#define A4XX_TPL1_PERFCTR_TP_SEL_0 0xf04 +#define A4XX_TPL1_PERFCTR_TP_SEL_1 0xf05 +#define A4XX_TPL1_PERFCTR_TP_SEL_2 0xf06 +#define A4XX_TPL1_PERFCTR_TP_SEL_3 0xf07 +#define A4XX_TPL1_PERFCTR_TP_SEL_4 0xf08 +#define A4XX_TPL1_PERFCTR_TP_SEL_5 0xf09 +#define A4XX_TPL1_PERFCTR_TP_SEL_6 0xf0a +#define A4XX_TPL1_PERFCTR_TP_SEL_7 0xf0b +#define 
A4XX_TPL1_TP_TEX_TSIZE_1 0x23a0 +#define A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x23A4 +#define A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x23A5 +#define A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x23A6 + +enum a4xx_tpl1_perfctr_tp_sel { + TP_OUTPUT_TEXELS_POINT = 0x2, + TP_OUTPUT_TEXELS_BILINEAR = 0x3, + TP_OUTPUT_TEXELS_MIP = 0x4, + TP_OUTPUT_TEXELS_ANISO = 0x5, + TP_OUTPUT_TEXELS_OPS16 = 0x6, + TP_OUTPUT_TEXELS_OPS32 = 0x7, + TP_ZERO_LOD = 0xe, + TP_LATENCY = 0x12, + TP_LATENCY_TRANS = 0x13, +}; + +/* Enum for debug bus */ +enum a4xx_rbbm_debbus_id { + A4XX_RBBM_DEBBUS_CP_ID = 0x1, + A4XX_RBBM_DEBBUS_RBBM_ID = 0x2, + A4XX_RBBM_DEBBUS_VBIF_ID = 0x3, + A4XX_RBBM_DEBBUS_HLSQ_ID = 0x4, + A4XX_RBBM_DEBBUS_UCHE_ID = 0x5, + A4XX_RBBM_DEBBUS_DPM_ID = 0x6, + A4XX_RBBM_DEBBUS_TESS_ID = 0x7, + A4XX_RBBM_DEBBUS_PC_ID = 0x8, + A4XX_RBBM_DEBBUS_VFD_ID = 0x9, + A4XX_RBBM_DEBBUS_VPC_ID = 0xa, + A4XX_RBBM_DEBBUS_TSE_ID = 0xb, + A4XX_RBBM_DEBBUS_RAS_ID = 0xc, + A4XX_RBBM_DEBBUS_VSC_ID = 0xd, + A4XX_RBBM_DEBBUS_COM_ID = 0xe, + A4XX_RBBM_DEBBUS_DCOM_ID = 0xf, + A4XX_RBBM_DEBBUS_SP_0_ID = 0x10, + A4XX_RBBM_DEBBUS_SP_1_ID = 0x11, + A4XX_RBBM_DEBBUS_SP_2_ID = 0x12, + A4XX_RBBM_DEBBUS_SP_3_ID = 0x13, + A4XX_RBBM_DEBBUS_TPL1_0_ID = 0x18, + A4XX_RBBM_DEBBUS_TPL1_1_ID = 0x19, + A4XX_RBBM_DEBBUS_TPL1_2_ID = 0x1a, + A4XX_RBBM_DEBBUS_TPL1_3_ID = 0x1b, + A4XX_RBBM_DEBBUS_RB_0_ID = 0x20, + A4XX_RBBM_DEBBUS_RB_1_ID = 0x21, + A4XX_RBBM_DEBBUS_RB_2_ID = 0x22, + A4XX_RBBM_DEBBUS_RB_3_ID = 0x23, + A4XX_RBBM_DEBBUS_MARB_0_ID = 0x28, + A4XX_RBBM_DEBBUS_MARB_1_ID = 0x29, + A4XX_RBBM_DEBBUS_MARB_2_ID = 0x2a, + A4XX_RBBM_DEBBUS_MARB_3_ID = 0x2b, + A4XX_RBBM_DEBBUS_CCU_0_ID = 0x30, + A4XX_RBBM_DEBBUS_CCU_1_ID = 0x31, + A4XX_RBBM_DEBBUS_CCU_2_ID = 0x32, + A4XX_RBBM_DEBBUS_CCU_3_ID = 0x33 +}; + +#define A4XX_NUM_AXI_ARB_BLOCKS 2 +#define A4XX_NUM_XIN_BLOCKS 5 + +#endif /* _A4XX_REG_H */ diff --git a/drivers/gpu/msm/a5xx_reg.h b/drivers/gpu/msm/a5xx_reg.h new file mode 100644 index 000000000000..cdaa7f513b9d --- /dev/null +++ b/drivers/gpu/msm/a5xx_reg.h @@ -0,0 +1,897 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _A5XX_REG_H +#define _A5XX_REG_H + +/* A5XX interrupt bits */ +#define A5XX_INT_RBBM_GPU_IDLE 0 +#define A5XX_INT_RBBM_AHB_ERROR 1 +#define A5XX_INT_RBBM_TRANSFER_TIMEOUT 2 +#define A5XX_INT_RBBM_ME_MS_TIMEOUT 3 +#define A5XX_INT_RBBM_PFP_MS_TIMEOUT 4 +#define A5XX_INT_RBBM_ETS_MS_TIMEOUT 5 +#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6 +#define A5XX_INT_RBBM_GPC_ERROR 7 +#define A5XX_INT_CP_SW 8 +#define A5XX_INT_CP_HW_ERROR 9 +#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS 10 +#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS 11 +#define A5XX_INT_CP_CCU_RESOLVE_TS 12 +#define A5XX_INT_CP_IB2 13 +#define A5XX_INT_CP_IB1 14 +#define A5XX_INT_CP_RB 15 +#define A5XX_INT_CP_UNUSED_1 16 +#define A5XX_INT_CP_RB_DONE_TS 17 +#define A5XX_INT_CP_WT_DONE_TS 18 +#define A5XX_INT_UNKNOWN_1 19 +#define A5XX_INT_CP_CACHE_FLUSH_TS 20 +#define A5XX_INT_UNUSED_2 21 +#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW 22 +#define A5XX_INT_MISC_HANG_DETECT 23 +#define A5XX_INT_UCHE_OOB_ACCESS 24 +#define A5XX_INT_UCHE_TRAP_INTR 25 +#define A5XX_INT_DEBBUS_INTR_0 26 +#define A5XX_INT_DEBBUS_INTR_1 27 +#define A5XX_INT_GPMU_VOLTAGE_DROOP 28 +#define A5XX_INT_GPMU_FIRMWARE 29 +#define A5XX_INT_ISDB_CPU_IRQ 30 +#define A5XX_INT_ISDB_UNDER_DEBUG 31 + +/* CP Interrupt bits */ +#define A5XX_CP_OPCODE_ERROR 0 +#define A5XX_CP_RESERVED_BIT_ERROR 1 +#define A5XX_CP_HW_FAULT_ERROR 2 +#define A5XX_CP_DMA_ERROR 3 +#define A5XX_CP_REGISTER_PROTECTION_ERROR 4 +#define A5XX_CP_AHB_ERROR 5 + +/* CP registers */ +#define A5XX_CP_RB_BASE 0x800 +#define A5XX_CP_RB_BASE_HI 0x801 +#define A5XX_CP_RB_CNTL 0x802 +#define A5XX_CP_RB_RPTR 0x806 +#define A5XX_CP_RB_WPTR 0x807 +#define A5XX_CP_PFP_STAT_ADDR 0x808 +#define A5XX_CP_PFP_STAT_DATA 0x809 +#define A5XX_CP_DRAW_STATE_ADDR 0x80B +#define A5XX_CP_DRAW_STATE_DATA 0x80C +#define A5XX_CP_CRASH_SCRIPT_BASE_LO 0x817 +#define A5XX_CP_CRASH_SCRIPT_BASE_HI 0x818 +#define A5XX_CP_CRASH_DUMP_CNTL 0x819 +#define A5XX_CP_ME_STAT_ADDR 0x81A +#define A5XX_CP_ROQ_THRESHOLDS_1 0x81F +#define A5XX_CP_ROQ_THRESHOLDS_2 0x820 +#define A5XX_CP_ROQ_DBG_ADDR 0x821 +#define A5XX_CP_ROQ_DBG_DATA 0x822 +#define A5XX_CP_MEQ_DBG_ADDR 0x823 +#define A5XX_CP_MEQ_DBG_DATA 0x824 +#define A5XX_CP_MEQ_THRESHOLDS 0x825 +#define A5XX_CP_MERCIU_SIZE 0x826 +#define A5XX_CP_MERCIU_DBG_ADDR 0x827 +#define A5XX_CP_MERCIU_DBG_DATA_1 0x828 +#define A5XX_CP_MERCIU_DBG_DATA_2 0x829 +#define A5XX_CP_PFP_UCODE_DBG_ADDR 0x82A +#define A5XX_CP_PFP_UCODE_DBG_DATA 0x82B +#define A5XX_CP_ME_UCODE_DBG_ADDR 0x82F +#define A5XX_CP_ME_UCODE_DBG_DATA 0x830 +#define A5XX_CP_CNTL 0x831 +#define A5XX_CP_ME_CNTL 0x832 +#define A5XX_CP_CHICKEN_DBG 0x833 +#define A5XX_CP_PFP_INSTR_BASE_LO 0x835 +#define A5XX_CP_PFP_INSTR_BASE_HI 0x836 +#define A5XX_CP_PM4_INSTR_BASE_LO 0x838 +#define A5XX_CP_PM4_INSTR_BASE_HI 0x839 +#define A5XX_CP_CONTEXT_SWITCH_CNTL 0x83B +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x83C +#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x83D +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x83E +#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x83F +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x840 +#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x841 +#define A5XX_CP_ADDR_MODE_CNTL 0x860 +#define A5XX_CP_ME_STAT_DATA 0xB14 +#define A5XX_CP_WFI_PEND_CTR 0xB15 +#define A5XX_CP_INTERRUPT_STATUS 0xB18 +#define A5XX_CP_HW_FAULT 0xB1A +#define A5XX_CP_PROTECT_STATUS 0xB1C +#define A5XX_CP_IB1_BASE 0xB1F +#define A5XX_CP_IB1_BASE_HI 0xB20 +#define A5XX_CP_IB1_BUFSZ 0xB21 +#define A5XX_CP_IB2_BASE 0xB22 +#define A5XX_CP_IB2_BASE_HI 0xB23 +#define 
A5XX_CP_IB2_BUFSZ 0xB24 +#define A5XX_CP_PROTECT_REG_0 0x880 +#define A5XX_CP_PROTECT_CNTL 0x8A0 +#define A5XX_CP_AHB_FAULT 0xB1B +#define A5XX_CP_PERFCTR_CP_SEL_0 0xBB0 +#define A5XX_CP_PERFCTR_CP_SEL_1 0xBB1 +#define A5XX_CP_PERFCTR_CP_SEL_2 0xBB2 +#define A5XX_CP_PERFCTR_CP_SEL_3 0xBB3 +#define A5XX_CP_PERFCTR_CP_SEL_4 0xBB4 +#define A5XX_CP_PERFCTR_CP_SEL_5 0xBB5 +#define A5XX_CP_PERFCTR_CP_SEL_6 0xBB6 +#define A5XX_CP_PERFCTR_CP_SEL_7 0xBB7 + +#define A5XX_VSC_ADDR_MODE_CNTL 0xBC1 + +/* CP Power Counter Registers Select */ +#define A5XX_CP_POWERCTR_CP_SEL_0 0xBBA +#define A5XX_CP_POWERCTR_CP_SEL_1 0xBBB +#define A5XX_CP_POWERCTR_CP_SEL_2 0xBBC +#define A5XX_CP_POWERCTR_CP_SEL_3 0xBBD + +/* CP_EVENT_WRITE events */ +#define A5XX_CACHE_FLUSH_TS 0x4 + +/* RBBM registers */ +#define A5XX_RBBM_CFG_DBGBUS_SEL_A 0x4 +#define A5XX_RBBM_CFG_DBGBUS_SEL_B 0x5 +#define A5XX_RBBM_CFG_DBGBUS_SEL_C 0x6 +#define A5XX_RBBM_CFG_DBGBUS_SEL_D 0x7 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PONG_INDEX_SHIFT 0x10 +#define A5XX_RBBM_CFG_DBGBUS_SEL_PONG_BLK_SEL_SHIFT 0x18 + +#define A5XX_RBBM_CFG_DBGBUS_CNTLT 0x8 +#define A5XX_RBBM_CFG_DBGBUS_CNTLM 0x9 +#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x18 +#define A5XX_RBBM_CFG_DBGBUS_OPL 0xA +#define A5XX_RBBM_CFG_DBGBUS_OPE 0xB +#define A5XX_RBBM_CFG_DBGBUS_IVTL_0 0xC +#define A5XX_RBBM_CFG_DBGBUS_IVTL_1 0xD +#define A5XX_RBBM_CFG_DBGBUS_IVTL_2 0xE +#define A5XX_RBBM_CFG_DBGBUS_IVTL_3 0xF +#define A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x10 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x11 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x12 +#define A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x13 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x14 +#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x15 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x16 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x17 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x18 +#define A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x19 +#define A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x1A +#define A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x1B +#define A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x1C +#define A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x1D +#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x1E +#define A5XX_RBBM_CFG_DBGBUS_PTRC0 0x1F +#define A5XX_RBBM_CFG_DBGBUS_PTRC1 0x20 +#define A5XX_RBBM_CFG_DBGBUS_LOADREG 0x21 +#define A5XX_RBBM_CFG_DBGBUS_IDX 0x22 +#define A5XX_RBBM_CFG_DBGBUS_CLRC 0x23 +#define A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x24 +#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x2F +#define A5XX_RBBM_INT_CLEAR_CMD 0x37 +#define A5XX_RBBM_INT_0_MASK 0x38 +#define A5XX_RBBM_AHB_DBG_CNTL 0x3F +#define A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x41 +#define A5XX_RBBM_SW_RESET_CMD 0x43 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD 0x45 +#define A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x46 +#define A5XX_RBBM_DBG_LO_HI_GPIO 0x48 +#define A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x49 +#define A5XX_RBBM_CLOCK_CNTL_TP0 0x4A +#define A5XX_RBBM_CLOCK_CNTL_TP1 0x4B +#define A5XX_RBBM_CLOCK_CNTL_TP2 0x4C +#define A5XX_RBBM_CLOCK_CNTL_TP3 0x4D +#define A5XX_RBBM_CLOCK_CNTL2_TP0 0x4E +#define A5XX_RBBM_CLOCK_CNTL2_TP1 0x4F +#define A5XX_RBBM_CLOCK_CNTL2_TP2 0x50 +#define A5XX_RBBM_CLOCK_CNTL2_TP3 0x51 +#define A5XX_RBBM_CLOCK_CNTL3_TP0 0x52 +#define A5XX_RBBM_CLOCK_CNTL3_TP1 0x53 +#define A5XX_RBBM_CLOCK_CNTL3_TP2 0x54 +#define A5XX_RBBM_CLOCK_CNTL3_TP3 0x55 +#define A5XX_RBBM_READ_AHB_THROUGH_DBG 0x59 +#define A5XX_RBBM_CLOCK_CNTL_UCHE 0x5A +#define A5XX_RBBM_CLOCK_CNTL2_UCHE 0x5B +#define A5XX_RBBM_CLOCK_CNTL3_UCHE 0x5C +#define A5XX_RBBM_CLOCK_CNTL4_UCHE 0x5D +#define 
A5XX_RBBM_CLOCK_HYST_UCHE 0x5E +#define A5XX_RBBM_CLOCK_DELAY_UCHE 0x5F +#define A5XX_RBBM_CLOCK_MODE_GPC 0x60 +#define A5XX_RBBM_CLOCK_DELAY_GPC 0x61 +#define A5XX_RBBM_CLOCK_HYST_GPC 0x62 +#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x63 +#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x64 +#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x65 +#define A5XX_RBBM_CLOCK_DELAY_HLSQ 0x66 +#define A5XX_RBBM_CLOCK_CNTL 0x67 +#define A5XX_RBBM_CLOCK_CNTL_SP0 0x68 +#define A5XX_RBBM_CLOCK_CNTL_SP1 0x69 +#define A5XX_RBBM_CLOCK_CNTL_SP2 0x6A +#define A5XX_RBBM_CLOCK_CNTL_SP3 0x6B +#define A5XX_RBBM_CLOCK_CNTL2_SP0 0x6C +#define A5XX_RBBM_CLOCK_CNTL2_SP1 0x6D +#define A5XX_RBBM_CLOCK_CNTL2_SP2 0x6E +#define A5XX_RBBM_CLOCK_CNTL2_SP3 0x6F +#define A5XX_RBBM_CLOCK_HYST_SP0 0x70 +#define A5XX_RBBM_CLOCK_HYST_SP1 0x71 +#define A5XX_RBBM_CLOCK_HYST_SP2 0x72 +#define A5XX_RBBM_CLOCK_HYST_SP3 0x73 +#define A5XX_RBBM_CLOCK_DELAY_SP0 0x74 +#define A5XX_RBBM_CLOCK_DELAY_SP1 0x75 +#define A5XX_RBBM_CLOCK_DELAY_SP2 0x76 +#define A5XX_RBBM_CLOCK_DELAY_SP3 0x77 +#define A5XX_RBBM_CLOCK_CNTL_RB0 0x78 +#define A5XX_RBBM_CLOCK_CNTL_RB1 0x79 +#define A5XX_RBBM_CLOCK_CNTL_RB2 0x7a +#define A5XX_RBBM_CLOCK_CNTL_RB3 0x7B +#define A5XX_RBBM_CLOCK_CNTL2_RB0 0x7C +#define A5XX_RBBM_CLOCK_CNTL2_RB1 0x7D +#define A5XX_RBBM_CLOCK_CNTL2_RB2 0x7E +#define A5XX_RBBM_CLOCK_CNTL2_RB3 0x7F +#define A5XX_RBBM_CLOCK_HYST_RAC 0x80 +#define A5XX_RBBM_CLOCK_DELAY_RAC 0x81 +#define A5XX_RBBM_CLOCK_CNTL_CCU0 0x82 +#define A5XX_RBBM_CLOCK_CNTL_CCU1 0x83 +#define A5XX_RBBM_CLOCK_CNTL_CCU2 0x84 +#define A5XX_RBBM_CLOCK_CNTL_CCU3 0x85 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x86 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x87 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x88 +#define A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x89 +#define A5XX_RBBM_CLOCK_CNTL_RAC 0x8A +#define A5XX_RBBM_CLOCK_CNTL2_RAC 0x8B +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x8C +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x8D +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x8E +#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x8F +#define A5XX_RBBM_CLOCK_HYST_VFD 0x90 +#define A5XX_RBBM_CLOCK_MODE_VFD 0x91 +#define A5XX_RBBM_CLOCK_DELAY_VFD 0x92 +#define A5XX_RBBM_AHB_CNTL0 0x93 +#define A5XX_RBBM_AHB_CNTL1 0x94 +#define A5XX_RBBM_AHB_CNTL2 0x95 +#define A5XX_RBBM_AHB_CMD 0x96 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x9C +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x9D +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x9E +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x9F +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0xA0 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0xA1 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0xA2 +#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0xA3 +#define A5XX_RBBM_CLOCK_DELAY_TP0 0xA4 +#define A5XX_RBBM_CLOCK_DELAY_TP1 0xA5 +#define A5XX_RBBM_CLOCK_DELAY_TP2 0xA6 +#define A5XX_RBBM_CLOCK_DELAY_TP3 0xA7 +#define A5XX_RBBM_CLOCK_DELAY2_TP0 0xA8 +#define A5XX_RBBM_CLOCK_DELAY2_TP1 0xA9 +#define A5XX_RBBM_CLOCK_DELAY2_TP2 0xAA +#define A5XX_RBBM_CLOCK_DELAY2_TP3 0xAB +#define A5XX_RBBM_CLOCK_DELAY3_TP0 0xAC +#define A5XX_RBBM_CLOCK_DELAY3_TP1 0xAD +#define A5XX_RBBM_CLOCK_DELAY3_TP2 0xAE +#define A5XX_RBBM_CLOCK_DELAY3_TP3 0xAF +#define A5XX_RBBM_CLOCK_HYST_TP0 0xB0 +#define A5XX_RBBM_CLOCK_HYST_TP1 0xB1 +#define A5XX_RBBM_CLOCK_HYST_TP2 0xB2 +#define A5XX_RBBM_CLOCK_HYST_TP3 0xB3 +#define A5XX_RBBM_CLOCK_HYST2_TP0 0xB4 +#define A5XX_RBBM_CLOCK_HYST2_TP1 0xB5 +#define A5XX_RBBM_CLOCK_HYST2_TP2 0xB6 +#define A5XX_RBBM_CLOCK_HYST2_TP3 0xB7 +#define A5XX_RBBM_CLOCK_HYST3_TP0 0xB8 +#define 
A5XX_RBBM_CLOCK_HYST3_TP1 0xB9 +#define A5XX_RBBM_CLOCK_HYST3_TP2 0xBA +#define A5XX_RBBM_CLOCK_HYST3_TP3 0xBB +#define A5XX_RBBM_PERFCTR_CP_0_LO 0x3A0 +#define A5XX_RBBM_PERFCTR_CP_0_HI 0x3A1 +#define A5XX_RBBM_PERFCTR_CP_1_LO 0x3A2 +#define A5XX_RBBM_PERFCTR_CP_1_HI 0x3A3 +#define A5XX_RBBM_PERFCTR_CP_2_LO 0x3A4 +#define A5XX_RBBM_PERFCTR_CP_2_HI 0x3A5 +#define A5XX_RBBM_PERFCTR_CP_3_LO 0x3A6 +#define A5XX_RBBM_PERFCTR_CP_3_HI 0x3A7 +#define A5XX_RBBM_PERFCTR_CP_4_LO 0x3A8 +#define A5XX_RBBM_PERFCTR_CP_4_HI 0x3A9 +#define A5XX_RBBM_PERFCTR_CP_5_LO 0x3AA +#define A5XX_RBBM_PERFCTR_CP_5_HI 0x3AB +#define A5XX_RBBM_PERFCTR_CP_6_LO 0x3AC +#define A5XX_RBBM_PERFCTR_CP_6_HI 0x3AD +#define A5XX_RBBM_PERFCTR_CP_7_LO 0x3AE +#define A5XX_RBBM_PERFCTR_CP_7_HI 0x3AF +#define A5XX_RBBM_PERFCTR_RBBM_0_LO 0x3B0 +#define A5XX_RBBM_PERFCTR_RBBM_0_HI 0x3B1 +#define A5XX_RBBM_PERFCTR_RBBM_1_LO 0x3B2 +#define A5XX_RBBM_PERFCTR_RBBM_1_HI 0x3B3 +#define A5XX_RBBM_PERFCTR_RBBM_2_LO 0x3B4 +#define A5XX_RBBM_PERFCTR_RBBM_2_HI 0x3B5 +#define A5XX_RBBM_PERFCTR_RBBM_3_LO 0x3B6 +#define A5XX_RBBM_PERFCTR_RBBM_3_HI 0x3B7 +#define A5XX_RBBM_PERFCTR_PC_0_LO 0x3B8 +#define A5XX_RBBM_PERFCTR_PC_0_HI 0x3B9 +#define A5XX_RBBM_PERFCTR_PC_1_LO 0x3BA +#define A5XX_RBBM_PERFCTR_PC_1_HI 0x3BB +#define A5XX_RBBM_PERFCTR_PC_2_LO 0x3BC +#define A5XX_RBBM_PERFCTR_PC_2_HI 0x3BD +#define A5XX_RBBM_PERFCTR_PC_3_LO 0x3BE +#define A5XX_RBBM_PERFCTR_PC_3_HI 0x3BF +#define A5XX_RBBM_PERFCTR_PC_4_LO 0x3C0 +#define A5XX_RBBM_PERFCTR_PC_4_HI 0x3C1 +#define A5XX_RBBM_PERFCTR_PC_5_LO 0x3C2 +#define A5XX_RBBM_PERFCTR_PC_5_HI 0x3C3 +#define A5XX_RBBM_PERFCTR_PC_6_LO 0x3C4 +#define A5XX_RBBM_PERFCTR_PC_6_HI 0x3C5 +#define A5XX_RBBM_PERFCTR_PC_7_LO 0x3C6 +#define A5XX_RBBM_PERFCTR_PC_7_HI 0x3C7 +#define A5XX_RBBM_PERFCTR_VFD_0_LO 0x3C8 +#define A5XX_RBBM_PERFCTR_VFD_0_HI 0x3C9 +#define A5XX_RBBM_PERFCTR_VFD_1_LO 0x3CA +#define A5XX_RBBM_PERFCTR_VFD_1_HI 0x3CB +#define A5XX_RBBM_PERFCTR_VFD_2_LO 0x3CC +#define A5XX_RBBM_PERFCTR_VFD_2_HI 0x3CD +#define A5XX_RBBM_PERFCTR_VFD_3_LO 0x3CE +#define A5XX_RBBM_PERFCTR_VFD_3_HI 0x3CF +#define A5XX_RBBM_PERFCTR_VFD_4_LO 0x3D0 +#define A5XX_RBBM_PERFCTR_VFD_4_HI 0x3D1 +#define A5XX_RBBM_PERFCTR_VFD_5_LO 0x3D2 +#define A5XX_RBBM_PERFCTR_VFD_5_HI 0x3D3 +#define A5XX_RBBM_PERFCTR_VFD_6_LO 0x3D4 +#define A5XX_RBBM_PERFCTR_VFD_6_HI 0x3D5 +#define A5XX_RBBM_PERFCTR_VFD_7_LO 0x3D6 +#define A5XX_RBBM_PERFCTR_VFD_7_HI 0x3D7 +#define A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x3D8 +#define A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x3D9 +#define A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x3DA +#define A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x3DB +#define A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x3DC +#define A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x3DD +#define A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x3DE +#define A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x3DF +#define A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x3E0 +#define A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x3E1 +#define A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x3E2 +#define A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x3E3 +#define A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x3E4 +#define A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x3E5 +#define A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x3E6 +#define A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x3E7 +#define A5XX_RBBM_PERFCTR_VPC_0_LO 0x3E8 +#define A5XX_RBBM_PERFCTR_VPC_0_HI 0x3E9 +#define A5XX_RBBM_PERFCTR_VPC_1_LO 0x3EA +#define A5XX_RBBM_PERFCTR_VPC_1_HI 0x3EB +#define A5XX_RBBM_PERFCTR_VPC_2_LO 0x3EC +#define A5XX_RBBM_PERFCTR_VPC_2_HI 0x3ED +#define A5XX_RBBM_PERFCTR_VPC_3_LO 0x3EE +#define A5XX_RBBM_PERFCTR_VPC_3_HI 0x3EF +#define A5XX_RBBM_PERFCTR_CCU_0_LO 0x3F0 +#define A5XX_RBBM_PERFCTR_CCU_0_HI 0x3F1 
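The A5XX_INT_* values near the top of this file are bit positions within A5XX_RBBM_INT_0_MASK and A5XX_RBBM_INT_0_STATUS, not masks. A small illustrative sketch of how an interrupt mask might be assembled from them; the EXAMPLE_* macro and the chosen bits are hypothetical, and the mask actually used by the driver is defined elsewhere in the a5xx code:

/* Illustrative only: build a mask by shifting the A5XX_INT_* bit numbers */
#define EXAMPLE_A5XX_INT_MASK \
	((1 << A5XX_INT_RBBM_AHB_ERROR) | \
	 (1 << A5XX_INT_CP_HW_ERROR) | \
	 (1 << A5XX_INT_CP_CACHE_FLUSH_TS) | \
	 (1 << A5XX_INT_MISC_HANG_DETECT))

static void example_enable_a5xx_irqs(struct kgsl_device *device)
{
	kgsl_regwrite(device, A5XX_RBBM_INT_0_MASK, EXAMPLE_A5XX_INT_MASK);
}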
+#define A5XX_RBBM_PERFCTR_CCU_1_LO 0x3F2 +#define A5XX_RBBM_PERFCTR_CCU_1_HI 0x3F3 +#define A5XX_RBBM_PERFCTR_CCU_2_LO 0x3F4 +#define A5XX_RBBM_PERFCTR_CCU_2_HI 0x3F5 +#define A5XX_RBBM_PERFCTR_CCU_3_LO 0x3F6 +#define A5XX_RBBM_PERFCTR_CCU_3_HI 0x3F7 +#define A5XX_RBBM_PERFCTR_TSE_0_LO 0x3F8 +#define A5XX_RBBM_PERFCTR_TSE_0_HI 0x3F9 +#define A5XX_RBBM_PERFCTR_TSE_1_LO 0x3FA +#define A5XX_RBBM_PERFCTR_TSE_1_HI 0x3FB +#define A5XX_RBBM_PERFCTR_TSE_2_LO 0x3FC +#define A5XX_RBBM_PERFCTR_TSE_2_HI 0x3FD +#define A5XX_RBBM_PERFCTR_TSE_3_LO 0x3FE +#define A5XX_RBBM_PERFCTR_TSE_3_HI 0x3FF +#define A5XX_RBBM_PERFCTR_RAS_0_LO 0x400 +#define A5XX_RBBM_PERFCTR_RAS_0_HI 0x401 +#define A5XX_RBBM_PERFCTR_RAS_1_LO 0x402 +#define A5XX_RBBM_PERFCTR_RAS_1_HI 0x403 +#define A5XX_RBBM_PERFCTR_RAS_2_LO 0x404 +#define A5XX_RBBM_PERFCTR_RAS_2_HI 0x405 +#define A5XX_RBBM_PERFCTR_RAS_3_LO 0x406 +#define A5XX_RBBM_PERFCTR_RAS_3_HI 0x407 +#define A5XX_RBBM_PERFCTR_UCHE_0_LO 0x408 +#define A5XX_RBBM_PERFCTR_UCHE_0_HI 0x409 +#define A5XX_RBBM_PERFCTR_UCHE_1_LO 0x40A +#define A5XX_RBBM_PERFCTR_UCHE_1_HI 0x40B +#define A5XX_RBBM_PERFCTR_UCHE_2_LO 0x40C +#define A5XX_RBBM_PERFCTR_UCHE_2_HI 0x40D +#define A5XX_RBBM_PERFCTR_UCHE_3_LO 0x40E +#define A5XX_RBBM_PERFCTR_UCHE_3_HI 0x40F +#define A5XX_RBBM_PERFCTR_UCHE_4_LO 0x410 +#define A5XX_RBBM_PERFCTR_UCHE_4_HI 0x411 +#define A5XX_RBBM_PERFCTR_UCHE_5_LO 0x412 +#define A5XX_RBBM_PERFCTR_UCHE_5_HI 0x413 +#define A5XX_RBBM_PERFCTR_UCHE_6_LO 0x414 +#define A5XX_RBBM_PERFCTR_UCHE_6_HI 0x415 +#define A5XX_RBBM_PERFCTR_UCHE_7_LO 0x416 +#define A5XX_RBBM_PERFCTR_UCHE_7_HI 0x417 +#define A5XX_RBBM_PERFCTR_TP_0_LO 0x418 +#define A5XX_RBBM_PERFCTR_TP_0_HI 0x419 +#define A5XX_RBBM_PERFCTR_TP_1_LO 0x41A +#define A5XX_RBBM_PERFCTR_TP_1_HI 0x41B +#define A5XX_RBBM_PERFCTR_TP_2_LO 0x41C +#define A5XX_RBBM_PERFCTR_TP_2_HI 0x41D +#define A5XX_RBBM_PERFCTR_TP_3_LO 0x41E +#define A5XX_RBBM_PERFCTR_TP_3_HI 0x41F +#define A5XX_RBBM_PERFCTR_TP_4_LO 0x420 +#define A5XX_RBBM_PERFCTR_TP_4_HI 0x421 +#define A5XX_RBBM_PERFCTR_TP_5_LO 0x422 +#define A5XX_RBBM_PERFCTR_TP_5_HI 0x423 +#define A5XX_RBBM_PERFCTR_TP_6_LO 0x424 +#define A5XX_RBBM_PERFCTR_TP_6_HI 0x425 +#define A5XX_RBBM_PERFCTR_TP_7_LO 0x426 +#define A5XX_RBBM_PERFCTR_TP_7_HI 0x427 +#define A5XX_RBBM_PERFCTR_SP_0_LO 0x428 +#define A5XX_RBBM_PERFCTR_SP_0_HI 0x429 +#define A5XX_RBBM_PERFCTR_SP_1_LO 0x42A +#define A5XX_RBBM_PERFCTR_SP_1_HI 0x42B +#define A5XX_RBBM_PERFCTR_SP_2_LO 0x42C +#define A5XX_RBBM_PERFCTR_SP_2_HI 0x42D +#define A5XX_RBBM_PERFCTR_SP_3_LO 0x42E +#define A5XX_RBBM_PERFCTR_SP_3_HI 0x42F +#define A5XX_RBBM_PERFCTR_SP_4_LO 0x430 +#define A5XX_RBBM_PERFCTR_SP_4_HI 0x431 +#define A5XX_RBBM_PERFCTR_SP_5_LO 0x432 +#define A5XX_RBBM_PERFCTR_SP_5_HI 0x433 +#define A5XX_RBBM_PERFCTR_SP_6_LO 0x434 +#define A5XX_RBBM_PERFCTR_SP_6_HI 0x435 +#define A5XX_RBBM_PERFCTR_SP_7_LO 0x436 +#define A5XX_RBBM_PERFCTR_SP_7_HI 0x437 +#define A5XX_RBBM_PERFCTR_SP_8_LO 0x438 +#define A5XX_RBBM_PERFCTR_SP_8_HI 0x439 +#define A5XX_RBBM_PERFCTR_SP_9_LO 0x43A +#define A5XX_RBBM_PERFCTR_SP_9_HI 0x43B +#define A5XX_RBBM_PERFCTR_SP_10_LO 0x43C +#define A5XX_RBBM_PERFCTR_SP_10_HI 0x43D +#define A5XX_RBBM_PERFCTR_SP_11_LO 0x43E +#define A5XX_RBBM_PERFCTR_SP_11_HI 0x43F +#define A5XX_RBBM_PERFCTR_RB_0_LO 0x440 +#define A5XX_RBBM_PERFCTR_RB_0_HI 0x441 +#define A5XX_RBBM_PERFCTR_RB_1_LO 0x442 +#define A5XX_RBBM_PERFCTR_RB_1_HI 0x443 +#define A5XX_RBBM_PERFCTR_RB_2_LO 0x444 +#define A5XX_RBBM_PERFCTR_RB_2_HI 0x445 +#define A5XX_RBBM_PERFCTR_RB_3_LO 0x446 +#define 
A5XX_RBBM_PERFCTR_RB_3_HI 0x447 +#define A5XX_RBBM_PERFCTR_RB_4_LO 0x448 +#define A5XX_RBBM_PERFCTR_RB_4_HI 0x449 +#define A5XX_RBBM_PERFCTR_RB_5_LO 0x44A +#define A5XX_RBBM_PERFCTR_RB_5_HI 0x44B +#define A5XX_RBBM_PERFCTR_RB_6_LO 0x44C +#define A5XX_RBBM_PERFCTR_RB_6_HI 0x44D +#define A5XX_RBBM_PERFCTR_RB_7_LO 0x44E +#define A5XX_RBBM_PERFCTR_RB_7_HI 0x44F +#define A5XX_RBBM_PERFCTR_VSC_0_LO 0x450 +#define A5XX_RBBM_PERFCTR_VSC_0_HI 0x451 +#define A5XX_RBBM_PERFCTR_VSC_1_LO 0x452 +#define A5XX_RBBM_PERFCTR_VSC_1_HI 0x453 +#define A5XX_RBBM_PERFCTR_LRZ_0_LO 0x454 +#define A5XX_RBBM_PERFCTR_LRZ_0_HI 0x455 +#define A5XX_RBBM_PERFCTR_LRZ_1_LO 0x456 +#define A5XX_RBBM_PERFCTR_LRZ_1_HI 0x457 +#define A5XX_RBBM_PERFCTR_LRZ_2_LO 0x458 +#define A5XX_RBBM_PERFCTR_LRZ_2_HI 0x459 +#define A5XX_RBBM_PERFCTR_LRZ_3_LO 0x45A +#define A5XX_RBBM_PERFCTR_LRZ_3_HI 0x45B +#define A5XX_RBBM_PERFCTR_CMP_0_LO 0x45C +#define A5XX_RBBM_PERFCTR_CMP_0_HI 0x45D +#define A5XX_RBBM_PERFCTR_CMP_1_LO 0x45E +#define A5XX_RBBM_PERFCTR_CMP_1_HI 0x45F +#define A5XX_RBBM_PERFCTR_CMP_2_LO 0x460 +#define A5XX_RBBM_PERFCTR_CMP_2_HI 0x461 +#define A5XX_RBBM_PERFCTR_CMP_3_LO 0x462 +#define A5XX_RBBM_PERFCTR_CMP_3_HI 0x463 +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_ALWAYSON_COUNTER_LO 0x4D2 +#define A5XX_RBBM_ALWAYSON_COUNTER_HI 0x4D3 +#define A5XX_RBBM_STATUS 0x4F5 +#define A5XX_RBBM_STATUS3 0x530 +#define A5XX_RBBM_INT_0_STATUS 0x4E1 +#define A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x4F0 +#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x4F1 +#define A5XX_RBBM_AHB_ERROR_STATUS 0x4F4 +#define A5XX_RBBM_PERFCTR_CNTL 0x464 +#define A5XX_RBBM_PERFCTR_LOAD_CMD0 0x465 +#define A5XX_RBBM_PERFCTR_LOAD_CMD1 0x466 +#define A5XX_RBBM_PERFCTR_LOAD_CMD2 0x467 +#define A5XX_RBBM_PERFCTR_LOAD_CMD3 0x468 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x469 +#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x46A +#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B +#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C +#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D +#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E +#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x46F +#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x504 +#define A5XX_RBBM_CFG_DBGBUS_OVER 0x505 +#define A5XX_RBBM_CFG_DBGBUS_COUNT0 0x506 +#define A5XX_RBBM_CFG_DBGBUS_COUNT1 0x507 +#define A5XX_RBBM_CFG_DBGBUS_COUNT2 0x508 +#define A5XX_RBBM_CFG_DBGBUS_COUNT3 0x509 +#define A5XX_RBBM_CFG_DBGBUS_COUNT4 0x50A +#define A5XX_RBBM_CFG_DBGBUS_COUNT5 0x50B +#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x50C +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x50D +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x50E +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x50F +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x510 +#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x511 +#define A5XX_RBBM_CFG_DBGBUS_MISR0 0x512 +#define A5XX_RBBM_CFG_DBGBUS_MISR1 0x513 +#define A5XX_RBBM_ISDB_CNT 0x533 +#define A5XX_RBBM_SECVID_TRUST_CONFIG 0xF000 +#define A5XX_RBBM_SECVID_TRUST_CNTL 0xF400 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801 +#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802 +#define A5XX_RBBM_SECVID_TSB_CNTL 0xF803 +#define A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0xF804 +#define A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0xF805 +#define A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 0xF806 +#define A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0xF807 +#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810 + +/* VSC registers */ 
+#define A5XX_VSC_PERFCTR_VSC_SEL_0 0xC60 +#define A5XX_VSC_PERFCTR_VSC_SEL_1 0xC61 + +#define A5XX_GRAS_ADDR_MODE_CNTL 0xC81 + +/* TSE registers */ +#define A5XX_GRAS_PERFCTR_TSE_SEL_0 0xC90 +#define A5XX_GRAS_PERFCTR_TSE_SEL_1 0xC91 +#define A5XX_GRAS_PERFCTR_TSE_SEL_2 0xC92 +#define A5XX_GRAS_PERFCTR_TSE_SEL_3 0xC93 + +/* RAS registers */ +#define A5XX_GRAS_PERFCTR_RAS_SEL_0 0xC94 +#define A5XX_GRAS_PERFCTR_RAS_SEL_1 0xC95 +#define A5XX_GRAS_PERFCTR_RAS_SEL_2 0xC96 +#define A5XX_GRAS_PERFCTR_RAS_SEL_3 0xC97 + +/* LRZ registers */ +#define A5XX_GRAS_PERFCTR_LRZ_SEL_0 0xC98 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_1 0xC99 +#define A5XX_GRAS_PERFCTR_LRZ_SEL_2 0xC9A +#define A5XX_GRAS_PERFCTR_LRZ_SEL_3 0xC9B + + +/* RB registers */ +#define A5XX_RB_ADDR_MODE_CNTL 0xCC5 +#define A5XX_RB_PERFCTR_RB_SEL_0 0xCD0 +#define A5XX_RB_PERFCTR_RB_SEL_1 0xCD1 +#define A5XX_RB_PERFCTR_RB_SEL_2 0xCD2 +#define A5XX_RB_PERFCTR_RB_SEL_3 0xCD3 +#define A5XX_RB_PERFCTR_RB_SEL_4 0xCD4 +#define A5XX_RB_PERFCTR_RB_SEL_5 0xCD5 +#define A5XX_RB_PERFCTR_RB_SEL_6 0xCD6 +#define A5XX_RB_PERFCTR_RB_SEL_7 0xCD7 + +/* CCU registers */ +#define A5XX_RB_PERFCTR_CCU_SEL_0 0xCD8 +#define A5XX_RB_PERFCTR_CCU_SEL_1 0xCD9 +#define A5XX_RB_PERFCTR_CCU_SEL_2 0xCDA +#define A5XX_RB_PERFCTR_CCU_SEL_3 0xCDB + +/* RB Power Counter RB Registers Select */ +#define A5XX_RB_POWERCTR_RB_SEL_0 0xCE0 +#define A5XX_RB_POWERCTR_RB_SEL_1 0xCE1 +#define A5XX_RB_POWERCTR_RB_SEL_2 0xCE2 +#define A5XX_RB_POWERCTR_RB_SEL_3 0xCE3 + +/* RB Power Counter CCU Registers Select */ +#define A5XX_RB_POWERCTR_CCU_SEL_0 0xCE4 +#define A5XX_RB_POWERCTR_CCU_SEL_1 0xCE5 + +/* CMP registers */ +#define A5XX_RB_PERFCTR_CMP_SEL_0 0xCEC +#define A5XX_RB_PERFCTR_CMP_SEL_1 0xCED +#define A5XX_RB_PERFCTR_CMP_SEL_2 0xCEE +#define A5XX_RB_PERFCTR_CMP_SEL_3 0xCEF + +/* PC registers */ +#define A5XX_PC_DBG_ECO_CNTL 0xD00 +#define A5XX_PC_ADDR_MODE_CNTL 0xD01 +#define A5XX_PC_PERFCTR_PC_SEL_0 0xD10 +#define A5XX_PC_PERFCTR_PC_SEL_1 0xD11 +#define A5XX_PC_PERFCTR_PC_SEL_2 0xD12 +#define A5XX_PC_PERFCTR_PC_SEL_3 0xD13 +#define A5XX_PC_PERFCTR_PC_SEL_4 0xD14 +#define A5XX_PC_PERFCTR_PC_SEL_5 0xD15 +#define A5XX_PC_PERFCTR_PC_SEL_6 0xD16 +#define A5XX_PC_PERFCTR_PC_SEL_7 0xD17 + +/* HLSQ registers */ +#define A5XX_HLSQ_TIMEOUT_THRESHOLD 0xE00 +#define A5XX_HLSQ_ADDR_MODE_CNTL 0xE05 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xE10 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xE11 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xE12 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xE13 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xE14 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xE15 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xE16 +#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xE17 +#define A5XX_HLSQ_SPTP_RDSEL 0xF08 +#define A5XX_HLSQ_DBG_READ_SEL 0xBC00 +#define A5XX_HLSQ_DBG_AHB_READ_APERTURE 0xA000 + +/* VFD registers */ +#define A5XX_VFD_ADDR_MODE_CNTL 0xE41 +#define A5XX_VFD_PERFCTR_VFD_SEL_0 0xE50 +#define A5XX_VFD_PERFCTR_VFD_SEL_1 0xE51 +#define A5XX_VFD_PERFCTR_VFD_SEL_2 0xE52 +#define A5XX_VFD_PERFCTR_VFD_SEL_3 0xE53 +#define A5XX_VFD_PERFCTR_VFD_SEL_4 0xE54 +#define A5XX_VFD_PERFCTR_VFD_SEL_5 0xE55 +#define A5XX_VFD_PERFCTR_VFD_SEL_6 0xE56 +#define A5XX_VFD_PERFCTR_VFD_SEL_7 0xE57 + +/* VPC registers */ +#define A5XX_VPC_ADDR_MODE_CNTL 0xE61 +#define A5XX_VPC_PERFCTR_VPC_SEL_0 0xE64 +#define A5XX_VPC_PERFCTR_VPC_SEL_1 0xE65 +#define A5XX_VPC_PERFCTR_VPC_SEL_2 0xE66 +#define A5XX_VPC_PERFCTR_VPC_SEL_3 0xE67 + +/* UCHE registers */ +#define A5XX_UCHE_ADDR_MODE_CNTL 0xE80 +#define A5XX_UCHE_SVM_CNTL 0xE82 +#define 
A5XX_UCHE_WRITE_THRU_BASE_LO 0xE87 +#define A5XX_UCHE_WRITE_THRU_BASE_HI 0xE88 +#define A5XX_UCHE_TRAP_BASE_LO 0xE89 +#define A5XX_UCHE_TRAP_BASE_HI 0xE8A +#define A5XX_UCHE_GMEM_RANGE_MIN_LO 0xE8B +#define A5XX_UCHE_GMEM_RANGE_MIN_HI 0xE8C +#define A5XX_UCHE_GMEM_RANGE_MAX_LO 0xE8D +#define A5XX_UCHE_GMEM_RANGE_MAX_HI 0xE8E +#define A5XX_UCHE_INVALIDATE0 0xE95 +#define A5XX_UCHE_CACHE_WAYS 0xE96 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_0 0xEA0 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_1 0xEA1 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_2 0xEA2 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_3 0xEA3 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_4 0xEA4 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_5 0xEA5 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_6 0xEA6 +#define A5XX_UCHE_PERFCTR_UCHE_SEL_7 0xEA7 + +/* UCHE Power Counter UCHE Registers Select */ +#define A5XX_UCHE_POWERCTR_UCHE_SEL_0 0xEA8 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_1 0xEA9 +#define A5XX_UCHE_POWERCTR_UCHE_SEL_2 0xEAA +#define A5XX_UCHE_POWERCTR_UCHE_SEL_3 0xEAB + +/* SP registers */ +#define A5XX_SP_DBG_ECO_CNTL 0xEC0 +#define A5XX_SP_ADDR_MODE_CNTL 0xEC1 +#define A5XX_SP_PERFCTR_SP_SEL_0 0xED0 +#define A5XX_SP_PERFCTR_SP_SEL_1 0xED1 +#define A5XX_SP_PERFCTR_SP_SEL_2 0xED2 +#define A5XX_SP_PERFCTR_SP_SEL_3 0xED3 +#define A5XX_SP_PERFCTR_SP_SEL_4 0xED4 +#define A5XX_SP_PERFCTR_SP_SEL_5 0xED5 +#define A5XX_SP_PERFCTR_SP_SEL_6 0xED6 +#define A5XX_SP_PERFCTR_SP_SEL_7 0xED7 +#define A5XX_SP_PERFCTR_SP_SEL_8 0xED8 +#define A5XX_SP_PERFCTR_SP_SEL_9 0xED9 +#define A5XX_SP_PERFCTR_SP_SEL_10 0xEDA +#define A5XX_SP_PERFCTR_SP_SEL_11 0xEDB + +/* SP Power Counter SP Registers Select */ +#define A5XX_SP_POWERCTR_SP_SEL_0 0xEDC +#define A5XX_SP_POWERCTR_SP_SEL_1 0xEDD +#define A5XX_SP_POWERCTR_SP_SEL_2 0xEDE +#define A5XX_SP_POWERCTR_SP_SEL_3 0xEDF + +/* TP registers */ +#define A5XX_TPL1_ADDR_MODE_CNTL 0xF01 +#define A5XX_TPL1_PERFCTR_TP_SEL_0 0xF10 +#define A5XX_TPL1_PERFCTR_TP_SEL_1 0xF11 +#define A5XX_TPL1_PERFCTR_TP_SEL_2 0xF12 +#define A5XX_TPL1_PERFCTR_TP_SEL_3 0xF13 +#define A5XX_TPL1_PERFCTR_TP_SEL_4 0xF14 +#define A5XX_TPL1_PERFCTR_TP_SEL_5 0xF15 +#define A5XX_TPL1_PERFCTR_TP_SEL_6 0xF16 +#define A5XX_TPL1_PERFCTR_TP_SEL_7 0xF17 + +/* TP Power Counter TP Registers Select */ +#define A5XX_TPL1_POWERCTR_TP_SEL_0 0xF18 +#define A5XX_TPL1_POWERCTR_TP_SEL_1 0xF19 +#define A5XX_TPL1_POWERCTR_TP_SEL_2 0xF1A +#define A5XX_TPL1_POWERCTR_TP_SEL_3 0xF1B + +/* VBIF registers */ +#define A5XX_VBIF_VERSION 0x3000 +#define A5XX_VBIF_CLKON 0x3001 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1 + +#define A5XX_VBIF_ABIT_SORT 0x3028 +#define A5XX_VBIF_ABIT_SORT_CONF 0x3029 +#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049 +#define A5XX_VBIF_GATE_OFF_WRREQ_EN 0x302A +#define A5XX_VBIF_IN_RD_LIM_CONF0 0x302C +#define A5XX_VBIF_IN_RD_LIM_CONF1 0x302D + +#define A5XX_VBIF_XIN_HALT_CTRL0 0x3080 +#define A5XX_VBIF_XIN_HALT_CTRL0_MASK 0xF +#define A510_VBIF_XIN_HALT_CTRL0_MASK 0x7 +#define A5XX_VBIF_XIN_HALT_CTRL1 0x3081 + +#define A5XX_VBIF_TEST_BUS_OUT_CTRL 0x3084 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS1_CTRL0 0x3085 +#define A5XX_VBIF_TEST_BUS1_CTRL1 0x3086 +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF +#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0 + +#define A5XX_VBIF_TEST_BUS2_CTRL0 0x3087 +#define A5XX_VBIF_TEST_BUS2_CTRL1 0x3088 +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0xF +#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0 + +#define 
A5XX_VBIF_TEST_BUS_OUT 0x308c + +#define A5XX_VBIF_PERF_CNT_SEL0 0x30D0 +#define A5XX_VBIF_PERF_CNT_SEL1 0x30D1 +#define A5XX_VBIF_PERF_CNT_SEL2 0x30D2 +#define A5XX_VBIF_PERF_CNT_SEL3 0x30D3 +#define A5XX_VBIF_PERF_CNT_LOW0 0x30D8 +#define A5XX_VBIF_PERF_CNT_LOW1 0x30D9 +#define A5XX_VBIF_PERF_CNT_LOW2 0x30DA +#define A5XX_VBIF_PERF_CNT_LOW3 0x30DB +#define A5XX_VBIF_PERF_CNT_HIGH0 0x30E0 +#define A5XX_VBIF_PERF_CNT_HIGH1 0x30E1 +#define A5XX_VBIF_PERF_CNT_HIGH2 0x30E2 +#define A5XX_VBIF_PERF_CNT_HIGH3 0x30E3 + +#define A5XX_VBIF_PERF_PWR_CNT_EN0 0x3100 +#define A5XX_VBIF_PERF_PWR_CNT_EN1 0x3101 +#define A5XX_VBIF_PERF_PWR_CNT_EN2 0x3102 + +#define A5XX_VBIF_PERF_PWR_CNT_LOW0 0x3110 +#define A5XX_VBIF_PERF_PWR_CNT_LOW1 0x3111 +#define A5XX_VBIF_PERF_PWR_CNT_LOW2 0x3112 + +#define A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119 +#define A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x311A + +/* GPMU registers */ +#define A5XX_GPMU_INST_RAM_BASE 0x8800 +#define A5XX_GPMU_DATA_RAM_BASE 0x9800 +#define A5XX_GPMU_SP_POWER_CNTL 0xA881 +#define A5XX_GPMU_RBCCU_CLOCK_CNTL 0xA886 +#define A5XX_GPMU_RBCCU_POWER_CNTL 0xA887 +#define A5XX_GPMU_SP_PWR_CLK_STATUS 0xA88B +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0xA88D +#define A5XX_GPMU_PWR_COL_STAGGER_DELAY 0xA891 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892 +#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893 +#define A5XX_GPMU_PWR_COL_BINNING_CTRL 0xA894 +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_WFI_CONFIG 0xA8C1 +#define A5XX_GPMU_RBBM_INTR_INFO 0xA8D6 +#define A5XX_GPMU_CM3_SYSRESET 0xA8D8 +#define A5XX_GPMU_GENERAL_0 0xA8E0 +#define A5XX_GPMU_GENERAL_1 0xA8E1 + +/* COUNTABLE FOR SP PERFCOUNTER */ +#define A5XX_SP_ALU_ACTIVE_CYCLES 0x1 +#define A5XX_SP0_ICL1_MISSES 0x35 +#define A5XX_SP_FS_CFLOW_INSTRUCTIONS 0x27 + +/* COUNTABLE FOR TSE PERFCOUNTER */ +#define A5XX_TSE_INPUT_PRIM_NUM 0x6 + +/* GPMU POWER COUNTERS */ +#define A5XX_SP_POWER_COUNTER_0_LO 0xA840 +#define A5XX_SP_POWER_COUNTER_0_HI 0xA841 +#define A5XX_SP_POWER_COUNTER_1_LO 0xA842 +#define A5XX_SP_POWER_COUNTER_1_HI 0xA843 +#define A5XX_SP_POWER_COUNTER_2_LO 0xA844 +#define A5XX_SP_POWER_COUNTER_2_HI 0xA845 +#define A5XX_SP_POWER_COUNTER_3_LO 0xA846 +#define A5XX_SP_POWER_COUNTER_3_HI 0xA847 + +#define A5XX_TP_POWER_COUNTER_0_LO 0xA848 +#define A5XX_TP_POWER_COUNTER_0_HI 0xA849 +#define A5XX_TP_POWER_COUNTER_1_LO 0xA84A +#define A5XX_TP_POWER_COUNTER_1_HI 0xA84B +#define A5XX_TP_POWER_COUNTER_2_LO 0xA84C +#define A5XX_TP_POWER_COUNTER_2_HI 0xA84D +#define A5XX_TP_POWER_COUNTER_3_LO 0xA84E +#define A5XX_TP_POWER_COUNTER_3_HI 0xA84F + +#define A5XX_RB_POWER_COUNTER_0_LO 0xA850 +#define A5XX_RB_POWER_COUNTER_0_HI 0xA851 +#define A5XX_RB_POWER_COUNTER_1_LO 0xA852 +#define A5XX_RB_POWER_COUNTER_1_HI 0xA853 +#define A5XX_RB_POWER_COUNTER_2_LO 0xA854 +#define A5XX_RB_POWER_COUNTER_2_HI 0xA855 +#define A5XX_RB_POWER_COUNTER_3_LO 0xA856 +#define A5XX_RB_POWER_COUNTER_3_HI 0xA857 + +#define A5XX_CCU_POWER_COUNTER_0_LO 0xA858 +#define A5XX_CCU_POWER_COUNTER_0_HI 0xA859 +#define A5XX_CCU_POWER_COUNTER_1_LO 0xA85A +#define A5XX_CCU_POWER_COUNTER_1_HI 0xA85B + +#define A5XX_UCHE_POWER_COUNTER_0_LO 0xA85C +#define A5XX_UCHE_POWER_COUNTER_0_HI 0xA85D +#define A5XX_UCHE_POWER_COUNTER_1_LO 0xA85E +#define A5XX_UCHE_POWER_COUNTER_1_HI 0xA85F +#define A5XX_UCHE_POWER_COUNTER_2_LO 0xA860 +#define A5XX_UCHE_POWER_COUNTER_2_HI 0xA861 +#define A5XX_UCHE_POWER_COUNTER_3_LO 0xA862 +#define A5XX_UCHE_POWER_COUNTER_3_HI 0xA863 + +#define A5XX_CP_POWER_COUNTER_0_LO 0xA864 
+#define A5XX_CP_POWER_COUNTER_0_HI 0xA865 +#define A5XX_CP_POWER_COUNTER_1_LO 0xA866 +#define A5XX_CP_POWER_COUNTER_1_HI 0xA867 +#define A5XX_CP_POWER_COUNTER_2_LO 0xA868 +#define A5XX_CP_POWER_COUNTER_2_HI 0xA869 +#define A5XX_CP_POWER_COUNTER_3_LO 0xA86A +#define A5XX_CP_POWER_COUNTER_3_HI 0xA86B + +#define A5XX_GPMU_POWER_COUNTER_0_LO 0xA86C +#define A5XX_GPMU_POWER_COUNTER_0_HI 0xA86D +#define A5XX_GPMU_POWER_COUNTER_1_LO 0xA86E +#define A5XX_GPMU_POWER_COUNTER_1_HI 0xA86F +#define A5XX_GPMU_POWER_COUNTER_2_LO 0xA870 +#define A5XX_GPMU_POWER_COUNTER_2_HI 0xA871 +#define A5XX_GPMU_POWER_COUNTER_3_LO 0xA872 +#define A5XX_GPMU_POWER_COUNTER_3_HI 0xA873 +#define A5XX_GPMU_POWER_COUNTER_4_LO 0xA874 +#define A5XX_GPMU_POWER_COUNTER_4_HI 0xA875 +#define A5XX_GPMU_POWER_COUNTER_5_LO 0xA876 +#define A5XX_GPMU_POWER_COUNTER_5_HI 0xA877 + +#define A5XX_GPMU_POWER_COUNTER_ENABLE 0xA878 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0xA879 +#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0xA87A +#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0xA87B +#define A5XX_GPMU_POWER_COUNTER_SELECT_0 0xA87C +#define A5XX_GPMU_POWER_COUNTER_SELECT_1 0xA87D + +#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3 +#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0xA8A8 + +#define A5XX_GPMU_TEMP_SENSOR_ID 0xAC00 +#define A5XX_GPMU_TEMP_SENSOR_CONFIG 0xAC01 +#define A5XX_GPMU_TEMP_VAL 0xAC02 +#define A5XX_GPMU_DELTA_TEMP_THRESHOLD 0xAC03 +#define A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0xAC05 +#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0xAC06 + +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0xAC40 +#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0xAC41 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0xAC42 +#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0xAC43 +#define A5XX_GPMU_BASE_LEAKAGE 0xAC46 + +#define A5XX_GPMU_GPMU_VOLTAGE 0xAC60 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0xAC61 +#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0xAC62 +#define A5XX_GPMU_GPMU_PWR_THRESHOLD 0xAC80 + +#define A5XX_GDPM_CONFIG1 0xB80C +#define A5XX_GDPM_CONFIG2 0xB80D +#define A5XX_GDPM_INT_EN 0xB80F +#define A5XX_GDPM_INT_MASK 0xB811 +#define A5XX_GPMU_BEC_ENABLE 0xB9A0 + +#endif /* _A5XX_REG_H */ + diff --git a/drivers/gpu/msm/adreno-gpulist.h b/drivers/gpu/msm/adreno-gpulist.h new file mode 100644 index 000000000000..6f333624a28d --- /dev/null +++ b/drivers/gpu/msm/adreno-gpulist.h @@ -0,0 +1,240 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#define ANY_ID (~0) + +static const struct adreno_gpu_core adreno_gpulist[] = { + { + .gpurev = ADRENO_REV_A306, + .core = 3, + .major = 0, + .minor = 6, + .patchid = 0x00, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .gpudev = &adreno_a3xx_gpudev, + .gmem_size = SZ_128K, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A306A, + .core = 3, + .major = 0, + .minor = 6, + .patchid = 0x20, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .gpudev = &adreno_a3xx_gpudev, + .gmem_size = SZ_128K, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A304, + .core = 3, + .major = 0, + .minor = 4, + .patchid = 0x00, + .pm4fw_name = "a300_pm4.fw", + .pfpfw_name = "a300_pfp.fw", + .gpudev = &adreno_a3xx_gpudev, + .gmem_size = (SZ_64K + SZ_32K), + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A405, + .core = 4, + .major = 0, + .minor = 5, + .patchid = ANY_ID, + .features = 0, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = SZ_256K, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A420, + .core = 4, + .major = 2, + .minor = 0, + .patchid = ANY_ID, + .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | + ADRENO_USE_BOOTSTRAP, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = (SZ_1M + SZ_512K), + .pm4_jt_idx = 0x901, + .pm4_jt_addr = 0x300, + .pfp_jt_idx = 0x401, + .pfp_jt_addr = 0x400, + .pm4_bstrp_size = 0x06, + .pfp_bstrp_size = 0x28, + .pfp_bstrp_ver = 0x4ff083, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A430, + .core = 4, + .major = 3, + .minor = 0, + .patchid = ANY_ID, + .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | + ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC | ADRENO_PPD | + ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = (SZ_1M + SZ_512K), + .pm4_jt_idx = 0x901, + .pm4_jt_addr = 0x300, + .pfp_jt_idx = 0x401, + .pfp_jt_addr = 0x400, + .pm4_bstrp_size = 0x06, + .pfp_bstrp_size = 0x28, + .pfp_bstrp_ver = 0x4ff083, + .shader_offset = 0x20000, + .shader_size = 0x10000, + .num_protected_regs = 0x18, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A418, + .core = 4, + .major = 1, + .minor = 8, + .patchid = ANY_ID, + .features = ADRENO_USES_OCMEM | ADRENO_WARM_START | + ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC, + .pm4fw_name = "a420_pm4.fw", + .pfpfw_name = "a420_pfp.fw", + .gpudev = &adreno_a4xx_gpudev, + .gmem_size = (SZ_512K), + .pm4_jt_idx = 0x901, + .pm4_jt_addr = 0x300, + .pfp_jt_idx = 0x401, + .pfp_jt_addr = 0x400, + .pm4_bstrp_size = 0x06, + .pfp_bstrp_size = 0x28, + .pfp_bstrp_ver = 0x4ff083, + .shader_offset = 0x20000, /* SP and TP addresses */ + .shader_size = 0x10000, + .num_protected_regs = 0x18, + .busy_mask = 0x7FFFFFFE, + }, + { + .gpurev = ADRENO_REV_A530, + .core = 5, + .major = 3, + .minor = 0, + .patchid = 0, + .pm4fw_name = "a530v1_pm4.fw", + .pfpfw_name = "a530v1_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_1M, + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, + { + .gpurev = ADRENO_REV_A530, + .core = 5, + .major = 3, + .minor = 0, + .patchid = 1, + .features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM | + ADRENO_PREEMPTION | ADRENO_64BIT | + ADRENO_CONTENT_PROTECTION, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a530_zap", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_1M, + 
.num_protected_regs = 0x20, + .gpmufw_name = "a530_gpmu.fw2", + .gpmu_major = 1, + .gpmu_minor = 0, + .busy_mask = 0xFFFFFFFE, + .lm_major = 3, + .lm_minor = 0, + .gpmu_tsens = 0x00060007, + .max_power = 5448, + .regfw_name = "a530v2_seq.fw2", + }, + { + .gpurev = ADRENO_REV_A530, + .core = 5, + .major = 3, + .minor = 0, + .patchid = ANY_ID, + .features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM | + ADRENO_PREEMPTION | ADRENO_64BIT | + ADRENO_CONTENT_PROTECTION, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .zap_name = "a530_zap", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_1M, + .num_protected_regs = 0x20, + .gpmufw_name = "a530v3_gpmu.fw2", + .gpmu_major = 1, + .gpmu_minor = 0, + .busy_mask = 0xFFFFFFFE, + .lm_major = 1, + .lm_minor = 0, + .gpmu_tsens = 0x00060007, + .max_power = 5448, + .regfw_name = "a530v3_seq.fw2", + }, + { + .gpurev = ADRENO_REV_A505, + .core = 5, + .major = 0, + .minor = 5, + .patchid = ANY_ID, + .features = ADRENO_PREEMPTION | ADRENO_64BIT, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = (SZ_128K + SZ_8K), + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, + { + .gpurev = ADRENO_REV_A506, + .core = 5, + .major = 0, + .minor = 6, + .patchid = ANY_ID, + .features = ADRENO_PREEMPTION | ADRENO_64BIT, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = (SZ_128K + SZ_8K), + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, + { + .gpurev = ADRENO_REV_A510, + .core = 5, + .major = 1, + .minor = 0, + .patchid = ANY_ID, + .pm4fw_name = "a530_pm4.fw", + .pfpfw_name = "a530_pfp.fw", + .gpudev = &adreno_a5xx_gpudev, + .gmem_size = SZ_256K, + .num_protected_regs = 0x20, + .busy_mask = 0xFFFFFFFE, + }, +}; diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c new file mode 100644 index 000000000000..5bac1f2d79e0 --- /dev/null +++ b/drivers/gpu/msm/adreno.c @@ -0,0 +1,2884 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/delay.h> +#include <linux/of_coresight.h> +#include <linux/input.h> +#include <soc/qcom/scm.h> + +#include <linux/msm-bus-board.h> +#include <linux/msm-bus.h> + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_cffdump.h" +#include "kgsl_sharedmem.h" +#include "kgsl_iommu.h" +#include "kgsl_trace.h" + +#include "adreno.h" +#include "adreno_compat.h" +#include "adreno_pm4types.h" +#include "adreno_trace.h" + +#include "a3xx_reg.h" +#include "adreno_snapshot.h" + +/* Include the master list of GPU cores that are supported */ +#include "adreno-gpulist.h" + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "adreno." 
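The adreno-gpulist.h table included above is what adreno_identify_gpu() (defined later in this file) matches against the chip ID read from the qcom,chipid device-tree property: _get_gpu_core() walks the array, requires an exact match on the core field, treats ANY_ID entries for major, minor and patchid as wildcards, and returns the first entry that matches. The standalone sketch below mirrors that lookup; the byte-per-field chip-ID packing and the sample table entries are illustrative assumptions, not values lifted from the driver headers.

/* Standalone sketch (not driver code): chip-ID decode and gpulist matching. */
#include <stdio.h>

#define ANY_ID (~0u)

struct gpu_entry {
	unsigned int core, major, minor, patchid;
	const char *name;
};

/* Hypothetical subset of the table; first match wins, so exact patch IDs
 * must appear before their ANY_ID catch-all. */
static const struct gpu_entry gpulist[] = {
	{ 5, 3, 0, 0,      "A530v1" },
	{ 5, 3, 0, 1,      "A530v2" },
	{ 5, 3, 0, ANY_ID, "A530 (other patch levels)" },
	{ 5, 1, 0, ANY_ID, "A510" },
};

static int rev_match(unsigned int id, unsigned int entry)
{
	return entry == ANY_ID || entry == id;
}

static const struct gpu_entry *get_gpu_core(unsigned int chipid)
{
	/* Assumed packing: core, major, minor, patch in successive bytes. */
	unsigned int core = (chipid >> 24) & 0xff;
	unsigned int major = (chipid >> 16) & 0xff;
	unsigned int minor = (chipid >> 8) & 0xff;
	unsigned int patchid = chipid & 0xff;
	unsigned int i;

	for (i = 0; i < sizeof(gpulist) / sizeof(gpulist[0]); i++) {
		if (core == gpulist[i].core &&
		    rev_match(major, gpulist[i].major) &&
		    rev_match(minor, gpulist[i].minor) &&
		    rev_match(patchid, gpulist[i].patchid))
			return &gpulist[i];
	}

	return NULL;
}

int main(void)
{
	/* 0x05030002: core 5, major 3, minor 0, patch 2 -> falls through the
	 * exact-patch A530 entries and matches the ANY_ID catch-all. */
	const struct gpu_entry *e = get_gpu_core(0x05030002);

	printf("%s\n", e ? e->name : "no match");
	return 0;
}

In the driver itself a failed lookup is fatal: adreno_identify_gpu() reports the unknown chip ID through KGSL_DRV_FATAL().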
+ +static bool nopreempt; +module_param(nopreempt, bool, 0444); +MODULE_PARM_DESC(nopreempt, "Disable GPU preemption"); + +#define DRIVER_VERSION_MAJOR 3 +#define DRIVER_VERSION_MINOR 1 + +/* Number of times to try hard reset */ +#define NUM_TIMES_RESET_RETRY 5 + +#define KGSL_LOG_LEVEL_DEFAULT 3 + +static void adreno_input_work(struct work_struct *work); + +static struct devfreq_msm_adreno_tz_data adreno_tz_data = { + .bus = { + .max = 350, + }, + .device_id = KGSL_DEVICE_3D0, +}; + +static const struct kgsl_functable adreno_functable; + +static struct kgsl_iommu device_3d0_iommu; + +static struct adreno_device device_3d0 = { + .dev = { + KGSL_DEVICE_COMMON_INIT(device_3d0.dev), + .pwrscale = KGSL_PWRSCALE_INIT(&adreno_tz_data), + .name = DEVICE_3D0_NAME, + .id = KGSL_DEVICE_3D0, + .pwrctrl = { + .irq_name = "kgsl_3d0_irq", + }, + .iomemname = "kgsl_3d0_reg_memory", + .shadermemname = "kgsl_3d0_shader_memory", + .ftbl = &adreno_functable, + .cmd_log = KGSL_LOG_LEVEL_DEFAULT, + .ctxt_log = KGSL_LOG_LEVEL_DEFAULT, + .drv_log = KGSL_LOG_LEVEL_DEFAULT, + .mem_log = KGSL_LOG_LEVEL_DEFAULT, + .pwr_log = KGSL_LOG_LEVEL_DEFAULT, + }, + .gmem_size = SZ_256K, + .pfp_fw = NULL, + .pm4_fw = NULL, + .ft_policy = KGSL_FT_DEFAULT_POLICY, + .ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY, + .fast_hang_detect = 1, + .long_ib_detect = 1, + .input_work = __WORK_INITIALIZER(device_3d0.input_work, + adreno_input_work), + .pwrctrl_flag = BIT(ADRENO_SPTP_PC_CTRL) | BIT(ADRENO_PPD_CTRL) | + BIT(ADRENO_LM_CTRL), + .profile.enabled = false, +}; + +/* Ptr to array for the current set of fault detect registers */ +unsigned int *adreno_ft_regs; +/* Total number of fault detect registers */ +unsigned int adreno_ft_regs_num; +/* Ptr to array for the current fault detect registers values */ +unsigned int *adreno_ft_regs_val; +/* Array of default fault detect registers */ +static unsigned int adreno_ft_regs_default[] = { + ADRENO_REG_RBBM_STATUS, + ADRENO_REG_CP_RB_RPTR, + ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BUFSZ, + ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BUFSZ +}; + +/* Nice level for the higher priority GPU start thread */ +int adreno_wake_nice = -7; + +/* Number of milliseconds to stay active active after a wake on touch */ +unsigned int adreno_wake_timeout = 100; + +/** + * adreno_readreg64() - Read a 64bit register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @lo: lower 32bit register enum that is to be read + * @hi: higher 32bit register enum that is to be read + * @val: 64 bit Register value read is placed here + */ +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int val_lo = 0, val_hi = 0; + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_checkreg_off(adreno_dev, lo)) + kgsl_regread(device, gpudev->reg_offsets->offsets[lo], &val_lo); + if (adreno_checkreg_off(adreno_dev, hi)) + kgsl_regread(device, gpudev->reg_offsets->offsets[hi], &val_hi); + + *val = (val_lo | ((uint64_t)val_hi << 32)); +} + +/** + * adreno_writereg64() - Write a 64bit register by getting its offset from the + * offset array defined in gpudev node + * @adreno_dev: Pointer to the the adreno device + * @lo: lower 32bit register enum that is to be written + * @hi: higher 32bit register enum that is to be written + * @val: 64 bit value to write + */ +void adreno_writereg64(struct adreno_device 
*adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t val) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_checkreg_off(adreno_dev, lo)) + kgsl_regwrite(device, gpudev->reg_offsets->offsets[lo], + lower_32_bits(val)); + if (adreno_checkreg_off(adreno_dev, hi)) + kgsl_regwrite(device, gpudev->reg_offsets->offsets[hi], + upper_32_bits(val)); +} + +/** + * adreno_of_read_property() - Adreno read property + * @node: Device node + * + * Read a u32 property. + */ +static inline int adreno_of_read_property(struct device_node *node, + const char *prop, unsigned int *ptr) +{ + int ret = of_property_read_u32(node, prop, ptr); + if (ret) + KGSL_CORE_ERR("Unable to read '%s'\n", prop); + return ret; +} + +static void __iomem *efuse_base; +static size_t efuse_len; + +int adreno_efuse_map(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct resource *res; + + if (efuse_base != NULL) + return 0; + + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + "qfprom_memory"); + + if (res == NULL) + return -ENODEV; + + efuse_base = ioremap(res->start, resource_size(res)); + if (efuse_base == NULL) + return -ENODEV; + + efuse_len = resource_size(res); + return 0; +} + +void adreno_efuse_unmap(struct adreno_device *adreno_dev) +{ + if (efuse_base != NULL) { + iounmap(efuse_base); + efuse_base = NULL; + efuse_len = 0; + } +} + +int adreno_efuse_read_u32(struct adreno_device *adreno_dev, unsigned int offset, + unsigned int *val) +{ + if (efuse_base == NULL) + return -ENODEV; + + if (offset >= efuse_len) + return -ERANGE; + + if (val != NULL) { + *val = readl_relaxed(efuse_base + offset); + /* Make sure memory is updated before returning */ + rmb(); + } + + return 0; +} + +/* + * adreno_iommu_cb_probe() - Adreno iommu context bank probe + * + * Iommu context bank probe function. + */ +static int adreno_iommu_cb_probe(struct platform_device *pdev) +{ + struct kgsl_iommu_context *ctx = NULL; + struct device_node *node = pdev->dev.of_node; + struct kgsl_iommu *iommu = &device_3d0_iommu; + int ret = 0; + + /* Map context names from dt to id's */ + if (!strcmp("gfx3d_user", node->name)) { + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + ctx->id = KGSL_IOMMU_CONTEXT_USER; + ctx->cb_num = -1; + } else if (!strcmp("gfx3d_secure", node->name)) { + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + ctx->id = KGSL_IOMMU_CONTEXT_SECURE; + ctx->cb_num = -1; + device_3d0.dev.mmu.secured = true; + } else { + KGSL_CORE_ERR("dt: Unknown context label %s\n", node->name); + return -EINVAL; + } + + if (ctx->name != NULL) { + KGSL_CORE_ERR("dt: %s appears multiple times\n", node->name); + return -EINVAL; + } + ctx->name = node->name; + + /* this property won't be found for all context banks */ + if (of_property_read_u32(node, "qcom,gpu-offset", + &ctx->gpu_offset)) + ctx->gpu_offset = UINT_MAX; + + ctx->kgsldev = &device_3d0.dev; + + /* arm-smmu driver we'll have the right device pointer here. */ + if (of_find_property(node, "iommus", NULL)) { + ctx->dev = &pdev->dev; + } else { + /* + * old iommu driver requires that we query the context bank + * device rather than getting it from dt. + */ + ctx->dev = kgsl_mmu_get_ctx(ctx->name); + if (IS_ERR_OR_NULL(ctx->dev)) { + ret = (ctx->dev == NULL) ? 
-ENODEV : PTR_ERR(ctx->dev); + KGSL_CORE_ERR("ctx %s: kgsl_mmu_get_ctx err: %d\n", + ctx->name, ret); + return ret; + } + } + + kgsl_mmu_set_mmutype(KGSL_MMU_TYPE_IOMMU); + + return ret; +} + +static struct of_device_id iommu_match_table[] = { + { .compatible = "qcom,kgsl-smmu-v1", }, + { .compatible = "qcom,kgsl-smmu-v2", }, + { .compatible = "qcom,smmu-kgsl-cb", }, + {} +}; + +/** + * adreno_iommu_pdev_probe() - Adreno iommu context bank probe + * @pdev: Platform device + * + * Iommu probe function. + */ +static int adreno_iommu_pdev_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const char *cname; + struct property *prop; + u32 reg_val[2]; + int i = 0; + struct kgsl_iommu *iommu = &device_3d0_iommu; + + if (of_device_is_compatible(dev->of_node, "qcom,smmu-kgsl-cb")) + return adreno_iommu_cb_probe(pdev); + else if (of_device_is_compatible(dev->of_node, "qcom,kgsl-smmu-v1")) + iommu->version = 1; + else + iommu->version = 2; + + if (of_property_read_u32_array(pdev->dev.of_node, "reg", reg_val, 2)) { + KGSL_CORE_ERR("dt: Unable to read KGSL IOMMU register range\n"); + return -EINVAL; + } + iommu->regstart = reg_val[0]; + iommu->regsize = reg_val[1]; + + /* Protecting the SMMU registers is mandatory */ + if (of_property_read_u32_array(pdev->dev.of_node, "qcom,protect", + reg_val, 2)) { + KGSL_CORE_ERR("dt: no iommu protection range specified\n"); + return -EINVAL; + } + iommu->protect.base = reg_val[0] / sizeof(u32); + iommu->protect.range = ilog2(reg_val[1] / sizeof(u32)); + + of_property_for_each_string(dev->of_node, "clock-names", prop, cname) { + struct clk *c = devm_clk_get(dev, cname); + if (IS_ERR(c)) { + KGSL_CORE_ERR("dt: Couldn't get clock: %s\n", cname); + return -ENODEV; + } + if (i >= KGSL_IOMMU_MAX_CLKS) { + KGSL_CORE_ERR("dt: too many clocks defined.\n"); + return -EINVAL; + } + + iommu->clks[i] = c; + ++i; + } + + if (of_property_read_bool(pdev->dev.of_node, "qcom,retention")) + device_3d0.dev.mmu.features |= KGSL_MMU_RETENTION; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,global_pt")) + device_3d0.dev.mmu.features |= KGSL_MMU_GLOBAL_PAGETABLE; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,hyp_secure_alloc")) + device_3d0.dev.mmu.features |= KGSL_MMU_HYP_SECURE_ALLOC; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,force-32bit")) + device_3d0.dev.mmu.features |= KGSL_MMU_FORCE_32BIT; + + if (of_property_read_u32(pdev->dev.of_node, "qcom,micro-mmu-control", + &iommu->micro_mmu_ctrl)) + iommu->micro_mmu_ctrl = UINT_MAX; + + if (of_property_read_bool(pdev->dev.of_node, "qcom,coherent-htw")) + device_3d0.dev.mmu.features |= KGSL_MMU_COHERENT_HTW; + + if (of_property_read_u32(pdev->dev.of_node, "qcom,secure_align_mask", + &device_3d0.dev.mmu.secure_align_mask)) + device_3d0.dev.mmu.secure_align_mask = 0xfff; + + return of_platform_populate(pdev->dev.of_node, iommu_match_table, + NULL, &pdev->dev); +} + +static struct platform_driver kgsl_iommu_platform_driver = { + .probe = adreno_iommu_pdev_probe, + .driver = { + .owner = THIS_MODULE, + .name = "kgsl-iommu", + .of_match_table = iommu_match_table, + } +}; + +static int __init kgsl_iommu_pdev_init(void) +{ + return platform_driver_register(&kgsl_iommu_platform_driver); +} + +static void __exit kgsl_iommu_pdev_exit(void) +{ + platform_driver_unregister(&kgsl_iommu_platform_driver); +} + +module_init(kgsl_iommu_pdev_init); +module_exit(kgsl_iommu_pdev_exit); + +static int _get_counter(struct adreno_device *adreno_dev, + int group, int countable, unsigned int *lo, + unsigned int 
*hi) +{ + int ret = 0; + + if (*lo == 0) { + + ret = adreno_perfcounter_get(adreno_dev, group, countable, + lo, hi, PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + struct kgsl_device *device = &adreno_dev->dev; + + KGSL_DRV_ERR(device, + "Unable to allocate fault detect performance counter %d/%d\n", + group, countable); + KGSL_DRV_ERR(device, + "GPU fault detect will be less reliable\n"); + } + } + + return ret; +} + +static inline void _put_counter(struct adreno_device *adreno_dev, + int group, int countable, unsigned int *lo, + unsigned int *hi) +{ + if (*lo != 0) + adreno_perfcounter_put(adreno_dev, group, countable, + PERFCOUNTER_FLAG_KERNEL); + + *lo = 0; + *hi = 0; +} + +/** + * adreno_fault_detect_start() - Allocate performance counters + * used for fast fault detection + * @adreno_dev: Pointer to an adreno_device structure + * + * Allocate the series of performance counters that should be periodically + * checked to verify that the GPU is still moving + */ +void adreno_fault_detect_start(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int i, j = ARRAY_SIZE(adreno_ft_regs_default); + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (adreno_dev->fast_hang_detect == 1) + return; + + for (i = 0; i < gpudev->ft_perf_counters_count; i++) { + _get_counter(adreno_dev, gpudev->ft_perf_counters[i].counter, + gpudev->ft_perf_counters[i].countable, + &adreno_ft_regs[j + (i * 2)], + &adreno_ft_regs[j + ((i * 2) + 1)]); + } + + adreno_dev->fast_hang_detect = 1; +} + +/** + * adreno_fault_detect_stop() - Release performance counters + * used for fast fault detection + * @adreno_dev: Pointer to an adreno_device structure + * + * Release the counters allocated in adreno_fault_detect_start + */ +void adreno_fault_detect_stop(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int i, j = ARRAY_SIZE(adreno_ft_regs_default); + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + if (!adreno_dev->fast_hang_detect) + return; + + for (i = 0; i < gpudev->ft_perf_counters_count; i++) { + _put_counter(adreno_dev, gpudev->ft_perf_counters[i].counter, + gpudev->ft_perf_counters[i].countable, + &adreno_ft_regs[j + (i * 2)], + &adreno_ft_regs[j + ((i * 2) + 1)]); + + } + + adreno_dev->fast_hang_detect = 0; +} + +/* + * A workqueue callback responsible for actually turning on the GPU after a + * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any + * active_count protection to avoid the need to maintain state. Either + * somebody will start using the GPU or the idle timer will fire and put the + * GPU back into slumber. + */ +static void adreno_input_work(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, input_work); + struct kgsl_device *device = &adreno_dev->dev; + + mutex_lock(&device->mutex); + + device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; + + /* + * Don't schedule adreno_start in a high priority workqueue, we are + * already in a workqueue which should be sufficient + */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + + /* + * When waking up from a touch event we want to stay active long enough + * for the user to send a draw command. 
The default idle timer timeout + * is shorter than we want so go ahead and push the idle timer out + * further for this special case + */ + mod_timer(&device->idle_timer, + jiffies + msecs_to_jiffies(adreno_wake_timeout)); + mutex_unlock(&device->mutex); +} + +/* + * Process input events and schedule work if needed. At this point we are only + * interested in groking EV_ABS touchscreen events + */ +static void adreno_input_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct kgsl_device *device = handle->handler->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Only consider EV_ABS (touch) events */ + if (type != EV_ABS) + return; + + /* + * Don't do anything if anything hasn't been rendered since we've been + * here before + */ + + if (device->flags & KGSL_FLAG_WAKE_ON_TOUCH) + return; + + /* + * If the device is in nap, kick the idle timer to make sure that we + * don't go into slumber before the first render. If the device is + * already in slumber schedule the wake. + */ + + if (device->state == KGSL_STATE_NAP) { + /* + * Set the wake on touch bit to keep from coming back here and + * keeping the device in nap without rendering + */ + + device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; + + mod_timer(&device->idle_timer, + jiffies + device->pwrctrl.interval_timeout); + } else if (device->state == KGSL_STATE_SLUMBER) { + schedule_work(&adreno_dev->input_work); + } +} + +#ifdef CONFIG_INPUT +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + struct input_handle *handle; + int ret; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (handle == NULL) + return -ENOMEM; + + handle->dev = dev; + handle->handler = handler; + handle->name = handler->name; + + ret = input_register_handle(handle); + if (ret) { + kfree(handle); + return ret; + } + + ret = input_open_device(handle); + if (ret) { + input_unregister_handle(handle); + kfree(handle); + } + + return ret; +} + +static void adreno_input_disconnect(struct input_handle *handle) +{ + input_close_device(handle); + input_unregister_handle(handle); + kfree(handle); +} +#else +static int adreno_input_connect(struct input_handler *handler, + struct input_dev *dev, const struct input_device_id *id) +{ + return 0; +} +static void adreno_input_disconnect(struct input_handle *handle) {} +#endif + +/* + * We are only interested in EV_ABS events so only register handlers for those + * input devices that have EV_ABS events + */ +static const struct input_device_id adreno_input_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT, + .evbit = { BIT_MASK(EV_ABS) }, + /* assumption: MT_.._X & MT_.._Y are in the same long */ + .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = + BIT_MASK(ABS_MT_POSITION_X) | + BIT_MASK(ABS_MT_POSITION_Y) }, + }, + { }, +}; + +static struct input_handler adreno_input_handler = { + .event = adreno_input_event, + .connect = adreno_input_connect, + .disconnect = adreno_input_disconnect, + .name = "kgsl", + .id_table = adreno_input_ids, +}; + +static int adreno_soft_reset(struct kgsl_device *device); + +/* + * _soft_reset() - Soft reset GPU + * @adreno_dev: Pointer to adreno device + * + * Soft reset the GPU by doing a AHB write of value 1 to RBBM_SW_RESET + * register. This is used when we want to reset the GPU without + * turning off GFX power rail. The reset when asserted resets + * all the HW logic, restores GPU registers to default state and + * flushes out pending VBIF transactions. 
+ */ +static void _soft_reset(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int reg; + + /* + * On a530 v1 RBBM cannot be reset in soft reset. + * Reset all blocks except RBBM for a530v1. + */ + if (adreno_is_a530v1(adreno_dev)) { + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD, + 0xFFDFFC0); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2, + 0x1FFFFFFF); + } else { + + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 1); + /* + * Do a dummy read to get a brief read cycle delay for the + * reset to take effect + */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, &reg); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 0); + } + + /* The SP/TP regulator gets turned off after a soft reset */ + + if (gpudev->regulator_enable) + gpudev->regulator_enable(adreno_dev); +} + + +void adreno_irqctrl(struct adreno_device *adreno_dev, int state) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int mask = state ? gpudev->irq->mask : 0; + + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, mask); +} + + /* + * adreno_hang_int_callback() - Isr for fatal interrupts that hang GPU + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + + KGSL_DRV_CRIT(device, "MISC: GPU hang detected\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_set_gpu_fault(ADRENO_DEVICE(device), ADRENO_HARD_FAULT); + adreno_dispatcher_schedule(device); +} + + /* + * adreno_cp_callback() - CP interrupt handler + * @adreno_dev: Adreno device pointer + * @irq: irq number + * + * Handle the cp interrupt generated by GPU. 
+ */ +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_schedule_work(&device->event_work); + adreno_dispatcher_schedule(device); +} + +static irqreturn_t adreno_irq_handler(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_irq *irq_params = gpudev->irq; + irqreturn_t ret = IRQ_NONE; + unsigned int status = 0, tmp; + int i; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status); + + /* Loop through all set interrupts and call respective handlers */ + for (tmp = status; tmp != 0;) { + i = fls(tmp) - 1; + + if (irq_params->funcs[i].func != NULL) { + irq_params->funcs[i].func(adreno_dev, i); + ret = IRQ_HANDLED; + } else + KGSL_DRV_CRIT(device, + "Unhandled interrupt bit %x\n", i); + + tmp &= ~BIT(i); + } + + gpudev->irq_trace(adreno_dev, status); + + if (status) + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_CLEAR_CMD, + status); + return ret; + +} + +static inline bool _rev_match(unsigned int id, unsigned int entry) +{ + return (entry == ANY_ID || entry == id); +} + +static inline const struct adreno_gpu_core *_get_gpu_core(unsigned int chipid) +{ + unsigned int core = ADRENO_CHIPID_CORE(chipid); + unsigned int major = ADRENO_CHIPID_MAJOR(chipid); + unsigned int minor = ADRENO_CHIPID_MINOR(chipid); + unsigned int patchid = ADRENO_CHIPID_PATCH(chipid); + int i; + + for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) { + if (core == adreno_gpulist[i].core && + _rev_match(major, adreno_gpulist[i].major) && + _rev_match(minor, adreno_gpulist[i].minor) && + _rev_match(patchid, adreno_gpulist[i].patchid)) + return &adreno_gpulist[i]; + } + + return NULL; +} + +static void +adreno_identify_gpu(struct adreno_device *adreno_dev) +{ + const struct adreno_reg_offsets *reg_offsets; + struct adreno_gpudev *gpudev; + int i; + + if (kgsl_property_read_u32(&adreno_dev->dev, "qcom,chipid", + &adreno_dev->chipid)) + KGSL_DRV_FATAL(&adreno_dev->dev, + "No GPU chip ID was specified\n"); + + adreno_dev->gpucore = _get_gpu_core(adreno_dev->chipid); + + if (adreno_dev->gpucore == NULL) + KGSL_DRV_FATAL(&adreno_dev->dev, "Unknown GPU chip ID %8.8X\n", + adreno_dev->chipid); + + /* + * The gmem size might be dynamic when ocmem is involved so copy it out + * of the gpu device + */ + + adreno_dev->gmem_size = adreno_dev->gpucore->gmem_size; + + /* + * Initialize uninitialzed gpu registers, only needs to be done once + * Make all offsets that are not initialized to ADRENO_REG_UNUSED + */ + + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + reg_offsets = gpudev->reg_offsets; + + for (i = 0; i < ADRENO_REG_REGISTER_MAX; i++) { + if (reg_offsets->offset_0 != i && !reg_offsets->offsets[i]) + reg_offsets->offsets[i] = ADRENO_REG_UNUSED; + } + + /* Do target specific identification */ + if (gpudev->platform_setup != NULL) + gpudev->platform_setup(adreno_dev); +} + +static const struct platform_device_id adreno_id_table[] = { + { DEVICE_3D0_NAME, (unsigned long) &device_3d0, }, + {}, +}; + +MODULE_DEVICE_TABLE(platform, adreno_id_table); + +static const struct of_device_id adreno_match_table[] = { + { .compatible = "qcom,kgsl-3d0", .data = &device_3d0 }, + {} +}; + +static int adreno_of_parse_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *node) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *child; + + pwr->num_pwrlevels = 0; + + 
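	/*
	 * Each qcom,gpu-pwrlevels child describes one performance level, along
	 * the lines of (node name and values here are hypothetical):
	 *
	 *	qcom,gpu-pwrlevel@0 {
	 *		reg = <0>;
	 *		qcom,gpu-freq = <600000000>;
	 *		qcom,bus-freq = <12>;
	 *	};
	 *
	 * "reg" selects the slot (out-of-range indices are reported and
	 * skipped), num_pwrlevels is grown to cover the highest index seen,
	 * qcom,gpu-freq and qcom,bus-freq are mandatory, and qcom,bus-min /
	 * qcom,bus-max fall back to qcom,bus-freq when absent.
	 */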
for_each_child_of_node(node, child) { + unsigned int index; + struct kgsl_pwrlevel *level; + + if (adreno_of_read_property(child, "reg", &index)) + return -EINVAL; + + if (index >= KGSL_MAX_PWRLEVELS) { + KGSL_CORE_ERR("Pwrlevel index %d is out of range\n", + index); + continue; + } + + if (index >= pwr->num_pwrlevels) + pwr->num_pwrlevels = index + 1; + + level = &pwr->pwrlevels[index]; + + if (adreno_of_read_property(child, "qcom,gpu-freq", + &level->gpu_freq)) + return -EINVAL; + + if (adreno_of_read_property(child, "qcom,bus-freq", + &level->bus_freq)) + return -EINVAL; + + if (of_property_read_u32(child, "qcom,bus-min", + &level->bus_min)) + level->bus_min = level->bus_freq; + + if (of_property_read_u32(child, "qcom,bus-max", + &level->bus_max)) + level->bus_max = level->bus_freq; + } + + return 0; +} + +static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct device_node *node; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevels"); + + if (node == NULL) { + KGSL_CORE_ERR("Unable to find 'qcom,gpu-pwrlevels'\n"); + return -EINVAL; + } + + return adreno_of_parse_pwrlevels(adreno_dev, node); +} + +static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev, + struct device_node *parent) +{ + struct device_node *node, *child; + + node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins"); + if (node == NULL) + return adreno_of_get_legacy_pwrlevels(adreno_dev, parent); + + for_each_child_of_node(node, child) { + unsigned int bin; + + if (of_property_read_u32(child, "qcom,speed-bin", &bin)) + continue; + + if (bin == adreno_dev->speed_bin) + return adreno_of_parse_pwrlevels(adreno_dev, child); + } + + return -ENODEV; +} + +static inline struct adreno_device *adreno_get_dev(struct platform_device *pdev) +{ + const struct of_device_id *of_id = + of_match_device(adreno_match_table, &pdev->dev); + + return of_id ? 
(struct adreno_device *) of_id->data : NULL; +} + +static struct { + unsigned int quirk; + const char *prop; +} adreno_quirks[] = { + { ADRENO_QUIRK_TWO_PASS_USE_WFI, "qcom,gpu-quirk-two-pass-use-wfi" }, + { ADRENO_QUIRK_IOMMU_SYNC, "qcom,gpu-quirk-iommu-sync" }, +}; + +static int adreno_of_get_power(struct adreno_device *adreno_dev, + struct platform_device *pdev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *node = pdev->dev.of_node; + int i, init_level; + + if (of_property_read_string(node, "label", &pdev->name)) { + KGSL_CORE_ERR("Unable to read 'label'\n"); + return -EINVAL; + } + + if (adreno_of_read_property(node, "qcom,id", &pdev->id)) + return -EINVAL; + + /* Set up quirks and other boolean options */ + for (i = 0; i < ARRAY_SIZE(adreno_quirks); i++) { + if (of_property_read_bool(node, adreno_quirks[i].prop)) + adreno_dev->quirks |= adreno_quirks[i].quirk; + } + + if (adreno_of_get_pwrlevels(adreno_dev, node)) + return -EINVAL; + + if (of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level)) + init_level = 1; + + if (init_level < 0 || init_level > pwr->num_pwrlevels) + init_level = 1; + + pwr->active_pwrlevel = init_level; + pwr->default_pwrlevel = init_level; + + /* get pm-qos-active-latency, set it to default if not found */ + if (of_property_read_u32(node, "qcom,pm-qos-active-latency", + &device->pwrctrl.pm_qos_active_latency)) + device->pwrctrl.pm_qos_active_latency = 501; + + /* get pm-qos-wakeup-latency, set it to default if not found */ + if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency", + &device->pwrctrl.pm_qos_wakeup_latency)) + device->pwrctrl.pm_qos_wakeup_latency = 101; + + if (of_property_read_u32(node, "qcom,idle-timeout", + (unsigned int *) &device->pwrctrl.interval_timeout)) + device->pwrctrl.interval_timeout = HZ/12; + + device->pwrctrl.strtstp_sleepwake = + of_property_read_bool(node, "qcom,strtstp-sleepwake"); + + device->pwrctrl.bus_control = of_property_read_bool(node, + "qcom,bus-control"); + + return 0; +} + +#ifdef CONFIG_MSM_OCMEM +static int +adreno_ocmem_malloc(struct adreno_device *adreno_dev) +{ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_USES_OCMEM)) + return 0; + + if (adreno_dev->ocmem_hdl == NULL) { + adreno_dev->ocmem_hdl = + ocmem_allocate(OCMEM_GRAPHICS, adreno_dev->gmem_size); + if (IS_ERR_OR_NULL(adreno_dev->ocmem_hdl)) { + adreno_dev->ocmem_hdl = NULL; + return -ENOMEM; + } + + adreno_dev->gmem_size = adreno_dev->ocmem_hdl->len; + adreno_dev->gmem_base = adreno_dev->ocmem_hdl->addr; + } + + return 0; +} + +static void +adreno_ocmem_free(struct adreno_device *adreno_dev) +{ + if (adreno_dev->ocmem_hdl != NULL) { + ocmem_free(OCMEM_GRAPHICS, adreno_dev->ocmem_hdl); + adreno_dev->ocmem_hdl = NULL; + } +} +#else +static int +adreno_ocmem_malloc(struct adreno_device *adreno_dev) +{ + return 0; +} + +static void +adreno_ocmem_free(struct adreno_device *adreno_dev) +{ +} +#endif + +static int adreno_probe(struct platform_device *pdev) +{ + struct kgsl_device *device; + struct adreno_device *adreno_dev; + int status; + + /* Defer adreno probe if IOMMU is not already probed */ + if (device_3d0_iommu.regstart == 0) + return -EPROBE_DEFER; + + adreno_dev = adreno_get_dev(pdev); + + if (adreno_dev == NULL) { + pr_err("adreno: qcom,kgsl-3d0 does not exist in the device tree"); + return -ENODEV; + } + + device = &adreno_dev->dev; + device->pdev = pdev; + device->mmu.priv = &device_3d0_iommu; + + /* Get the chip ID from the DT and set up target specific parameters 
*/ + adreno_identify_gpu(adreno_dev); + + status = adreno_of_get_power(adreno_dev, pdev); + if (status) { + device->pdev = NULL; + return status; + } + + /* + * The SMMU APIs use unsigned long for virtual addresses which means + * that we cannot use 64 bit virtual addresses on a 32 bit kernel even + * though the hardware and the rest of the KGSL driver supports it. + */ + if ((BITS_PER_LONG == 64) && ADRENO_FEATURE(adreno_dev, ADRENO_64BIT)) + device->mmu.features |= KGSL_MMU_64BIT; + + status = kgsl_device_platform_probe(device); + if (status) { + device->pdev = NULL; + return status; + } + + /* + * qcom,iommu-secure-id is used to identify MMUs that can handle secure + * content but that is only part of the story - the GPU also has to be + * able to handle secure content. Unfortunately in a classic catch-22 + * we cannot identify the GPU until after the DT is parsed. tl;dr - + * check the GPU capabilities here and modify mmu->secured accordingly + */ + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + device->mmu.secured = false; + + status = adreno_ringbuffer_init(adreno_dev, nopreempt); + if (status) + goto out; + + status = adreno_dispatcher_init(adreno_dev); + if (status) + goto out; + + adreno_debugfs_init(adreno_dev); + adreno_profile_init(adreno_dev); + + adreno_sysfs_init(device); + + kgsl_pwrscale_init(&pdev->dev, CONFIG_MSM_ADRENO_DEFAULT_GOVERNOR); + + adreno_input_handler.private = device; + +#ifdef CONFIG_INPUT + /* + * It isn't fatal if we cannot register the input handler. Sad, + * perhaps, but not fatal + */ + if (input_register_handler(&adreno_input_handler)) + KGSL_DRV_ERR(device, "Unable to register the input handler\n"); +#endif +out: + if (status) { + adreno_ringbuffer_close(adreno_dev); + kgsl_device_platform_remove(device); + device->pdev = NULL; + } + + return status; +} + +static void _adreno_free_memories(struct adreno_device *adreno_dev) +{ + if (test_bit(ADRENO_DEVICE_CMDBATCH_PROFILE, &adreno_dev->priv)) + kgsl_free_global(&adreno_dev->cmdbatch_profile_buffer); + + /* Free local copies of firmware and other command streams */ + kfree(adreno_dev->pfp_fw); + adreno_dev->pfp_fw = NULL; + + kfree(adreno_dev->pm4_fw); + adreno_dev->pm4_fw = NULL; + + kfree(adreno_dev->gpmu_cmds); + adreno_dev->gpmu_cmds = NULL; + + kgsl_free_global(&adreno_dev->pm4); + kgsl_free_global(&adreno_dev->pfp); +} + +static int adreno_remove(struct platform_device *pdev) +{ + struct adreno_device *adreno_dev = adreno_get_dev(pdev); + struct kgsl_device *device; + + if (adreno_dev == NULL) + return 0; + + device = &adreno_dev->dev; + + /* The memory is fading */ + _adreno_free_memories(adreno_dev); + +#ifdef CONFIG_INPUT + input_unregister_handler(&adreno_input_handler); +#endif + adreno_sysfs_close(device); + + adreno_coresight_remove(adreno_dev); + adreno_profile_close(adreno_dev); + + kgsl_pwrscale_close(device); + + adreno_dispatcher_close(adreno_dev); + adreno_ringbuffer_close(adreno_dev); + + adreno_fault_detect_stop(adreno_dev); + + kfree(adreno_ft_regs); + adreno_ft_regs = NULL; + + kfree(adreno_ft_regs_val); + adreno_ft_regs_val = NULL; + + if (efuse_base != NULL) + iounmap(efuse_base); + + adreno_perfcounter_close(adreno_dev); + kgsl_device_platform_remove(device); + + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) { + kgsl_free_global(&adreno_dev->pwron_fixup); + clear_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + } + clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); + + return 0; +} + +static void adreno_fault_detect_init(struct 
adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int i, val = adreno_dev->fast_hang_detect; + + /* Disable the fast hang detect bit until we know its a go */ + adreno_dev->fast_hang_detect = 0; + + adreno_ft_regs_num = (ARRAY_SIZE(adreno_ft_regs_default) + + gpudev->ft_perf_counters_count*2); + + adreno_ft_regs = kzalloc(adreno_ft_regs_num * sizeof(unsigned int), + GFP_KERNEL); + adreno_ft_regs_val = kzalloc(adreno_ft_regs_num * sizeof(unsigned int), + GFP_KERNEL); + + if (adreno_ft_regs == NULL || adreno_ft_regs_val == NULL) { + kfree(adreno_ft_regs); + kfree(adreno_ft_regs_val); + + adreno_ft_regs = NULL; + adreno_ft_regs_val = NULL; + + return; + } + + for (i = 0; i < ARRAY_SIZE(adreno_ft_regs_default); i++) + adreno_ft_regs[i] = adreno_getreg(adreno_dev, + adreno_ft_regs_default[i]); + + set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv); + + if (val) + adreno_fault_detect_start(adreno_dev); +} + +static int adreno_init(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + /* + * initialization only needs to be done once initially until + * device is shutdown + */ + if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv)) + return 0; + + /* + * Either the microcode read failed because the usermodehelper isn't + * available or the microcode was corrupted. Fail the init and force + * the user to try the open() again + */ + + ret = gpudev->microcode_read(adreno_dev); + if (ret) + return ret; + + /* Put the GPU in a responsive state */ + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (ret) + return ret; + + ret = adreno_iommu_init(adreno_dev); + if (ret) + return ret; + + /* Initialize coresight for the target */ + adreno_coresight_init(adreno_dev); + + adreno_perfcounter_init(adreno_dev); + adreno_fault_detect_init(adreno_dev); + + /* Power down the device */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + + /* + * Enable the power on shader corruption fix + * This is only applicable for 28nm targets + */ + if (adreno_is_a3xx(adreno_dev)) + adreno_a3xx_pwron_fixup_init(adreno_dev); + else if ((adreno_is_a405(adreno_dev)) || (adreno_is_a420(adreno_dev))) + adreno_a4xx_pwron_fixup_init(adreno_dev); + + if (gpudev->init != NULL) + gpudev->init(adreno_dev); + + set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv); + + /* Use shader offset and length defined in gpudev */ + if (adreno_dev->gpucore->shader_offset && + adreno_dev->gpucore->shader_size) { + + if (device->shader_mem_phys || device->shader_mem_virt) + KGSL_DRV_ERR(device, + "Shader memory already specified in device tree\n"); + else { + device->shader_mem_phys = device->reg_phys + + adreno_dev->gpucore->shader_offset; + device->shader_mem_virt = device->reg_virt + + adreno_dev->gpucore->shader_offset; + device->shader_mem_len = + adreno_dev->gpucore->shader_size; + } + } + + /* Adjust snapshot section sizes according to core */ + if ((adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))) { + gpudev->snapshot_data->sect_sizes->cp_pfp = + A320_SNAPSHOT_CP_STATE_SECTION_SIZE; + gpudev->snapshot_data->sect_sizes->roq = + A320_SNAPSHOT_ROQ_SECTION_SIZE; + gpudev->snapshot_data->sect_sizes->cp_merciu = + A320_SNAPSHOT_CP_MERCIU_SECTION_SIZE; + } + + /* + * Allocate a small chunk of memory for precise cmdbatch profiling for + * those targets that have the always on timer + */ + + if 
(!adreno_is_a3xx(adreno_dev)) { + int r = kgsl_allocate_global(&adreno_dev->dev, + &adreno_dev->cmdbatch_profile_buffer, PAGE_SIZE, 0, 0); + + adreno_dev->cmdbatch_profile_index = 0; + + if (r == 0) { + set_bit(ADRENO_DEVICE_CMDBATCH_PROFILE, + &adreno_dev->priv); + kgsl_sharedmem_set(&adreno_dev->dev, + &adreno_dev->cmdbatch_profile_buffer, 0, 0, + PAGE_SIZE); + } + + } + + if (nopreempt == false && + ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) { + int r = 0; + + if (gpudev->preemption_init) + r = gpudev->preemption_init(adreno_dev); + + if (r == 0) + set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + else + WARN(1, "adreno: GPU preemption is disabled\n"); + } + + return 0; +} + +static bool regulators_left_on(struct kgsl_device *device) +{ + int i; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) { + struct kgsl_regulator *regulator = + &device->pwrctrl.regulators[i]; + + if (IS_ERR_OR_NULL(regulator->reg)) + break; + + if (regulator_is_enabled(regulator->reg)) + return true; + } + + return false; +} + +/** + * _adreno_start - Power up the GPU and prepare to accept commands + * @adreno_dev: Pointer to an adreno_device structure + * + * The core function that powers up and initalizes the GPU. This function is + * called at init and after coming out of SLUMBER + */ +static int _adreno_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int status = -EINVAL; + unsigned int state = device->state; + bool regulator_left_on; + unsigned int pmqos_wakeup_vote = device->pwrctrl.pm_qos_wakeup_latency; + unsigned int pmqos_active_vote = device->pwrctrl.pm_qos_active_latency; + + /* make sure ADRENO_DEVICE_STARTED is not set here */ + BUG_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)); + + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + pmqos_wakeup_vote); + + kgsl_cffdump_open(device); + + regulator_left_on = regulators_left_on(device); + + /* Clear any GPU faults that might have been left over */ + adreno_clear_gpu_fault(adreno_dev); + + /* Put the GPU in a responsive state */ + status = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + if (status) + goto error_pwr_off; + + /* Set the bit to indicate that we've just powered on */ + set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv); + + /* Soft reset the GPU if a regulator is stuck on*/ + if (regulator_left_on) + _soft_reset(adreno_dev); + + status = kgsl_mmu_start(device); + if (status) + goto error_pwr_off; + + /* Program GPU contect protection init values */ + if (device->mmu.secured) { + if (adreno_is_a4xx(adreno_dev)) + adreno_writereg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, 0x2); + adreno_writereg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TSB_CONTROL, 0x0); + + adreno_writereg64(adreno_dev, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + KGSL_IOMMU_SECURE_BASE); + adreno_writereg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + KGSL_IOMMU_SECURE_SIZE); + } + + status = adreno_ocmem_malloc(adreno_dev); + if (status) { + KGSL_DRV_ERR(device, "OCMEM malloc failed\n"); + goto error_mmu_off; + } + + /* Enable 64 bit gpu addr if feature is set */ + if (gpudev->enable_64bit && + ADRENO_FEATURE(adreno_dev, ADRENO_64BIT)) + gpudev->enable_64bit(adreno_dev); + + if (adreno_dev->perfctr_pwr_lo == 0) { + int ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_PWR, 1, + &adreno_dev->perfctr_pwr_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + 
KGSL_DRV_ERR(device, + "Unable to get the perf counters for DCVS\n"); + adreno_dev->perfctr_pwr_lo = 0; + } + } + + if (device->pwrctrl.bus_control) { + int ret; + + /* VBIF waiting for RAM */ + if (adreno_dev->starved_ram_lo == 0) { + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0, + &adreno_dev->starved_ram_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + KGSL_DRV_ERR(device, + "Unable to get perf counters for bus DCVS\n"); + adreno_dev->starved_ram_lo = 0; + } + } + + /* VBIF DDR cycles */ + if (adreno_dev->ram_cycles_lo == 0) { + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_VBIF, + VBIF_AXI_TOTAL_BEATS, + &adreno_dev->ram_cycles_lo, NULL, + PERFCOUNTER_FLAG_KERNEL); + + if (ret) { + KGSL_DRV_ERR(device, + "Unable to get perf counters for bus DCVS\n"); + adreno_dev->ram_cycles_lo = 0; + } + } + } + + /* Clear the busy_data stats - we're starting over from scratch */ + adreno_dev->busy_data.gpu_busy = 0; + adreno_dev->busy_data.vbif_ram_cycles = 0; + adreno_dev->busy_data.vbif_starved_ram = 0; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LM) + && adreno_dev->lm_threshold_count == 0) { + int ret; + + ret = adreno_perfcounter_get(adreno_dev, + KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 27, + &adreno_dev->lm_threshold_count, NULL, + PERFCOUNTER_FLAG_KERNEL); + /* Ignore noncritical ret - used for debugfs */ + if (ret) + adreno_dev->lm_threshold_count = 0; + } + + /* Restore performance counter registers with saved values */ + adreno_perfcounter_restore(adreno_dev); + + /* Start the GPU */ + gpudev->start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + adreno_irqctrl(adreno_dev, 1); + + adreno_perfcounter_start(adreno_dev); + + /* Clear FSR here in case it is set from a previous pagefault */ + kgsl_mmu_clear_fsr(&device->mmu); + + status = adreno_ringbuffer_start(adreno_dev, ADRENO_START_COLD); + if (status) + goto error_mmu_off; + + if (gpudev->hw_init) { + status = gpudev->hw_init(adreno_dev); + if (status) + goto error_mmu_off; + } + + /* Start the dispatcher */ + adreno_dispatcher_start(device); + + device->reset_counter++; + + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + if (pmqos_active_vote != pmqos_wakeup_vote) + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + pmqos_active_vote); + + return 0; + +error_mmu_off: + kgsl_mmu_stop(&device->mmu); + +error_pwr_off: + /* set the state back to original state */ + kgsl_pwrctrl_change_state(device, state); + + if (pmqos_active_vote != pmqos_wakeup_vote) + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + pmqos_active_vote); + + return status; +} + +/** + * adreno_start() - Power up and initialize the GPU + * @device: Pointer to the KGSL device to power up + * @priority: Boolean flag to specify of the start should be scheduled in a low + * latency work queue + * + * Power up the GPU and initialize it. 
If priority is specified then elevate + * the thread priority for the duration of the start operation + */ +static int adreno_start(struct kgsl_device *device, int priority) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int nice = task_nice(current); + int ret; + + if (priority && (adreno_wake_nice < nice)) + set_user_nice(current, adreno_wake_nice); + + ret = _adreno_start(adreno_dev); + + if (priority) + set_user_nice(current, nice); + + return ret; +} + +/** + * adreno_vbif_clear_pending_transactions() - Clear transactions in VBIF pipe + * @device: Pointer to the device whose VBIF pipe is to be cleared + */ +static int adreno_vbif_clear_pending_transactions(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int mask = gpudev->vbif_xin_halt_ctrl0_mask; + unsigned int val; + unsigned long wait_for_vbif; + int ret = 0; + + adreno_writereg(adreno_dev, ADRENO_REG_VBIF_XIN_HALT_CTRL0, mask); + /* wait for the transactions to clear */ + wait_for_vbif = jiffies + msecs_to_jiffies(100); + while (1) { + adreno_readreg(adreno_dev, + ADRENO_REG_VBIF_XIN_HALT_CTRL1, &val); + if ((val & mask) == mask) + break; + if (time_after(jiffies, wait_for_vbif)) { + KGSL_DRV_ERR(device, + "Wait limit reached for VBIF XIN Halt\n"); + ret = -ETIMEDOUT; + break; + } + } + adreno_writereg(adreno_dev, ADRENO_REG_VBIF_XIN_HALT_CTRL0, 0); + return ret; +} + +static int adreno_stop(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + return 0; + + adreno_set_active_ctxs_null(adreno_dev); + + adreno_dispatcher_stop(adreno_dev); + + adreno_ringbuffer_stop(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + adreno_ocmem_free(adreno_dev); + + /* Save active coresight registers if applicable */ + adreno_coresight_stop(adreno_dev); + + /* Save physical performance counter values before GPU power down*/ + adreno_perfcounter_save(adreno_dev); + + adreno_vbif_clear_pending_transactions(device); + + kgsl_mmu_stop(&device->mmu); + kgsl_cffdump_close(device); + + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + return 0; +} + +static inline bool adreno_try_soft_reset(struct kgsl_device *device, int fault) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* + * Do not do soft reset for a IOMMU fault (because the IOMMU hardware + * needs a reset too) or for the A304 because it can't do SMMU + * programming of any kind after a soft reset + */ + + if ((fault & ADRENO_IOMMU_PAGE_FAULT) || adreno_is_a304(adreno_dev)) + return false; + + return true; +} + +/** + * adreno_reset() - Helper function to reset the GPU + * @device: Pointer to the KGSL device structure for the GPU + * @fault: Type of fault. Needed to skip soft reset for MMU fault + * + * Try to reset the GPU to recover from a fault. First, try to do a low latency + * soft reset. If the soft reset fails for some reason, then bring out the big + * guns and toggle the footswitch. 
+ */ +int adreno_reset(struct kgsl_device *device, int fault) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = -EINVAL; + int i = 0; + + /* Try soft reset first */ + if (adreno_try_soft_reset(device, fault)) { + /* Make sure VBIF is cleared before resetting */ + ret = adreno_vbif_clear_pending_transactions(device); + + if (ret == 0) { + ret = adreno_soft_reset(device); + if (ret) + KGSL_DEV_ERR_ONCE(device, + "Device soft reset failed\n"); + } + } + if (ret) { + /* If soft reset failed/skipped, then pull the power */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + /* since device is officially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* Keep trying to start the device until it works */ + for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) { + ret = adreno_start(device, 0); + if (!ret) + break; + + msleep(20); + } + } + if (ret) + return ret; + + if (0 != i) + KGSL_DRV_WARN(device, "Device hard reset tried %d tries\n", i); + + /* + * If active_cnt is non-zero then the system was active before + * going into a reset - put it back in that state + */ + + if (atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + else + kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP); + + /* Set the page table back to the default page table */ + kgsl_mmu_set_pt(&device->mmu, device->mmu.defaultpagetable); + kgsl_sharedmem_writel(device, + &adreno_dev->ringbuffers[0].pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname), 0); + + return ret; +} + +static int adreno_getproperty(struct kgsl_device *device, + unsigned int type, + void __user *value, + size_t sizebytes) +{ + int status = -EINVAL; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + switch (type) { + case KGSL_PROP_DEVICE_INFO: + { + struct kgsl_devinfo devinfo; + + if (sizebytes != sizeof(devinfo)) { + status = -EINVAL; + break; + } + + memset(&devinfo, 0, sizeof(devinfo)); + devinfo.device_id = device->id+1; + devinfo.chip_id = adreno_dev->chipid; + devinfo.mmu_enabled = kgsl_mmu_enabled(); + devinfo.gmem_gpubaseaddr = adreno_dev->gmem_base; + devinfo.gmem_sizebytes = adreno_dev->gmem_size; + + if (copy_to_user(value, &devinfo, sizeof(devinfo)) != + 0) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_DEVICE_SHADOW: + { + struct kgsl_shadowprop shadowprop; + + if (sizebytes != sizeof(shadowprop)) { + status = -EINVAL; + break; + } + memset(&shadowprop, 0, sizeof(shadowprop)); + if (device->memstore.hostptr) { + /*NOTE: with mmu enabled, gpuaddr doesn't mean + * anything to mmap(). 
+ */ + shadowprop.gpuaddr = + (unsigned int) device->memstore.gpuaddr; + shadowprop.size = device->memstore.size; + /* GSL needs this to be set, even if it + appears to be meaningless */ + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; + } + if (copy_to_user(value, &shadowprop, + sizeof(shadowprop))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_MMU_ENABLE: + { + int mmu_prop = kgsl_mmu_enabled(); + + if (sizebytes != sizeof(int)) { + status = -EINVAL; + break; + } + if (copy_to_user(value, &mmu_prop, sizeof(mmu_prop))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_INTERRUPT_WAITS: + { + int int_waits = 1; + if (sizebytes != sizeof(int)) { + status = -EINVAL; + break; + } + if (copy_to_user(value, &int_waits, sizeof(int))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_UCHE_GMEM_VADDR: + { + uint64_t gmem_vaddr = 0; + if (adreno_is_a5xx(adreno_dev)) + gmem_vaddr = ADRENO_UCHE_GMEM_BASE; + if (sizebytes != sizeof(uint64_t)) { + status = -EINVAL; + break; + } + if (copy_to_user(value, &gmem_vaddr, + sizeof(uint64_t))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_SP_GENERIC_MEM: + { + struct kgsl_sp_generic_mem sp_mem; + if (sizebytes != sizeof(sp_mem)) { + status = -EINVAL; + break; + } + memset(&sp_mem, 0, sizeof(sp_mem)); + + sp_mem.local = adreno_dev->sp_local_gpuaddr; + sp_mem.pvt = adreno_dev->sp_pvt_gpuaddr; + + if (copy_to_user(value, &sp_mem, sizeof(sp_mem))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_UCODE_VERSION: + { + struct kgsl_ucode_version ucode; + + if (sizebytes != sizeof(ucode)) { + status = -EINVAL; + break; + } + memset(&ucode, 0, sizeof(ucode)); + + ucode.pfp = adreno_dev->pfp_fw_version; + ucode.pm4 = adreno_dev->pm4_fw_version; + + if (copy_to_user(value, &ucode, sizeof(ucode))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_GPMU_VERSION: + { + struct kgsl_gpmu_version gpmu; + + if (adreno_dev->gpucore == NULL) { + status = -EINVAL; + break; + } + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) { + status = -EOPNOTSUPP; + break; + } + + if (sizebytes != sizeof(gpmu)) { + status = -EINVAL; + break; + } + memset(&gpmu, 0, sizeof(gpmu)); + + gpmu.major = adreno_dev->gpucore->gpmu_major; + gpmu.minor = adreno_dev->gpucore->gpmu_minor; + gpmu.features = adreno_dev->gpucore->gpmu_features; + + if (copy_to_user(value, &gpmu, sizeof(gpmu))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + default: + status = -EINVAL; + } + + return status; +} + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint) +{ + int status = 0; + + switch (constraint->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + struct kgsl_device_constraint_pwrlevel pwr; + + if (constraint->size != sizeof(pwr)) { + status = -EINVAL; + break; + } + + if (copy_from_user(&pwr, + (void __user *)constraint->data, + sizeof(pwr))) { + status = -EFAULT; + break; + } + if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) { + status = -EINVAL; + break; + } + + context->pwr_constraint.type = + KGSL_CONSTRAINT_PWRLEVEL; + context->pwr_constraint.sub_type = pwr.level; + trace_kgsl_user_pwrlevel_constraint(device, + context->id, + context->pwr_constraint.type, + context->pwr_constraint.sub_type); + } + break; + case KGSL_CONSTRAINT_NONE: + if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL) + 
trace_kgsl_user_pwrlevel_constraint(device, + context->id, + KGSL_CONSTRAINT_NONE, + context->pwr_constraint.sub_type); + context->pwr_constraint.type = KGSL_CONSTRAINT_NONE; + break; + + default: + status = -EINVAL; + break; + } + + /* If a new constraint has been set for a context, cancel the old one */ + if ((status == 0) && + (context->id == device->pwrctrl.constraint.owner_id)) { + trace_kgsl_constraint(device, device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + return status; +} + +static int adreno_setproperty(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes) +{ + int status = -EINVAL; + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + switch (type) { + case KGSL_PROP_PWRCTRL: { + unsigned int enable; + + if (sizebytes != sizeof(enable)) + break; + + if (copy_from_user(&enable, value, sizeof(enable))) { + status = -EFAULT; + break; + } + + mutex_lock(&device->mutex); + + if (enable) { + device->pwrctrl.ctrl_flags = 0; + + if (!kgsl_active_count_get(&adreno_dev->dev)) { + adreno_fault_detect_start(adreno_dev); + kgsl_active_count_put(&adreno_dev->dev); + } + + kgsl_pwrscale_enable(device); + } else { + kgsl_pwrctrl_change_state(device, + KGSL_STATE_ACTIVE); + device->pwrctrl.ctrl_flags = KGSL_PWR_ON; + adreno_fault_detect_stop(adreno_dev); + kgsl_pwrscale_disable(device); + } + + mutex_unlock(&device->mutex); + status = 0; + } + break; + case KGSL_PROP_PWR_CONSTRAINT: { + struct kgsl_device_constraint constraint; + struct kgsl_context *context; + + if (sizebytes != sizeof(constraint)) + break; + + if (copy_from_user(&constraint, value, + sizeof(constraint))) { + status = -EFAULT; + break; + } + + context = kgsl_context_get_owner(dev_priv, + constraint.context_id); + + if (context == NULL) + break; + + status = adreno_set_constraint(device, context, + &constraint); + + kgsl_context_put(context); + } + break; + default: + break; + } + + return status; +} + +/* + * adreno_irq_pending() - Checks if interrupt is generated by h/w + * @adreno_dev: Pointer to device whose interrupts are checked + * + * Returns true if interrupts are pending from device else 0. + */ +inline unsigned int adreno_irq_pending(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int status; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status); + + return (status & gpudev->irq->mask) ? 
1 : 0; +} + + +/** + * adreno_hw_isidle() - Check if the GPU core is idle + * @adreno_dev: Pointer to the Adreno device structure for the GPU + * + * Return true if the RBBM status register for the GPU type indicates that the + * hardware is idle + */ +bool adreno_hw_isidle(struct adreno_device *adreno_dev) +{ + const struct adreno_gpu_core *gpucore = adreno_dev->gpucore; + unsigned int reg_rbbm_status; + + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, + ®_rbbm_status); + + if (reg_rbbm_status & gpucore->busy_mask) + return false; + + /* Don't consider ourselves idle if there is an IRQ pending */ + if (adreno_irq_pending(adreno_dev)) + return false; + + return true; +} + +/** + * adreno_soft_reset() - Do a soft reset of the GPU hardware + * @device: KGSL device to soft reset + * + * "soft reset" the GPU hardware - this is a fast path GPU reset + * The GPU hardware is reset but we never pull power so we can skip + * a lot of the standard adreno_stop/adreno_start sequence + */ +static int adreno_soft_reset(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ret; + + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + adreno_set_active_ctxs_null(adreno_dev); + + adreno_irqctrl(adreno_dev, 0); + + adreno_clear_gpu_fault(adreno_dev); + /* since device is oficially off now clear start bit */ + clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + + /* save physical performance counter values before GPU soft reset */ + adreno_perfcounter_save(adreno_dev); + + kgsl_cffdump_close(device); + /* Reset the GPU */ + _soft_reset(adreno_dev); + + /* start of new CFF after reset */ + kgsl_cffdump_open(device); + + /* Enable 64 bit gpu addr if feature is set */ + if (gpudev->enable_64bit && + ADRENO_FEATURE(adreno_dev, ADRENO_64BIT)) + gpudev->enable_64bit(adreno_dev); + + /* Restore physical performance counter values after soft reset */ + adreno_perfcounter_restore(adreno_dev); + + /* Reinitialize the GPU */ + gpudev->start(adreno_dev); + + /* Re-initialize the coresight registers if applicable */ + adreno_coresight_start(adreno_dev); + + /* Enable IRQ */ + adreno_irqctrl(adreno_dev, 1); + + /* stop all ringbuffers to cancel RB events */ + adreno_ringbuffer_stop(adreno_dev); + /* + * If we have offsets for the jump tables we can try to do a warm start, + * otherwise do a full ringbuffer restart + */ + + if (ADRENO_FEATURE(adreno_dev, ADRENO_WARM_START)) + ret = adreno_ringbuffer_start(adreno_dev, ADRENO_START_WARM); + else + ret = adreno_ringbuffer_start(adreno_dev, ADRENO_START_COLD); + if (ret) + goto done; + + if (gpudev->hw_init) + ret = gpudev->hw_init(adreno_dev); + if (ret) + goto done; + + device->reset_counter++; + /* device is back online */ + set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv); + +done: + return ret; +} + +/* + * adreno_isidle() - return true if the GPU hardware is idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Return true if the GPU hardware is idle and there are no commands pending in + * the ringbuffer + */ +bool adreno_isidle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb; + int i; + + if (!kgsl_state_is_awake(device)) + return true; + + adreno_get_rptr(ADRENO_CURRENT_RINGBUFFER(adreno_dev)); + + /* + * wptr is updated when we add commands to ringbuffer, add a barrier + * to make sure updated wptr is compared to rptr + */ + smp_mb(); + + /* + * ringbuffer is 
truly idle when all ringbuffers read and write + * pointers are equal + */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->rptr != rb->wptr) + break; + } + + if (i == adreno_dev->num_ringbuffers) + return adreno_hw_isidle(adreno_dev); + + return false; +} + +/** + * adreno_spin_idle() - Spin wait for the GPU to idle + * @device: Pointer to the KGSL device + * @timeout: milliseconds to wait before returning error + * + * Spin the CPU waiting for the RBBM status to return idle + */ +int adreno_spin_idle(struct kgsl_device *device, unsigned int timeout) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned long wait = jiffies + msecs_to_jiffies(timeout); + + kgsl_cffdump_regpoll(device, + adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2, + 0x00000000, 0x80000000); + + while (time_before(jiffies, wait)) { + /* + * If we fault, stop waiting and return an error. The dispatcher + * will clean up the fault from the work queue, but we need to + * make sure we don't block it by waiting for an idle that + * will never come. + */ + + if (adreno_gpu_fault(adreno_dev) != 0) + return -EDEADLK; + + if (adreno_isidle(device)) + return 0; + } + + return -ETIMEDOUT; +} + +/** + * adreno_idle() - wait for the GPU hardware to go idle + * @device: Pointer to the KGSL device structure for the GPU + * + * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet. + */ + +int adreno_idle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret; + + /* + * Make sure the device mutex is held so the dispatcher can't send any + * more commands to the hardware + */ + + BUG_ON(!mutex_is_locked(&device->mutex)); + + /* Check if we are already idle before idling dispatcher */ + if (adreno_isidle(device)) + return 0; + /* + * Wait for dispatcher to finish completing commands + * already submitted + */ + ret = adreno_dispatcher_idle(adreno_dev); + if (ret) + return ret; + + return adreno_spin_idle(device, ADRENO_IDLE_TIMEOUT); +} + +/** + * adreno_drain() - Drain the dispatch queue + * @device: Pointer to the KGSL device structure for the GPU + * + * Drain the dispatcher of existing command batches. This halts + * additional commands from being issued until the gate is completed. + */ +static int adreno_drain(struct kgsl_device *device) +{ + reinit_completion(&device->cmdbatch_gate); + + return 0; +} + +/* Caller must hold the device mutex. 
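+ * adreno_suspend_context() processes any pending profiling results,
+ * idles the GPU, switches the MMU back to the default pagetable and
+ * sets the ringbuffers back to the NULL context so the device can be
+ * safely suspended.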
*/ +static int adreno_suspend_context(struct kgsl_device *device) +{ + int status = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* process any profiling results that are available */ + adreno_profile_process_results(adreno_dev); + + status = adreno_idle(device); + if (status) + return status; + /* set the device to default pagetable */ + kgsl_mmu_set_pt(&device->mmu, device->mmu.defaultpagetable); + kgsl_sharedmem_writel(device, + &adreno_dev->ringbuffers[0].pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname), 0); + /* set ringbuffers to NULL ctxt */ + adreno_set_active_ctxs_null(adreno_dev); + + return status; +} + +/** + * adreno_read - General read function to read adreno device memory + * @device - Pointer to the GPU device struct (for adreno device) + * @base - Base address (kernel virtual) where the device memory is mapped + * @offsetwords - Offset in words from the base address, of the memory that + * is to be read + * @value - Value read from the device memory + * @mem_len - Length of the device memory mapped to the kernel + */ +static void adreno_read(struct kgsl_device *device, void __iomem *base, + unsigned int offsetwords, unsigned int *value, + unsigned int mem_len) +{ + + unsigned int __iomem *reg; + BUG_ON(offsetwords*sizeof(uint32_t) >= mem_len); + reg = (unsigned int __iomem *)(base + (offsetwords << 2)); + + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + + /*ensure this read finishes before the next one. + * i.e. act like normal readl() */ + *value = __raw_readl(reg); + rmb(); +} + +/** + * adreno_regread - Used to read adreno device registers + * @offsetwords - Word (4 Bytes) offset to the register to be read + * @value - Value read from device register + */ +static void adreno_regread(struct kgsl_device *device, unsigned int offsetwords, + unsigned int *value) +{ + adreno_read(device, device->reg_virt, offsetwords, value, + device->reg_len); +} + +/** + * adreno_shadermem_regread - Used to read GPU (adreno) shader memory + * @device - GPU device whose shader memory is to be read + * @offsetwords - Offset in words, of the shader memory address to be read + * @value - Pointer to where the read shader mem value is to be stored + */ +void adreno_shadermem_regread(struct kgsl_device *device, + unsigned int offsetwords, unsigned int *value) +{ + adreno_read(device, device->shader_mem_virt, offsetwords, value, + device->shader_mem_len); +} + +static void adreno_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + unsigned int __iomem *reg; + + BUG_ON(offsetwords*sizeof(uint32_t) >= device->reg_len); + + if (!in_interrupt()) + kgsl_pre_hwaccess(device); + + trace_kgsl_regwrite(device, offsetwords, value); + + kgsl_cffdump_regwrite(device, offsetwords << 2, value); + reg = (unsigned int __iomem *)(device->reg_virt + (offsetwords << 2)); + + /*ensure previous writes post before this one, + * i.e. act like normal writel() */ + wmb(); + __raw_writel(value, reg); +} + +/** + * adreno_waittimestamp - sleep while waiting for the specified timestamp + * @device - pointer to a KGSL device structure + * @context - pointer to the active kgsl context + * @timestamp - GPU timestamp to wait for + * @msecs - amount of time to wait (in milliseconds) + * + * Wait up to 'msecs' milliseconds for the specified timestamp to expire. 
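+ *
+ * Returns 0 on success, -ENOTTY if no context was specified, -ENOENT if
+ * the context has been detached, -EDEADLK if the context was invalidated
+ * and -EPROTO if the device faulted since the last check.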
+ */ +static int adreno_waittimestamp(struct kgsl_device *device, + struct kgsl_context *context, + unsigned int timestamp, + unsigned int msecs) +{ + int ret; + + if (context == NULL) { + /* If they are doing then complain once */ + dev_WARN_ONCE(device->dev, 1, + "IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n"); + return -ENOTTY; + } + + /* Return -ENOENT if the context has been detached */ + if (kgsl_context_detached(context)) + return -ENOENT; + + ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context, + timestamp, msecs); + + /* If the context got invalidated then return a specific error */ + if (kgsl_context_invalid(context)) + ret = -EDEADLK; + + /* + * Return -EPROTO if the device has faulted since the last time we + * checked. Userspace uses this as a marker for performing post + * fault activities + */ + + if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv)) + ret = -EPROTO; + + return ret; +} + +/** + * __adreno_readtimestamp() - Reads the timestamp from memstore memory + * @device: Pointer to device whose memstore is read + * @index: Index into the memstore memory + * @type: Type of timestamp to read + * @timestamp: The out parameter where the timestamp is read + */ +static int __adreno_readtimestamp(struct kgsl_device *device, int index, + int type, unsigned int *timestamp) +{ + int status = 0; + + switch (type) { + case KGSL_TIMESTAMP_CONSUMED: + kgsl_sharedmem_readl(&device->memstore, timestamp, + KGSL_MEMSTORE_OFFSET(index, soptimestamp)); + break; + case KGSL_TIMESTAMP_RETIRED: + kgsl_sharedmem_readl(&device->memstore, timestamp, + KGSL_MEMSTORE_OFFSET(index, eoptimestamp)); + break; + default: + status = -EINVAL; + *timestamp = 0; + break; + } + return status; +} + +/** + * adreno_rb_readtimestamp(): Return the value of given type of timestamp + * for a RB + * @device: GPU device whose timestamp values are being queried + * @priv: The object being queried for a timestamp (expected to be a rb pointer) + * @type: The type of timestamp (one of 3) to be read + * @timestamp: Pointer to where the read timestamp is to be written to + * + * CONSUMED and RETIRED type timestamps are sorted by id and are constantly + * updated by the GPU through shared memstore memory. QUEUED type timestamps + * are read directly from context struct. + + * The function returns 0 on success and timestamp value at the *timestamp + * address and returns -EINVAL on any read error/invalid type and timestamp = 0. + */ +int adreno_rb_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + int status = 0; + struct adreno_ringbuffer *rb = priv; + + /* + * If user passed in a NULL pointer for timestamp, return without + * doing anything. + */ + if (!timestamp) + return status; + + if (KGSL_TIMESTAMP_QUEUED == type) + *timestamp = rb->timestamp; + else + status = __adreno_readtimestamp(device, + rb->id + KGSL_MEMSTORE_MAX, + type, timestamp); + + return status; +} + +/** + * adreno_readtimestamp(): Return the value of given type of timestamp + * @device: GPU device whose timestamp values are being queried + * @priv: The object being queried for a timestamp (expected to be a context) + * @type: The type of timestamp (one of 3) to be read + * @timestamp: Pointer to where the read timestamp is to be written to + * + * CONSUMED and RETIRED type timestamps are sorted by id and are constantly + * updated by the GPU through shared memstore memory. QUEUED type timestamps + * are read directly from context struct. 
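+ * Context timestamps live in the memstore slot indexed by the context id;
+ * ringbuffer timestamps (see adreno_rb_readtimestamp()) use the slot at
+ * rb->id + KGSL_MEMSTORE_MAX.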
+ + * The function returns 0 on success and timestamp value at the *timestamp + * address and returns -EINVAL on any read error/invalid type and timestamp = 0. + */ +static int adreno_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp) +{ + int status = 0; + struct kgsl_context *context = priv; + unsigned int id = KGSL_CONTEXT_ID(context); + + BUG_ON(NULL == context || id >= KGSL_MEMSTORE_MAX); + /* + * If user passed in a NULL pointer for timestamp, return without + * doing anything. + */ + if (!timestamp) + return status; + + if (KGSL_TIMESTAMP_QUEUED == type) + *timestamp = adreno_context_timestamp(context); + else + status = __adreno_readtimestamp(device, + context->id, type, timestamp); + + return status; +} + +static inline s64 adreno_ticks_to_us(u32 ticks, u32 freq) +{ + freq /= 1000000; + return ticks / freq; +} + +static unsigned int counter_delta(struct adreno_device *adreno_dev, + unsigned int reg, unsigned int *counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int val; + unsigned int ret = 0; + + /* Read the value */ + kgsl_regread(device, reg, &val); + + /* Return 0 for the first read */ + if (*counter != 0) { + if (val < *counter) + ret = (0xFFFFFFFF - *counter) + val; + else + ret = val - *counter; + } + + *counter = val; + return ret; +} + +/** + * adreno_power_stats() - Reads the counters needed for freq decisions + * @device: Pointer to device whose counters are read + * @stats: Pointer to stats set that needs updating + * Power: The caller is expected to be in a clock enabled state as this + * function does reg reads + */ +static void adreno_power_stats(struct kgsl_device *device, + struct kgsl_power_stats *stats) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct adreno_busy_data *busy = &adreno_dev->busy_data; + + memset(stats, 0, sizeof(*stats)); + + /* Get the busy cycles counted since the counter was last reset */ + if (adreno_dev->perfctr_pwr_lo != 0) { + uint64_t gpu_busy; + + gpu_busy = counter_delta(adreno_dev, adreno_dev->perfctr_pwr_lo, + &busy->gpu_busy); + + stats->busy_time = adreno_ticks_to_us(gpu_busy, + kgsl_pwrctrl_active_freq(pwr)); + } + + if (device->pwrctrl.bus_control) { + uint64_t ram_cycles = 0, starved_ram = 0; + + if (adreno_dev->ram_cycles_lo != 0) + ram_cycles = counter_delta(adreno_dev, + adreno_dev->ram_cycles_lo, + &busy->vbif_ram_cycles); + + if (adreno_dev->starved_ram_lo != 0) + starved_ram = counter_delta(adreno_dev, + adreno_dev->starved_ram_lo, + &busy->vbif_starved_ram); + + stats->ram_time = ram_cycles; + stats->ram_wait = starved_ram; + } + if (adreno_dev->lm_threshold_count) + kgsl_regread(&adreno_dev->dev, adreno_dev->lm_threshold_count, + &adreno_dev->lm_threshold_cross); +} + +static unsigned int adreno_gpuid(struct kgsl_device *device, + unsigned int *chipid) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Some applications need to know the chip ID too, so pass + * that as a parameter */ + + if (chipid != NULL) + *chipid = adreno_dev->chipid; + + /* Standard KGSL gpuid format: + * top word is 0x0002 for 2D or 0x0003 for 3D + * Bottom word is core specific identifer + */ + + return (0x0003 << 16) | ADRENO_GPUREV(adreno_dev); +} + +static int adreno_regulator_enable(struct kgsl_device *device) +{ + int ret = 0; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + if 
(gpudev->regulator_enable && + !test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) { + ret = gpudev->regulator_enable(adreno_dev); + if (!ret) + set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv); + } + return ret; +} + +static bool adreno_is_hw_collapsible(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + /* + * Skip power collapse for A304, if power ctrl flag is set to + * non zero. As A304 soft_reset will not work, power collapse + * needs to disable to avoid soft_reset. + */ + if (adreno_is_a304(adreno_dev) && + device->pwrctrl.ctrl_flags) + return false; + + return adreno_isidle(device) && (gpudev->is_sptp_idle ? + gpudev->is_sptp_idle(adreno_dev) : true); +} + +static void adreno_regulator_disable(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + if (gpudev->regulator_disable && + test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv)) { + gpudev->regulator_disable(adreno_dev); + clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED, + &adreno_dev->priv); + } +} + +static void adreno_pwrlevel_change_settings(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->pwrlevel_change_settings) + gpudev->pwrlevel_change_settings(adreno_dev, prelevel, + postlevel, post); +} + +static void adreno_iommu_sync(struct kgsl_device *device, bool sync) +{ + struct scm_desc desc = {0}; + int ret; + + if (sync == true) { + mutex_lock(&kgsl_mmu_sync); + desc.args[0] = true; + desc.arginfo = SCM_ARGS(1); + ret = scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_PWR, 0x8), &desc); + if (ret) + KGSL_DRV_ERR(device, + "MMU sync with Hypervisor off %x\n", ret); + } else { + desc.args[0] = false; + desc.arginfo = SCM_ARGS(1); + scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_PWR, 0x8), &desc); + mutex_unlock(&kgsl_mmu_sync); + } +} + +static void _regulator_disable(struct kgsl_regulator *regulator, bool poll) +{ + unsigned long wait_time = jiffies + msecs_to_jiffies(200); + + if (IS_ERR_OR_NULL(regulator->reg)) + return; + + regulator_disable(regulator->reg); + + if (poll == false) + return; + + while (!time_after(jiffies, wait_time)) { + if (!regulator_is_enabled(regulator->reg)) + return; + cpu_relax(); + } + + KGSL_CORE_ERR("regulator '%s' still on after 200ms\n", regulator->name); +} + +static void adreno_regulator_disable_poll(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + /* Fast path - hopefully we don't need this quirk */ + if (!ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_IOMMU_SYNC)) { + for (i = KGSL_MAX_REGULATORS - 1; i >= 0; i--) + _regulator_disable(&pwr->regulators[i], false); + return; + } + + adreno_iommu_sync(device, true); + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) + _regulator_disable(&pwr->regulators[i], true); + + adreno_iommu_sync(device, false); +} + +static const struct kgsl_functable adreno_functable = { + /* Mandatory functions */ + .regread = adreno_regread, + .regwrite = adreno_regwrite, + .idle = adreno_idle, + .isidle = adreno_isidle, + .suspend_context = adreno_suspend_context, + .init = adreno_init, + .start = adreno_start, + .stop = adreno_stop, + .getproperty = 
adreno_getproperty, + .getproperty_compat = adreno_getproperty_compat, + .waittimestamp = adreno_waittimestamp, + .readtimestamp = adreno_readtimestamp, + .issueibcmds = adreno_ringbuffer_issueibcmds, + .ioctl = adreno_ioctl, + .compat_ioctl = adreno_compat_ioctl, + .power_stats = adreno_power_stats, + .gpuid = adreno_gpuid, + .snapshot = adreno_snapshot, + .irq_handler = adreno_irq_handler, + .drain = adreno_drain, + /* Optional functions */ + .drawctxt_create = adreno_drawctxt_create, + .drawctxt_detach = adreno_drawctxt_detach, + .drawctxt_destroy = adreno_drawctxt_destroy, + .drawctxt_dump = adreno_drawctxt_dump, + .setproperty = adreno_setproperty, + .setproperty_compat = adreno_setproperty_compat, + .drawctxt_sched = adreno_drawctxt_sched, + .resume = adreno_dispatcher_start, + .regulator_enable = adreno_regulator_enable, + .is_hw_collapsible = adreno_is_hw_collapsible, + .regulator_disable = adreno_regulator_disable, + .pwrlevel_change_settings = adreno_pwrlevel_change_settings, + .regulator_disable_poll = adreno_regulator_disable_poll, +}; + +static struct platform_driver adreno_platform_driver = { + .probe = adreno_probe, + .remove = adreno_remove, + .suspend = kgsl_suspend_driver, + .resume = kgsl_resume_driver, + .id_table = adreno_id_table, + .driver = { + .owner = THIS_MODULE, + .name = DEVICE_3D_NAME, + .pm = &kgsl_pm_ops, + .of_match_table = adreno_match_table, + } +}; + +static int __init kgsl_3d_init(void) +{ + return platform_driver_register(&adreno_platform_driver); +} + +static void __exit kgsl_3d_exit(void) +{ + platform_driver_unregister(&adreno_platform_driver); +} + +module_init(kgsl_3d_init); +module_exit(kgsl_3d_exit); + + +static struct of_device_id busmon_match_table[] = { + { .compatible = "qcom,kgsl-busmon", .data = &device_3d0 }, + {} +}; + +static int adreno_busmon_probe(struct platform_device *pdev) +{ + struct kgsl_device *device; + const struct of_device_id *pdid = + of_match_device(busmon_match_table, &pdev->dev); + + if (pdid == NULL) + return -ENXIO; + + device = (struct kgsl_device *)pdid->data; + device->busmondev = &pdev->dev; + dev_set_drvdata(device->busmondev, device); + + return 0; +} + +static struct platform_driver kgsl_bus_platform_driver = { + .probe = adreno_busmon_probe, + .driver = { + .owner = THIS_MODULE, + .name = "kgsl-busmon", + .of_match_table = busmon_match_table, + } +}; + +static int __init kgsl_busmon_init(void) +{ + return platform_driver_register(&kgsl_bus_platform_driver); +} + +static void __exit kgsl_busmon_exit(void) +{ + platform_driver_unregister(&kgsl_bus_platform_driver); +} + +module_init(kgsl_busmon_init); +module_exit(kgsl_busmon_exit); + +MODULE_DESCRIPTION("3D Graphics driver"); +MODULE_VERSION("1.2"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:kgsl_3d"); diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h new file mode 100644 index 000000000000..82399db488ae --- /dev/null +++ b/drivers/gpu/msm/adreno.h @@ -0,0 +1,1399 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __ADRENO_H +#define __ADRENO_H + +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "adreno_drawctxt.h" +#include "adreno_ringbuffer.h" +#include "adreno_profile.h" +#include "adreno_dispatch.h" +#include "kgsl_iommu.h" +#include "adreno_perfcounter.h" +#include <linux/stat.h> +#include <linux/delay.h> + +#include "a4xx_reg.h" + +#ifdef CONFIG_MSM_OCMEM +#include <soc/qcom/ocmem.h> +#endif + +#define DEVICE_3D_NAME "kgsl-3d" +#define DEVICE_3D0_NAME "kgsl-3d0" + +#define ADRENO_PRIORITY_MAX_RB_LEVELS 4 + +/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */ +#define ADRENO_DEVICE(device) \ + container_of(device, struct adreno_device, dev) + +/* ADRENO_CONTEXT - Given a context return the adreno context struct */ +#define ADRENO_CONTEXT(context) \ + container_of(context, struct adreno_context, base) + +/* ADRENO_GPU_DEVICE - Given an adreno device return the GPU specific struct */ +#define ADRENO_GPU_DEVICE(_a) ((_a)->gpucore->gpudev) + +#define ADRENO_CHIPID_CORE(_id) (((_id) >> 24) & 0xFF) +#define ADRENO_CHIPID_MAJOR(_id) (((_id) >> 16) & 0xFF) +#define ADRENO_CHIPID_MINOR(_id) (((_id) >> 8) & 0xFF) +#define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF) + +/* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */ +#define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev) + +/* ADRENO_GPUREV - Return the GPU patchid for the given adreno_device */ +#define ADRENO_PATCHID(_a) ((_a)->gpucore->patchid) + +/* + * ADRENO_FEATURE - return true if the specified feature is supported by the GPU + * core + */ +#define ADRENO_FEATURE(_dev, _bit) \ + ((_dev)->gpucore->features & (_bit)) + +/** + * ADRENO_QUIRK - return true if the specified quirk is required by the GPU + */ +#define ADRENO_QUIRK(_dev, _bit) \ + ((_dev)->quirks & (_bit)) + +/* + * ADRENO_PREEMPT_STYLE - return preemption style + */ +#define ADRENO_PREEMPT_STYLE(flags) \ + ((flags & KGSL_CONTEXT_PREEMPT_STYLE_MASK) >> \ + KGSL_CONTEXT_PREEMPT_STYLE_SHIFT) + +/* + * return the dispatcher cmdqueue in which the given cmdbatch should + * be submitted + */ +#define ADRENO_CMDBATCH_DISPATCH_CMDQUEUE(c) \ + (&((ADRENO_CONTEXT(c->context))->rb->dispatch_q)) + +#define ADRENO_CMDBATCH_RB(c) \ + ((ADRENO_CONTEXT(c->context))->rb) + +/* Adreno core features */ +/* The core uses OCMEM for GMEM/binning memory */ +#define ADRENO_USES_OCMEM BIT(0) +/* The core supports an accelerated warm start */ +#define ADRENO_WARM_START BIT(1) +/* The core supports the microcode bootstrap functionality */ +#define ADRENO_USE_BOOTSTRAP BIT(2) +/* The core supports SP/TP hw controlled power collapse */ +#define ADRENO_SPTP_PC BIT(3) +/* The core supports Peak Power Detection(PPD)*/ +#define ADRENO_PPD BIT(4) +/* The GPU supports content protection */ +#define ADRENO_CONTENT_PROTECTION BIT(5) +/* The GPU supports preemption */ +#define ADRENO_PREEMPTION BIT(6) +/* The core uses GPMU for power and limit management */ +#define ADRENO_GPMU BIT(7) +/* The GPMU supports Limits Management */ +#define ADRENO_LM BIT(8) +/* The core uses 64 bit GPU addresses */ +#define ADRENO_64BIT BIT(9) + +/* + * Adreno GPU quirks - control bits for various workarounds + */ + +/* Set TWOPASSUSEWFI in PC_DBG_ECO_CNTL (5XX) */ +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +/* Lock/unlock mutex to sync with the IOMMU */ +#define ADRENO_QUIRK_IOMMU_SYNC BIT(1) + +/* Flags to control command packet settings */ +#define KGSL_CMD_FLAGS_NONE 0 +#define KGSL_CMD_FLAGS_PMODE BIT(0) +#define KGSL_CMD_FLAGS_INTERNAL_ISSUE BIT(1) +#define 
KGSL_CMD_FLAGS_WFI BIT(2) +#define KGSL_CMD_FLAGS_PROFILE BIT(3) +#define KGSL_CMD_FLAGS_PWRON_FIXUP BIT(4) +#define KGSL_CMD_FLAGS_MEMLIST BIT(5) + +/* Command identifiers */ +#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF +#define KGSL_CMD_IDENTIFIER 0x2EEDFACE +#define KGSL_CMD_INTERNAL_IDENTIFIER 0x2EEDD00D +#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE +#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD +#define KGSL_END_OF_FRAME_IDENTIFIER 0x2E0F2E0F +#define KGSL_NOP_IB_IDENTIFIER 0x20F20F20 +#define KGSL_START_OF_PROFILE_IDENTIFIER 0x2DEFADE1 +#define KGSL_END_OF_PROFILE_IDENTIFIER 0x2DEFADE2 +#define KGSL_PWRON_FIXUP_IDENTIFIER 0x2AFAFAFA + +#define ADRENO_ISTORE_START 0x5000 /* Istore offset */ + +#define ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW 50 + +/* One cannot wait forever for the core to idle, so set an upper limit to the + * amount of time to wait for the core to go idle + */ + +#define ADRENO_IDLE_TIMEOUT (20 * 1000) + +#define ADRENO_UCHE_GMEM_BASE 0x100000 + +enum adreno_gpurev { + ADRENO_REV_UNKNOWN = 0, + ADRENO_REV_A304 = 304, + ADRENO_REV_A305 = 305, + ADRENO_REV_A305C = 306, + ADRENO_REV_A306 = 307, + ADRENO_REV_A306A = 308, + ADRENO_REV_A310 = 310, + ADRENO_REV_A320 = 320, + ADRENO_REV_A330 = 330, + ADRENO_REV_A305B = 335, + ADRENO_REV_A405 = 405, + ADRENO_REV_A418 = 418, + ADRENO_REV_A420 = 420, + ADRENO_REV_A430 = 430, + ADRENO_REV_A505 = 505, + ADRENO_REV_A506 = 506, + ADRENO_REV_A510 = 510, + ADRENO_REV_A530 = 530, +}; + +#define ADRENO_START_WARM 0 +#define ADRENO_START_COLD 1 + +#define ADRENO_SOFT_FAULT BIT(0) +#define ADRENO_HARD_FAULT BIT(1) +#define ADRENO_TIMEOUT_FAULT BIT(2) +#define ADRENO_IOMMU_PAGE_FAULT BIT(3) +#define ADRENO_PREEMPT_FAULT BIT(4) + +#define ADRENO_SPTP_PC_CTRL 0 +#define ADRENO_PPD_CTRL 1 +#define ADRENO_LM_CTRL 2 + +struct adreno_gpudev; + +struct adreno_busy_data { + unsigned int gpu_busy; + unsigned int vbif_ram_cycles; + unsigned int vbif_starved_ram; +}; + +/** + * struct adreno_gpu_core - A specific GPU core definition + * @gpurev: Unique GPU revision identifier + * @core: Match for the core version of the GPU + * @major: Match for the major version of the GPU + * @minor: Match for the minor version of the GPU + * @patchid: Match for the patch revision of the GPU + * @features: Common adreno features supported by this core + * @pm4fw_name: Filename for th PM4 firmware + * @pfpfw_name: Filename for the PFP firmware + * @zap_name: Filename for the Zap Shader ucode + * @gpudev: Pointer to the GPU family specific functions for this core + * @gmem_size: Amount of binning memory (GMEM/OCMEM) to reserve for the core + * @pm4_jt_idx: Index of the jump table in the PM4 microcode + * @pm4_jt_addr: Address offset to load the jump table for the PM4 microcode + * @pfp_jt_idx: Index of the jump table in the PFP microcode + * @pfp_jt_addr: Address offset to load the jump table for the PFP microcode + * @pm4_bstrp_size: Size of the bootstrap loader for PM4 microcode + * @pfp_bstrp_size: Size of the bootstrap loader for PFP microcde + * @pfp_bstrp_ver: Version of the PFP microcode that supports bootstraping + * @shader_offset: Offset of shader from gpu reg base + * @shader_size: Shader size + * @num_protected_regs: number of protected registers + * @gpmufw_name: Filename for the GPMU firmware + * @gpmu_major: Match for the GPMU & firmware, major revision + * @gpmu_minor: Match for the GPMU & firmware, minor revision + * @gpmu_features: Supported features for any given GPMU version + * @busy_mask: mask to check if GPU is busy in RBBM_STATUS + * 
@lm_major: Limits Management register sequence, major revision + * @lm_minor: LM register sequence, minor revision + * @regfw_name: Filename for the register sequence firmware + * @gpmu_tsens: ID for the temporature sensor used by the GPMU + * @max_power: Max possible power draw of a core, units elephant tail hairs + */ +struct adreno_gpu_core { + enum adreno_gpurev gpurev; + unsigned int core, major, minor, patchid; + unsigned long features; + const char *pm4fw_name; + const char *pfpfw_name; + const char *zap_name; + struct adreno_gpudev *gpudev; + size_t gmem_size; + unsigned int pm4_jt_idx; + unsigned int pm4_jt_addr; + unsigned int pfp_jt_idx; + unsigned int pfp_jt_addr; + unsigned int pm4_bstrp_size; + unsigned int pfp_bstrp_size; + unsigned int pfp_bstrp_ver; + unsigned long shader_offset; + unsigned int shader_size; + unsigned int num_protected_regs; + const char *gpmufw_name; + unsigned int gpmu_major; + unsigned int gpmu_minor; + unsigned int gpmu_features; + unsigned int busy_mask; + unsigned int lm_major, lm_minor; + const char *regfw_name; + unsigned int gpmu_tsens; + unsigned int max_power; +}; + +/** + * struct adreno_device - The mothership structure for all adreno related info + * @dev: Reference to struct kgsl_device + * @priv: Holds the private flags specific to the adreno_device + * @chipid: Chip ID specific to the GPU + * @gmem_base: Base physical address of GMEM + * @gmem_size: GMEM size + * @gpucore: Pointer to the adreno_gpu_core structure + * @pfp_fw: Buffer which holds the pfp ucode + * @pfp_fw_size: Size of pfp ucode buffer + * @pfp_fw_version: Version of pfp ucode + * @pfp: Memory descriptor which holds pfp ucode buffer info + * @pm4_fw: Buffer which holds the pm4 ucode + * @pm4_fw_size: Size of pm4 ucode buffer + * @pm4_fw_version: Version of pm4 ucode + * @pm4: Memory descriptor which holds pm4 ucode buffer info + * @gpmu_cmds_size: Length of gpmu cmd stream + * @gpmu_cmds: gpmu cmd stream + * @ringbuffers: Array of pointers to adreno_ringbuffers + * @num_ringbuffers: Number of ringbuffers for the GPU + * @cur_rb: Pointer to the current ringbuffer + * @next_rb: Ringbuffer we are switching to during preemption + * @prev_rb: Ringbuffer we are switching from during preemption + * @fast_hang_detect: Software fault detection availability + * @ft_policy: Defines the fault tolerance policy + * @long_ib_detect: Long IB detection availability + * @ft_pf_policy: Defines the fault policy for page faults + * @ocmem_hdl: Handle to the ocmem allocated buffer + * @profile: Container for adreno profiler information + * @dispatcher: Container for adreno GPU dispatcher + * @pwron_fixup: Command buffer to run a post-power collapse shader workaround + * @pwron_fixup_dwords: Number of dwords in the command buffer + * @input_work: Work struct for turning on the GPU after a touch event + * @busy_data: Struct holding GPU VBIF busy stats + * @ram_cycles_lo: Number of DDR clock cycles for the monitor session + * @perfctr_pwr_lo: Number of cycles VBIF is stalled by DDR + * @halt: Atomic variable to check whether the GPU is currently halted + * @ctx_d_debugfs: Context debugfs node + * @pwrctrl_flag: Flag to hold adreno specific power attributes + * @cmdbatch_profile_buffer: Memdesc holding the cmdbatch profiling buffer + * @cmdbatch_profile_index: Index to store the start/stop ticks in the profiling + * buffer + * @sp_local_gpuaddr: Base GPU virtual address for SP local memory + * @sp_pvt_gpuaddr: Base GPU virtual address for SP private memory + * @lm_fw: The LM firmware handle + * 
@lm_sequence: Pointer to the start of the register write sequence for LM + * @lm_size: The dword size of the LM sequence + * @lm_limit: limiting value for LM + * @lm_threshold_count: register value for counter for lm threshold breakin + * @lm_threshold_cross: number of current peaks exceeding threshold + * @speed_bin: Indicate which power level set to use + * @csdev: Pointer to a coresight device (if applicable) + */ +struct adreno_device { + struct kgsl_device dev; /* Must be first field in this struct */ + unsigned long priv; + unsigned int chipid; + unsigned long gmem_base; + unsigned long gmem_size; + const struct adreno_gpu_core *gpucore; + unsigned int *pfp_fw; + size_t pfp_fw_size; + unsigned int pfp_fw_version; + struct kgsl_memdesc pfp; + unsigned int *pm4_fw; + size_t pm4_fw_size; + unsigned int pm4_fw_version; + struct kgsl_memdesc pm4; + size_t gpmu_cmds_size; + unsigned int *gpmu_cmds; + struct adreno_ringbuffer ringbuffers[ADRENO_PRIORITY_MAX_RB_LEVELS]; + int num_ringbuffers; + struct adreno_ringbuffer *cur_rb; + struct adreno_ringbuffer *next_rb; + struct adreno_ringbuffer *prev_rb; + unsigned int fast_hang_detect; + unsigned long ft_policy; + unsigned int long_ib_detect; + unsigned long ft_pf_policy; + struct ocmem_buf *ocmem_hdl; + struct adreno_profile profile; + struct adreno_dispatcher dispatcher; + struct kgsl_memdesc pwron_fixup; + unsigned int pwron_fixup_dwords; + struct work_struct input_work; + struct adreno_busy_data busy_data; + unsigned int ram_cycles_lo; + unsigned int starved_ram_lo; + unsigned int perfctr_pwr_lo; + atomic_t halt; + struct dentry *ctx_d_debugfs; + unsigned long pwrctrl_flag; + + struct kgsl_memdesc cmdbatch_profile_buffer; + unsigned int cmdbatch_profile_index; + uint64_t sp_local_gpuaddr; + uint64_t sp_pvt_gpuaddr; + const struct firmware *lm_fw; + uint32_t *lm_sequence; + uint32_t lm_size; + struct kgsl_memdesc preemption_counters; + struct work_struct gpmu_work; + uint32_t lm_leakage; + uint32_t lm_limit; + uint32_t lm_threshold_count; + uint32_t lm_threshold_cross; + + unsigned int speed_bin; + unsigned int quirks; + + struct coresight_device *csdev; +}; + +/** + * enum adreno_device_flags - Private flags for the adreno_device + * @ADRENO_DEVICE_PWRON - Set during init after a power collapse + * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup + * after power collapse + * @ADRENO_DEVICE_CORESIGHT - Set if the coresight (trace bus) registers should + * be restored after power collapse + * @ADRENO_DEVICE_HANG_INTR - Set if the hang interrupt should be enabled for + * this target + * @ADRENO_DEVICE_STARTED - Set if the device start sequence is in progress + * @ADRENO_DEVICE_FAULT - Set if the device is currently in fault (and shouldn't + * send any more commands to the ringbuffer) + * @ADRENO_DEVICE_CMDBATCH_PROFILE - Set if the device supports command batch + * profiling via the ALWAYSON counter + * @ADRENO_DEVICE_PREEMPTION - Turn on/off preemption + * @ADRENO_DEVICE_SOFT_FAULT_DETECT - Set if soft fault detect is enabled + * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization succeed + * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is + * attached and enabled + */ +enum adreno_device_flags { + ADRENO_DEVICE_PWRON = 0, + ADRENO_DEVICE_PWRON_FIXUP = 1, + ADRENO_DEVICE_INITIALIZED = 2, + ADRENO_DEVICE_CORESIGHT = 3, + ADRENO_DEVICE_HANG_INTR = 4, + ADRENO_DEVICE_STARTED = 5, + ADRENO_DEVICE_FAULT = 6, + ADRENO_DEVICE_CMDBATCH_PROFILE = 7, + ADRENO_DEVICE_GPU_REGULATOR_ENABLED 
= 8, + ADRENO_DEVICE_PREEMPTION = 9, + ADRENO_DEVICE_SOFT_FAULT_DETECT = 10, + ADRENO_DEVICE_GPMU_INITIALIZED = 11, + ADRENO_DEVICE_ISDB_ENABLED = 12, +}; + +/** + * struct adreno_cmdbatch_profile_entry - a single command batch entry in the + * kernel profiling buffer + * @started: Number of GPU ticks at start of the command batch + * @retired: Number of GPU ticks at the end of the command batch + */ +struct adreno_cmdbatch_profile_entry { + uint64_t started; + uint64_t retired; +}; + +#define ADRENO_CMDBATCH_PROFILE_COUNT \ + (PAGE_SIZE / sizeof(struct adreno_cmdbatch_profile_entry)) + +#define ADRENO_CMDBATCH_PROFILE_OFFSET(_index, _member) \ + ((_index) * sizeof(struct adreno_cmdbatch_profile_entry) \ + + offsetof(struct adreno_cmdbatch_profile_entry, _member)) + + +/** + * adreno_regs: List of registers that are used in kgsl driver for all + * 3D devices. Each device type has different offset value for the same + * register, so an array of register offsets are declared for every device + * and are indexed by the enumeration values defined in this enum + */ +enum adreno_regs { + ADRENO_REG_CP_ME_RAM_WADDR, + ADRENO_REG_CP_ME_RAM_DATA, + ADRENO_REG_CP_PFP_UCODE_DATA, + ADRENO_REG_CP_PFP_UCODE_ADDR, + ADRENO_REG_CP_WFI_PEND_CTR, + ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, + ADRENO_REG_CP_RB_RPTR, + ADRENO_REG_CP_RB_WPTR, + ADRENO_REG_CP_CNTL, + ADRENO_REG_CP_ME_CNTL, + ADRENO_REG_CP_RB_CNTL, + ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, + ADRENO_REG_CP_IB1_BUFSZ, + ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, + ADRENO_REG_CP_IB2_BUFSZ, + ADRENO_REG_CP_TIMESTAMP, + ADRENO_REG_CP_SCRATCH_REG6, + ADRENO_REG_CP_SCRATCH_REG7, + ADRENO_REG_CP_ME_RAM_RADDR, + ADRENO_REG_CP_ROQ_ADDR, + ADRENO_REG_CP_ROQ_DATA, + ADRENO_REG_CP_MERCIU_ADDR, + ADRENO_REG_CP_MERCIU_DATA, + ADRENO_REG_CP_MERCIU_DATA2, + ADRENO_REG_CP_MEQ_ADDR, + ADRENO_REG_CP_MEQ_DATA, + ADRENO_REG_CP_HW_FAULT, + ADRENO_REG_CP_PROTECT_STATUS, + ADRENO_REG_CP_PREEMPT, + ADRENO_REG_CP_PREEMPT_DEBUG, + ADRENO_REG_CP_PREEMPT_DISABLE, + ADRENO_REG_CP_PROTECT_REG_0, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + ADRENO_REG_RBBM_STATUS, + ADRENO_REG_RBBM_STATUS3, + ADRENO_REG_RBBM_PERFCTR_CTL, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + ADRENO_REG_RBBM_INT_0_MASK, + ADRENO_REG_RBBM_INT_0_STATUS, + ADRENO_REG_RBBM_PM_OVERRIDE2, + ADRENO_REG_RBBM_INT_CLEAR_CMD, + ADRENO_REG_RBBM_SW_RESET_CMD, + ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD, + ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2, + ADRENO_REG_RBBM_CLOCK_CTL, + ADRENO_REG_VPC_DEBUG_RAM_SEL, + ADRENO_REG_VPC_DEBUG_RAM_READ, + ADRENO_REG_PA_SC_AA_CONFIG, + ADRENO_REG_SQ_GPR_MANAGEMENT, + ADRENO_REG_SQ_INST_STORE_MANAGMENT, + ADRENO_REG_TP0_CHICKEN, + ADRENO_REG_RBBM_RBBM_CTL, + ADRENO_REG_UCHE_INVALIDATE0, + ADRENO_REG_UCHE_INVALIDATE1, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + ADRENO_REG_RBBM_SECVID_TRUST_CONTROL, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, + ADRENO_REG_RBBM_SECVID_TSB_CONTROL, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + ADRENO_REG_VBIF_XIN_HALT_CTRL0, + ADRENO_REG_VBIF_XIN_HALT_CTRL1, + ADRENO_REG_VBIF_VERSION, + ADRENO_REG_REGISTER_MAX, +}; + +/** + * adreno_reg_offsets: Holds 
array of register offsets + * @offsets: Offset array of size defined by enum adreno_regs + * @offset_0: This is the index of the register in offset array whose value + * is 0. 0 is a valid register offset and during initialization of the + * offset array we need to know if an offset value is correctly defined to 0 + */ +struct adreno_reg_offsets { + unsigned int *const offsets; + enum adreno_regs offset_0; +}; + +#define ADRENO_REG_UNUSED 0xFFFFFFFF +#define ADRENO_REG_SKIP 0xFFFFFFFE +#define ADRENO_REG_DEFINE(_offset, _reg) [_offset] = _reg + +/* + * struct adreno_vbif_data - Describes vbif register value pair + * @reg: Offset to vbif register + * @val: The value that should be programmed in the register at reg + */ +struct adreno_vbif_data { + unsigned int reg; + unsigned int val; +}; + +/* + * struct adreno_vbif_platform - Holds an array of vbif reg value pairs + * for a particular core + * @devfunc: Pointer to platform/core identification function + * @vbif: Array of reg value pairs for vbif registers + */ +struct adreno_vbif_platform { + int(*devfunc)(struct adreno_device *); + const struct adreno_vbif_data *vbif; +}; + +/* + * struct adreno_vbif_snapshot_registers - Holds an array of vbif registers + * listed for snapshot dump for a particular core + * @version: vbif version + * @registers: vbif registers listed for snapshot dump + * @count: count of vbif registers listed for snapshot + */ +struct adreno_vbif_snapshot_registers { + const unsigned int version; + const unsigned int *registers; + const int count; +}; + +/** + * struct adreno_coresight_register - Definition for a coresight (tracebus) + * debug register + * @offset: Offset of the debug register in the KGSL mmio region + * @initial: Default value to write when coresight is enabled + * @value: Current shadow value of the register (to be reprogrammed after power + * collapse) + */ +struct adreno_coresight_register { + unsigned int offset; + unsigned int initial; + unsigned int value; +}; + +struct adreno_coresight_attr { + struct device_attribute attr; + struct adreno_coresight_register *reg; +}; + +ssize_t adreno_coresight_show_register(struct device *device, + struct device_attribute *attr, char *buf); + +ssize_t adreno_coresight_store_register(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size); + +#define ADRENO_CORESIGHT_ATTR(_attrname, _reg) \ + struct adreno_coresight_attr coresight_attr_##_attrname = { \ + __ATTR(_attrname, S_IRUGO | S_IWUSR, \ + adreno_coresight_show_register, \ + adreno_coresight_store_register), \ + (_reg), } + +/** + * struct adreno_coresight - GPU specific coresight definition + * @registers - Array of GPU specific registers to configure trace bus output + * @count - Number of registers in the array + * @groups - Pointer to an attribute list of control files + */ +struct adreno_coresight { + struct adreno_coresight_register *registers; + unsigned int count; + const struct attribute_group **groups; +}; + + +struct adreno_irq_funcs { + void (*func)(struct adreno_device *, int); +}; +#define ADRENO_IRQ_CALLBACK(_c) { .func = _c } + +struct adreno_irq { + unsigned int mask; + struct adreno_irq_funcs *funcs; +}; + +/* + * struct adreno_debugbus_block - Holds info about debug buses of a chip + * @block_id: Bus identifier + * @dwords: Number of dwords of data that this block holds + */ +struct adreno_debugbus_block { + unsigned int block_id; + unsigned int dwords; +}; + +/* + * struct adreno_snapshot_section_sizes - Structure holding the size of + * different sections 
dumped during device snapshot + * @cp_pfp: CP PFP data section size + * @cp_me: CP ME data section size + * @vpc_mem: VPC memory section size + * @cp_meq: CP MEQ size + * @shader_mem: Size of shader memory of 1 shader section + * @cp_merciu: CP MERCIU size + * @roq: ROQ size + */ +struct adreno_snapshot_sizes { + int cp_pfp; + int cp_me; + int vpc_mem; + int cp_meq; + int shader_mem; + int cp_merciu; + int roq; +}; + +/* + * struct adreno_snapshot_data - Holds data used in snapshot + * @sect_sizes: Has sections sizes + */ +struct adreno_snapshot_data { + struct adreno_snapshot_sizes *sect_sizes; +}; + +struct adreno_gpudev { + /* + * These registers are in a different location on different devices, + * so define them in the structure and use them as variables. + */ + const struct adreno_reg_offsets *reg_offsets; + const struct adreno_ft_perf_counters *ft_perf_counters; + unsigned int ft_perf_counters_count; + + struct adreno_perfcounters *perfcounters; + const struct adreno_invalid_countables + *invalid_countables; + struct adreno_snapshot_data *snapshot_data; + + struct adreno_coresight *coresight; + + struct adreno_irq *irq; + int num_prio_levels; + unsigned int vbif_xin_halt_ctrl0_mask; + /* GPU specific function hooks */ + void (*irq_trace)(struct adreno_device *, unsigned int status); + void (*snapshot)(struct adreno_device *, struct kgsl_snapshot *); + void (*platform_setup)(struct adreno_device *); + void (*init)(struct adreno_device *); + int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *); + int (*hw_init)(struct adreno_device *); + int (*microcode_read)(struct adreno_device *); + int (*microcode_load)(struct adreno_device *, unsigned int start_type); + void (*perfcounter_init)(struct adreno_device *); + void (*perfcounter_close)(struct adreno_device *); + void (*start)(struct adreno_device *); + bool (*is_sptp_idle)(struct adreno_device *); + int (*regulator_enable)(struct adreno_device *); + void (*regulator_disable)(struct adreno_device *); + void (*pwrlevel_change_settings)(struct adreno_device *, + unsigned int prelevel, unsigned int postlevel, + bool post); + int (*preemption_pre_ibsubmit)(struct adreno_device *, + struct adreno_ringbuffer *, unsigned int *, + struct kgsl_context *, uint64_t cond_addr, + struct kgsl_memobj_node *); + int (*preemption_post_ibsubmit)(struct adreno_device *, + struct adreno_ringbuffer *, unsigned int *, + struct kgsl_context *); + int (*preemption_token)(struct adreno_device *, + struct adreno_ringbuffer *, unsigned int *, + uint64_t gpuaddr); + int (*preemption_init)(struct adreno_device *); + void (*preemption_schedule)(struct adreno_device *); + void (*enable_64bit)(struct adreno_device *); +}; + +struct log_field { + bool show; + const char *display; +}; + +/** + * enum kgsl_ft_policy_bits - KGSL fault tolerance policy bits + * @KGSL_FT_OFF: Disable fault detection (not used) + * @KGSL_FT_REPLAY: Replay the faulting command + * @KGSL_FT_SKIPIB: Skip the faulting indirect buffer + * @KGSL_FT_SKIPFRAME: Skip the frame containing the faulting IB + * @KGSL_FT_DISABLE: Tells the dispatcher to disable FT for the command batch + * @KGSL_FT_TEMP_DISABLE: Disables FT for all commands + * @KGSL_FT_THROTTLE: Disable the context if it faults too often + * @KGSL_FT_SKIPCMD: Skip the command containing the faulting IB + */ +enum kgsl_ft_policy_bits { + KGSL_FT_OFF = 0, + KGSL_FT_REPLAY = 1, + KGSL_FT_SKIPIB = 2, + KGSL_FT_SKIPFRAME = 3, + KGSL_FT_DISABLE = 4, + KGSL_FT_TEMP_DISABLE = 5, + KGSL_FT_THROTTLE = 6, + KGSL_FT_SKIPCMD = 7, + /* 
KGSL_FT_MAX_BITS is used to calculate the mask */ + KGSL_FT_MAX_BITS, + /* Internal bits - set during GFT */ + /* Skip the PM dump on replayed command batches */ + KGSL_FT_SKIP_PMDUMP = 31, +}; + +#define KGSL_FT_POLICY_MASK GENMASK(KGSL_FT_MAX_BITS - 1, 0) + +#define KGSL_FT_DEFAULT_POLICY \ + (BIT(KGSL_FT_REPLAY) | \ + BIT(KGSL_FT_SKIPCMD) | \ + BIT(KGSL_FT_THROTTLE)) + +#define ADRENO_FT_TYPES \ + { BIT(KGSL_FT_OFF), "off" }, \ + { BIT(KGSL_FT_REPLAY), "replay" }, \ + { BIT(KGSL_FT_SKIPIB), "skipib" }, \ + { BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \ + { BIT(KGSL_FT_DISABLE), "disable" }, \ + { BIT(KGSL_FT_TEMP_DISABLE), "temp" }, \ + { BIT(KGSL_FT_THROTTLE), "throttle"}, \ + { BIT(KGSL_FT_SKIPCMD), "skipcmd" } + +/** + * enum kgsl_ft_pagefault_policy_bits - KGSL pagefault policy bits + * @KGSL_FT_PAGEFAULT_INT_ENABLE: No longer used, but retained for compatibility + * @KGSL_FT_PAGEFAULT_GPUHALT_ENABLE: enable GPU halt on pagefaults + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE: log one pagefault per page + * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT: log one pagefault per interrupt + */ +enum { + KGSL_FT_PAGEFAULT_INT_ENABLE = 0, + KGSL_FT_PAGEFAULT_GPUHALT_ENABLE = 1, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE = 2, + KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT = 3, + /* KGSL_FT_PAGEFAULT_MAX_BITS is used to calculate the mask */ + KGSL_FT_PAGEFAULT_MAX_BITS, +}; + +#define KGSL_FT_PAGEFAULT_MASK GENMASK(KGSL_FT_PAGEFAULT_MAX_BITS - 1, 0) + +#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY 0 + +#define FOR_EACH_RINGBUFFER(_dev, _rb, _i) \ + for ((_i) = 0, (_rb) = &((_dev)->ringbuffers[0]); \ + (_i) < (_dev)->num_ringbuffers; \ + (_i)++, (_rb)++) + +struct adreno_ft_perf_counters { + unsigned int counter; + unsigned int countable; +}; + +extern unsigned int *adreno_ft_regs; +extern unsigned int adreno_ft_regs_num; +extern unsigned int *adreno_ft_regs_val; + +extern struct adreno_gpudev adreno_a3xx_gpudev; +extern struct adreno_gpudev adreno_a4xx_gpudev; +extern struct adreno_gpudev adreno_a5xx_gpudev; + +extern int adreno_wake_nice; +extern unsigned int adreno_wake_timeout; + +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +int adreno_spin_idle(struct kgsl_device *device, unsigned int timeout); +int adreno_idle(struct kgsl_device *device); +bool adreno_isidle(struct kgsl_device *device); + +int adreno_set_constraint(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_device_constraint *constraint); + +void adreno_shadermem_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value); + +void adreno_snapshot(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + struct kgsl_context *context); + +int adreno_reset(struct kgsl_device *device, int fault); + +void adreno_fault_skipcmd_detached(struct kgsl_device *device, + struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch); + +int adreno_a3xx_pwron_fixup_init(struct adreno_device *adreno_dev); +int adreno_a4xx_pwron_fixup_init(struct adreno_device *adreno_dev); + +int adreno_coresight_init(struct adreno_device *adreno_dev); + +void adreno_coresight_start(struct adreno_device *adreno_dev); +void adreno_coresight_stop(struct adreno_device *adreno_dev); + +void adreno_coresight_remove(struct adreno_device *adreno_dev); + +bool adreno_hw_isidle(struct adreno_device *adreno_dev); + +int adreno_iommu_set_pt_ctx(struct 
adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt, + struct adreno_context *drawctxt); + +int adreno_iommu_init(struct adreno_device *adreno_dev); + +void adreno_iommu_set_pt_generate_rb_cmds(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pt); + +void adreno_fault_detect_start(struct adreno_device *adreno_dev); +void adreno_fault_detect_stop(struct adreno_device *adreno_dev); + +void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit); +void adreno_cp_callback(struct adreno_device *adreno_dev, int bit); + +unsigned int adreno_iommu_set_pt_ib(struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt); + +unsigned int adreno_iommu_set_pt_generate_cmds( + struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt); + +int adreno_sysfs_init(struct kgsl_device *device); +void adreno_sysfs_close(struct kgsl_device *device); + +void adreno_irqctrl(struct adreno_device *adreno_dev, int state); + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +int adreno_efuse_map(struct adreno_device *adreno_dev); +int adreno_efuse_read_u32(struct adreno_device *adreno_dev, unsigned int offset, + unsigned int *val); +void adreno_efuse_unmap(struct adreno_device *adreno_dev); + +#define ADRENO_TARGET(_name, _id) \ +static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \ +{ \ + return (ADRENO_GPUREV(adreno_dev) == (_id)); \ +} + +static inline int adreno_is_a3xx(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) >= 300) && + (ADRENO_GPUREV(adreno_dev) < 400)); +} + +ADRENO_TARGET(a304, ADRENO_REV_A304) +ADRENO_TARGET(a305, ADRENO_REV_A305) +ADRENO_TARGET(a305b, ADRENO_REV_A305B) +ADRENO_TARGET(a305c, ADRENO_REV_A305C) +ADRENO_TARGET(a306, ADRENO_REV_A306) +ADRENO_TARGET(a306a, ADRENO_REV_A306A) +ADRENO_TARGET(a310, ADRENO_REV_A310) +ADRENO_TARGET(a320, ADRENO_REV_A320) +ADRENO_TARGET(a330, ADRENO_REV_A330) + +static inline int adreno_is_a330v2(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0)); +} + +static inline int adreno_is_a330v21(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0xF)); +} + +static inline int adreno_is_a4xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 400 && + ADRENO_GPUREV(adreno_dev) < 500; +} + +ADRENO_TARGET(a405, ADRENO_REV_A405); + +static inline int adreno_is_a405v2(struct adreno_device *adreno_dev) +{ + return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A405) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0x10); +} + +ADRENO_TARGET(a418, ADRENO_REV_A418) +ADRENO_TARGET(a420, ADRENO_REV_A420) +ADRENO_TARGET(a430, ADRENO_REV_A430) + +static inline int adreno_is_a430v2(struct adreno_device *adreno_dev) +{ + return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A430) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1)); +} + +static inline int adreno_is_a5xx(struct adreno_device *adreno_dev) +{ + return ADRENO_GPUREV(adreno_dev) >= 500 && + ADRENO_GPUREV(adreno_dev) < 600; +} + +ADRENO_TARGET(a505, ADRENO_REV_A505) +ADRENO_TARGET(a506, ADRENO_REV_A506) +ADRENO_TARGET(a510, ADRENO_REV_A510) +ADRENO_TARGET(a530, ADRENO_REV_A530) + +static inline int adreno_is_a530v1(struct adreno_device *adreno_dev) +{ + 
return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0);
+}
+
+static inline int adreno_is_a530v2(struct adreno_device *adreno_dev)
+{
+ return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1);
+}
+
+static inline int adreno_is_a530v3(struct adreno_device *adreno_dev)
+{
+ return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2);
+}
+
+static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev)
+{
+ return ADRENO_GPUREV(adreno_dev) >= 505 &&
+ ADRENO_GPUREV(adreno_dev) <= 506;
+}
+/**
+ * adreno_context_timestamp() - Return the last queued timestamp for the context
+ * @k_ctxt: Pointer to the KGSL context to query
+ *
+ * Return the last queued timestamp for the given context. This is used to
+ * verify that incoming requests are not using an invalid (unsubmitted)
+ * timestamp.
+ */
+static inline int adreno_context_timestamp(struct kgsl_context *k_ctxt)
+{
+ struct adreno_context *drawctxt = ADRENO_CONTEXT(k_ctxt);
+ return drawctxt->timestamp;
+}
+
+/*
+ * adreno_checkreg_off() - Checks the validity of a register enum
+ * @adreno_dev: Pointer to adreno device
+ * @offset_name: The register enum that is checked
+ */
+static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+ if (offset_name >= ADRENO_REG_REGISTER_MAX ||
+ ADRENO_REG_UNUSED == gpudev->reg_offsets->offsets[offset_name])
+ BUG();
+
+ /*
+ * GPU register programming is kept as common as possible
+ * across the cores. Use ADRENO_REG_SKIP when certain register
+ * programming needs to be skipped for certain GPU cores.
+ * Example: Certain registers on a5xx like IB1_BASE are 64 bit.
+ * The common code programs the 64-bit register, but the upper
+ * 32 bits are skipped on a4xx and a3xx using ADRENO_REG_SKIP.
+ */
+ if (ADRENO_REG_SKIP == gpudev->reg_offsets->offsets[offset_name])
+ return false;
+
+ return true;
+}
+
+/*
+ * adreno_readreg() - Read a register by getting its offset from the
+ * offset array defined in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be read
+ * @val: Register value read is placed here
+ */
+static inline void adreno_readreg(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name, unsigned int *val)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+ if (adreno_checkreg_off(adreno_dev, offset_name))
+ kgsl_regread(&adreno_dev->dev,
+ gpudev->reg_offsets->offsets[offset_name], val);
+ else
+ *val = 0;
+}
+
+/*
+ * adreno_writereg() - Write a register by getting its offset from the
+ * offset array defined in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum that is to be written
+ * @val: Value to write
+ */
+static inline void adreno_writereg(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name, unsigned int val)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+ if (adreno_checkreg_off(adreno_dev, offset_name))
+ kgsl_regwrite(&adreno_dev->dev,
+ gpudev->reg_offsets->offsets[offset_name], val);
+}
+
+/*
+ * adreno_getreg() - Returns the offset value of a register from the
+ * register offset array in the gpudev node
+ * @adreno_dev: Pointer to the adreno device
+ * @offset_name: The register enum whose offset is returned
+ */
+static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev,
+ enum adreno_regs offset_name)
+{
+ struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+ if (!adreno_checkreg_off(adreno_dev, offset_name))
+ return ADRENO_REG_REGISTER_MAX;
+ return gpudev->reg_offsets->offsets[offset_name];
+}
+
+/**
+ * adreno_gpu_fault() - Return the current state of the GPU
+ * @adreno_dev: A pointer to the adreno_device to query
+ *
+ * Return 0 if there is no fault, or a positive value indicating the last type
+ * of fault that occurred
+ */
+static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev)
+{
+ smp_rmb();
+ return atomic_read(&adreno_dev->dispatcher.fault);
+}
+
+/**
+ * adreno_set_gpu_fault() - Set the current fault status of the GPU
+ * @adreno_dev: A pointer to the adreno_device to set
+ * @state: fault state to set
+ *
+ */
+static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev,
+ int state)
+{
+ /* only set the fault bit w/o overwriting other bits */
+ atomic_add(state, &adreno_dev->dispatcher.fault);
+ smp_wmb();
+}
+
+
+/**
+ * adreno_clear_gpu_fault() - Clear the GPU fault register
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the GPU fault status for the adreno device
+ */
+
+static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev)
+{
+ atomic_set(&adreno_dev->dispatcher.fault, 0);
+ smp_wmb();
+}
+
+/**
+ * adreno_gpu_halt() - Return the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline int adreno_gpu_halt(struct adreno_device *adreno_dev)
+{
+ smp_rmb();
+ return atomic_read(&adreno_dev->halt);
+}
+
+
+/**
+ * adreno_clear_gpu_halt() - Clear the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_clear_gpu_halt(struct adreno_device *adreno_dev)
+{
+ atomic_set(&adreno_dev->halt, 0);
+ smp_wmb();
+}
+
+/**
+ * adreno_get_gpu_halt() - Increment GPU halt refcount
+ * @adreno_dev: A pointer to the
adreno_device + */ +static inline void adreno_get_gpu_halt(struct adreno_device *adreno_dev) +{ + atomic_inc(&adreno_dev->halt); +} + +/** + * adreno_put_gpu_halt() - Decrement GPU halt refcount + * @adreno_dev: A pointer to the adreno_device + */ +static inline void adreno_put_gpu_halt(struct adreno_device *adreno_dev) +{ + if (atomic_dec_return(&adreno_dev->halt) < 0) + BUG(); +} + + +/* + * adreno_vbif_start() - Program VBIF registers, called in device start + * @adreno_dev: Pointer to device whose vbif data is to be programmed + * @vbif_platforms: list register value pair of vbif for a family + * of adreno cores + * @num_platforms: Number of platforms contained in vbif_platforms + */ +static inline void adreno_vbif_start(struct adreno_device *adreno_dev, + const struct adreno_vbif_platform *vbif_platforms, + int num_platforms) +{ + int i; + const struct adreno_vbif_data *vbif = NULL; + + for (i = 0; i < num_platforms; i++) { + if (vbif_platforms[i].devfunc(adreno_dev)) { + vbif = vbif_platforms[i].vbif; + break; + } + } + + while ((vbif != NULL) && (vbif->reg != 0)) { + kgsl_regwrite(&adreno_dev->dev, vbif->reg, vbif->val); + vbif++; + } +} + +/** + * adreno_set_protected_registers() - Protect the specified range of registers + * from being accessed by the GPU + * @adreno_dev: pointer to the Adreno device + * @index: Pointer to the index of the protect mode register to write to + * @reg: Starting dword register to write + * @mask_len: Size of the mask to protect (# of registers = 2 ** mask_len) + * + * Add the range of registers to the list of protected mode registers that will + * cause an exception if the GPU accesses them. There are 16 available + * protected mode registers. Index is used to specify which register to write + * to - the intent is to call this function multiple times with the same index + * pointer for each range and the registers will be magically programmed in + * incremental fashion + */ +static inline void adreno_set_protected_registers( + struct adreno_device *adreno_dev, unsigned int *index, + unsigned int reg, int mask_len) +{ + unsigned int val; + unsigned int base = + adreno_getreg(adreno_dev, ADRENO_REG_CP_PROTECT_REG_0); + unsigned int offset = *index; + + if (adreno_dev->gpucore->num_protected_regs) + BUG_ON(*index >= adreno_dev->gpucore->num_protected_regs); + else + BUG_ON(*index >= 16); + + /* + * On A4XX targets with more than 16 protected mode registers + * the upper registers are not contiguous with the lower 16 + * registers so we have to adjust the base and offset accordingly + */ + + if (adreno_is_a4xx(adreno_dev) && *index >= 0x10) { + base = A4XX_CP_PROTECT_REG_10; + offset = *index - 0x10; + } + + val = 0x60000000 | ((mask_len & 0x1F) << 24) | ((reg << 2) & 0xFFFFF); + + kgsl_regwrite(&adreno_dev->dev, base + offset, val); + *index = *index + 1; +} + +#ifdef CONFIG_DEBUG_FS +void adreno_debugfs_init(struct adreno_device *adreno_dev); +void adreno_context_debugfs_init(struct adreno_device *, + struct adreno_context *); +#else +static inline void adreno_debugfs_init(struct adreno_device *adreno_dev) { } +static inline void adreno_context_debugfs_init(struct adreno_device *device, + struct adreno_context *context) + { } +#endif + +/** + * adreno_compare_pm4_version() - Compare the PM4 microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare again + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. 
+ */ +static inline int adreno_compare_pm4_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->pm4_fw_version == version) + return 0; + + return (adreno_dev->pm4_fw_version > version) ? 1 : -1; +} + +/** + * adreno_compare_pfp_version() - Compare the PFP microcode version + * @adreno_dev: Pointer to the adreno_device struct + * @version: Version number to compare against + * + * Compare the current version against the specified version and return -1 if + * the current code is older, 0 if equal or 1 if newer. + */ +static inline int adreno_compare_pfp_version(struct adreno_device *adreno_dev, + unsigned int version) +{ + if (adreno_dev->pfp_fw_version == version) + return 0; + + return (adreno_dev->pfp_fw_version > version) ? 1 : -1; +} + +/* + * adreno_bootstrap_ucode() - Checks if Ucode bootstrapping is supported + * @adreno_dev: Pointer to the the adreno device + */ +static inline int adreno_bootstrap_ucode(struct adreno_device *adreno_dev) +{ + return (ADRENO_FEATURE(adreno_dev, ADRENO_USE_BOOTSTRAP) && + adreno_compare_pfp_version(adreno_dev, + adreno_dev->gpucore->pfp_bstrp_ver) >= 0) ? 1 : 0; +} + +/** + * adreno_preempt_state() - Check if preemption state is equal to given state + * @adreno_dev: Device whose preemption state is checked + * @state: State to compare against + */ +static inline unsigned int adreno_preempt_state( + struct adreno_device *adreno_dev, + enum adreno_dispatcher_preempt_states state) +{ + return atomic_read(&adreno_dev->dispatcher.preemption_state) == + state; +} + +/** + * adreno_get_rptr() - Get the current ringbuffer read pointer + * @rb: Pointer the ringbuffer to query + * + * Get the current read pointer from the GPU register. + */ +static inline unsigned int +adreno_get_rptr(struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + if (adreno_dev->cur_rb == rb && + adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &(rb->rptr)); + + return rb->rptr; +} + +static inline bool adreno_is_preemption_enabled( + struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); +} + +/** + * adreno_ctx_get_rb() - Return the ringbuffer that a context should + * use based on priority + * @adreno_dev: The adreno device that context is using + * @drawctxt: The context pointer + */ +static inline struct adreno_ringbuffer *adreno_ctx_get_rb( + struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct kgsl_context *context; + int level; + if (!drawctxt) + return NULL; + + context = &(drawctxt->base); + + /* + * If preemption is disabled then everybody needs to go on the same + * ringbuffer + */ + + if (!adreno_is_preemption_enabled(adreno_dev)) + return &(adreno_dev->ringbuffers[0]); + + /* + * Math to convert the priority field in context structure to an RB ID. + * Divide up the context priority based on number of ringbuffer levels. 
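+ * e.g. with four ringbuffers, priorities 0-3 land on RB 0, 4-7 on RB 1,
+ * and so on; anything past the last full bucket is clamped to the
+ * lowest-priority ringbuffer below.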
+ */ + level = context->priority / adreno_dev->num_ringbuffers; + if (level < adreno_dev->num_ringbuffers) + return &(adreno_dev->ringbuffers[level]); + else + return &(adreno_dev->ringbuffers[ + adreno_dev->num_ringbuffers - 1]); +} +/* + * adreno_set_active_ctxs_null() - Put back reference to any active context + * and set the active context to NULL + * @adreno_dev: The adreno device + */ +static inline void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev) +{ + int i; + struct adreno_ringbuffer *rb; + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->drawctxt_active) + kgsl_context_put(&(rb->drawctxt_active->base)); + rb->drawctxt_active = NULL; + kgsl_sharedmem_writel(rb->device, &rb->pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + current_rb_ptname), 0); + } +} + +/* + * adreno_compare_prio_level() - Compares 2 priority levels based on enum values + * @p1: First priority level + * @p2: Second priority level + * + * Returns greater than 0 if p1 is higher priority, 0 if levels are equal else + * less than 0 + */ +static inline int adreno_compare_prio_level(int p1, int p2) +{ + return p2 - p1; +} + +void adreno_readreg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t *val); + +void adreno_writereg64(struct adreno_device *adreno_dev, + enum adreno_regs lo, enum adreno_regs hi, uint64_t val); + +unsigned int adreno_iommu_set_apriv(struct adreno_device *adreno_dev, + unsigned int *cmds, int set); + +static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->fast_hang_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect && + !test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); +} + +#endif /*__ADRENO_H */ diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c new file mode 100644 index 000000000000..18b68ac7c633 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.c @@ -0,0 +1,1847 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include <linux/firmware.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/msm_kgsl.h> + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "a3xx_reg.h" +#include "adreno_a3xx.h" +#include "adreno_a4xx.h" +#include "a4xx_reg.h" +#include "adreno_cp_parser.h" +#include "adreno_trace.h" +#include "adreno_pm4types.h" +#include "adreno_perfcounter.h" + +/* + * Define registers for a3xx that contain addresses used by the + * cp parser logic + */ +const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + A3XX_VSC_PIPE_DATA_ADDRESS_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + A3XX_VSC_PIPE_DATA_LENGTH_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + A3XX_VSC_PIPE_DATA_ADDRESS_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + A3XX_VSC_PIPE_DATA_LENGTH_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + A3XX_VSC_PIPE_DATA_ADDRESS_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + A3XX_VSC_PIPE_DATA_LENGTH_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + A3XX_VSC_PIPE_DATA_ADDRESS_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + A3XX_VSC_PIPE_DATA_LENGTH_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + A3XX_VSC_PIPE_DATA_ADDRESS_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + A3XX_VSC_PIPE_DATA_LENGTH_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + A3XX_VSC_PIPE_DATA_ADDRESS_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + A3XX_VSC_PIPE_DATA_LENGTH_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + A3XX_VSC_PIPE_DATA_ADDRESS_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + A3XX_VSC_PIPE_DATA_LENGTH_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + A3XX_VSC_PIPE_DATA_ADDRESS_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + A3XX_VSC_PIPE_DATA_LENGTH_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + A3XX_VFD_FETCH_INSTR_1_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + A3XX_VFD_FETCH_INSTR_1_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + A3XX_VFD_FETCH_INSTR_1_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + A3XX_VFD_FETCH_INSTR_1_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + A3XX_VFD_FETCH_INSTR_1_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + A3XX_VFD_FETCH_INSTR_1_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + A3XX_VFD_FETCH_INSTR_1_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + A3XX_VFD_FETCH_INSTR_1_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + A3XX_VFD_FETCH_INSTR_1_8), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + A3XX_VFD_FETCH_INSTR_1_9), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + A3XX_VFD_FETCH_INSTR_1_A), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + A3XX_VFD_FETCH_INSTR_1_B), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + A3XX_VFD_FETCH_INSTR_1_C), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + A3XX_VFD_FETCH_INSTR_1_D), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + A3XX_VFD_FETCH_INSTR_1_E), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + A3XX_VFD_FETCH_INSTR_1_F), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + A3XX_VSC_SIZE_ADDRESS), + 
ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + A3XX_SP_VS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + A3XX_SP_FS_PVT_MEM_ADDR_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + A3XX_SP_VS_OBJ_START_REG), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + A3XX_SP_FS_OBJ_START_REG), +}; + +static unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device + *adreno_dev) +{ + if (adreno_is_a320(adreno_dev)) + return A320_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a330v2(adreno_dev)) + return A3XX_RBBM_CLOCK_CTL_DEFAULT; + else if (adreno_is_a330(adreno_dev)) + return A330_RBBM_CLOCK_CTL_DEFAULT; + return A3XX_RBBM_CLOCK_CTL_DEFAULT; +} + +static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = { + 0x00000000, 0x302CC300, 0x00000000, 0x302CC304, + 0x00000000, 0x302CC308, 0x00000000, 0x302CC30C, + 0x00000000, 0x302CC310, 0x00000000, 0x302CC314, + 0x00000000, 0x302CC318, 0x00000000, 0x302CC31C, + 0x00000000, 0x302CC320, 0x00000000, 0x302CC324, + 0x00000000, 0x302CC328, 0x00000000, 0x302CC32C, + 0x00000000, 0x302CC330, 0x00000000, 0x302CC334, + 0x00000000, 0x302CC338, 0x00000000, 0x302CC33C, + 0x00000000, 0x00000400, 0x00020000, 0x63808003, + 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, + 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, + 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, + 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, + 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, + 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, + 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, + 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, + 0x00000003, 0x80D60003, 0x00000007, 0x80D60007, + 0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F, + 0x00000013, 0x80D60013, 0x00000017, 0x80D60017, + 0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F, + 0x00000023, 0x80D60023, 0x00000027, 0x80D60027, + 0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F, + 0x00000033, 0x80D60033, 0x00000037, 0x80D60037, + 0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F, + 0x00000000, 0x03000000, 0x00000000, 0x00000000, +}; + +/** + * adreno_a3xx_pwron_fixup_init() - Initalize a special command buffer to run a + * post-power collapse shader workaround + * @adreno_dev: Pointer to a adreno_device struct + * + * Some targets require a special workaround shader to be executed after + * power-collapse. 
Construct the IB once at init time and keep it + * handy + * + * Returns: 0 on success or negative on error + */ +int adreno_a3xx_pwron_fixup_init(struct adreno_device *adreno_dev) +{ + unsigned int *cmds; + int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions); + int ret; + + /* Return if the fixup is already in place */ + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + return 0; + + ret = kgsl_allocate_global(&adreno_dev->dev, + &adreno_dev->pwron_fixup, PAGE_SIZE, + KGSL_MEMFLAGS_GPUREADONLY, 0); + + if (ret) + return ret; + + cmds = adreno_dev->pwron_fixup.hostptr; + + *cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + *cmds++ = 0x00000000; + *cmds++ = 0x90000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00010000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1); + *cmds++ = 0x00000040; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1); + *cmds++ = 0x80000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1); + *cmds++ = 0x0D001002; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1); + *cmds++ = 0x00401101; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1); + *cmds++ = 0x00000400; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1); + *cmds++ = 0x00000010; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1); + *cmds++ = 0x00040000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1); + *cmds++ = 0x0000000A; + *cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1); + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 
1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1); + *cmds++ = 0x00000004; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1); + *cmds++ = 0x0DB0400A; + *cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1); + *cmds++ = 0x00300402; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1); + *cmds++ = 0x00010000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1); + *cmds++ = 0x04008001; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1); + *cmds++ = 0x0000000D; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1); + *cmds++ = 0x00000000; 
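+ /*
+ * Each cp_type0_packet(reg, 1) header in this sequence is followed by a
+ * single payload dword that the CP writes to that register, so the block
+ * above and below simply loads a known-good default state (HLSQ, SP,
+ * GRAS and RB) before the workaround kernel is kicked off with
+ * CP_EXEC_CL further down.
+ */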
+ *cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1); + *cmds++ = 0x00008000; + *cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = 
cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (0 << CP_LOADSTATE_STATESRC_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) | + (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT); + *cmds++ = 0x00400000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) | + (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00400220; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 4); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT); + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); + *cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) | + (13 << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = 0x00000000; + + memcpy(cmds, _a3xx_pwron_fixup_fs_instructions, count << 2); + + cmds += count; + + *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1); + *cmds++ = 0x1E000150; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2); + *cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG); + *cmds++ = 0x1E000050; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = A3XX_RBBM_CLOCK_CTL; + *cmds++ = 0xFFFCFFFF; + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Remember the number of dwords in the command buffer for when we + * program the indirect buffer call in the ringbuffer + */ + adreno_dev->pwron_fixup_dwords = + (cmds - (unsigned int *) adreno_dev->pwron_fixup.hostptr); + + /* Mark the flag in ->priv to show that we have the fix */ + set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + return 0; +} + +static void a3xx_platform_setup(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev; + const struct adreno_reg_offsets *reg_offsets; + + if (adreno_is_a306(adreno_dev) || adreno_is_a306a(adreno_dev)) { + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + reg_offsets = gpudev->reg_offsets; + reg_offsets->offsets[ADRENO_REG_VBIF_XIN_HALT_CTRL0] = + A3XX_VBIF2_XIN_HALT_CTRL0; + reg_offsets->offsets[ADRENO_REG_VBIF_XIN_HALT_CTRL1] = + A3XX_VBIF2_XIN_HALT_CTRL1; + gpudev->vbif_xin_halt_ctrl0_mask = + A3XX_VBIF2_XIN_HALT_CTRL0_MASK; + } +} + +/* + * a3xx_rb_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + 
* @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, common function shared between + * a3xx devices + */ +static int a3xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 18); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + *cmds++ = cp_type3_packet(CP_ME_INIT, 17); + + *cmds++ = 0x000003f7; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000080; + *cmds++ = 0x00000100; + *cmds++ = 0x00000180; + *cmds++ = 0x00006600; + *cmds++ = 0x00000150; + *cmds++ = 0x0000014e; + *cmds++ = 0x00000154; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* Enable protected mode registers for A3XX/A4XX */ + *cmds++ = 0x20000000; + + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "CP initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } + + return ret; +} + +/* + * a3xx_err_callback() - Call back for a3xx error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + switch (bit) { + case A3XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3)); + + return; + } + case A3XX_INT_RBBM_ATB_BUS_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus oveflow\n"); + break; + case A3XX_INT_CP_T0_PACKET_IN_IB: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer TO packet in IB interrupt\n"); + break; + case A3XX_INT_CP_OPCODE_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer opcode error interrupt\n"); + break; + case A3XX_INT_CP_RESERVED_BIT_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer reserved bit error interrupt\n"); + break; + case A3XX_INT_CP_HW_FAULT: + kgsl_regread(device, A3XX_CP_HW_FAULT, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Ringbuffer HW fault | status=%x\n", reg); + break; + case A3XX_INT_CP_REG_PROTECT_FAULT: + kgsl_regread(device, A3XX_CP_PROTECT_STATUS, ®); + KGSL_DRV_CRIT(device, + "CP | Protected mode error| %s | addr=%x\n", + reg & (1 << 24) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + return; + case A3XX_INT_CP_AHB_ERROR_HALT: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer AHB error interrupt\n"); + break; + case A3XX_INT_UCHE_OOB_ACCESS: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n"); + break; + default: + KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt\n"); + } +} + +#define A3XX_INT_MASK \ + ((1 << A3XX_INT_RBBM_AHB_ERROR) | \ + (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A3XX_INT_CP_T0_PACKET_IN_IB) | \ + (1 << A3XX_INT_CP_OPCODE_ERROR) | \ + (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A3XX_INT_CP_HW_FAULT) | \ + (1 << A3XX_INT_CP_IB1_INT) | \ + (1 << A3XX_INT_CP_IB2_INT) | \ + (1 << A3XX_INT_CP_RB_INT) | \ + (1 << A3XX_INT_CACHE_FLUSH_TS) | \ + (1 << A3XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A3XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A3XX_INT_UCHE_OOB_ACCESS)) + +static struct adreno_irq_funcs a3xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 2 - RBBM_REG_TIMEOUT */ + ADRENO_IRQ_CALLBACK(NULL), /* 3 - RBBM_ME_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(NULL), /* 4 - RBBM_PFP_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */ + ADRENO_IRQ_CALLBACK(NULL), /* 6 - RBBM_VFD_ERROR */ + ADRENO_IRQ_CALLBACK(NULL), /* 7 - CP_SW */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + /* 10 - CP_RESERVED_BIT_ERROR */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CP_REG_PROTECT_FAULT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - CP_AHB_ERROR_FAULT */ + ADRENO_IRQ_CALLBACK(a3xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 22 - Unused */ + ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */ + /* 24 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ +}; + +static struct adreno_irq a3xx_irq = { + .funcs = a3xx_irq_funcs, + .mask = A3XX_INT_MASK, +}; + +/* VBIF registers start after 0x3000 so use 0x0 as end of list marker */ +static const struct adreno_vbif_data a304_vbif[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a305_vbif[] = { + /* Set up 16 deep read/write request queues */ + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF }, + /* Set up round robin arbitration between both AXI ports */ + { A3XX_VBIF_ARB_CTL, 0x00000030 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C }, + {0, 0}, +}; + +static 
const struct adreno_vbif_data a305b_vbif[] = { + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303 }, + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a305c_vbif[] = { + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x00101010 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x00101010 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000101 }, + { A3XX_VBIF_ARB_CTL, 0x00000010 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x00000007 }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x00070007 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a306_vbif[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, + {0, 0}, +}; + +static const struct adreno_vbif_data a306a_vbif[] = { + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A }, + {0, 0}, +}; + +static const struct adreno_vbif_data a310_vbif[] = { + { A3XX_VBIF_ABIT_SORT, 0x0001000F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x3 }, + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18180C0C }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x1818000C }, + {0, 0}, +}; + +static const struct adreno_vbif_data a320_vbif[] = { + /* Set up 16 deep read/write request queues */ + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 }, + { A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF }, + /* Set up round robin arbitration between both AXI ports */ + { A3XX_VBIF_ARB_CTL, 0x00000030 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C }, + /* Enable 1K sort */ + { A3XX_VBIF_ABIT_SORT, 0x000000FF }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a330_vbif[] = { + /* Set up 16 deep read/write request queues */ + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 }, + { A3XX_VBIF_IN_RD_LIM_CONF1, 0x00001818 }, + { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00001818 }, + { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00001818 }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + { A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 }, + { A3XX_VBIF_IN_WR_LIM_CONF1, 0x00001818 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F }, + /* Set up round robin arbitration between both AXI ports */ + { A3XX_VBIF_ARB_CTL, 0x00000030 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001 }, + /* Set up AOOO */ + { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003F }, + { A3XX_VBIF_OUT_AXI_AOOO, 0x003F003F }, + /* Enable 1K sort */ + { A3XX_VBIF_ABIT_SORT, 0x0001003F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Disable VBIF clock gating. This is to enable AXI running + * higher frequency than GPU. 
+ */ + { A3XX_VBIF_CLKON, 1 }, + {0, 0}, +}; + +/* + * Most of the VBIF registers on 8974v2 have the correct values at power on, so + * we won't modify those if we don't need to + */ +static const struct adreno_vbif_data a330v2_vbif[] = { + /* Enable 1k sort */ + { A3XX_VBIF_ABIT_SORT, 0x0001003F }, + { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F }, + { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + {0, 0}, +}; + +/* + * Most of the VBIF registers on a330v2.1 have the correct values at power on, + * so we won't modify those if we don't need to + */ +static const struct adreno_vbif_data a330v21_vbif[] = { + /* Enable WR-REQ */ + { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x1 }, + /* Set up VBIF_ROUND_ROBIN_QOS_ARB */ + { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 }, + { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18180c0c }, + {0, 0}, +}; + +static const struct adreno_vbif_platform a3xx_vbif_platforms[] = { + { adreno_is_a304, a304_vbif }, + { adreno_is_a305, a305_vbif }, + { adreno_is_a305c, a305c_vbif }, + { adreno_is_a306, a306_vbif }, + { adreno_is_a306a, a306a_vbif }, + { adreno_is_a310, a310_vbif }, + { adreno_is_a320, a320_vbif }, + /* A330v2.1 needs to be ahead of A330v2 so the right device matches */ + { adreno_is_a330v21, a330v21_vbif}, + /* A330v2 needs to be ahead of A330 so the right device matches */ + { adreno_is_a330v2, a330v2_vbif }, + { adreno_is_a330, a330_vbif }, + { adreno_is_a305b, a305b_vbif }, +}; + +/* + * Define the available perfcounter groups - these get used by + * adreno_perfcounter_get and adreno_perfcounter_put + */ + +static struct adreno_perfcount_register a3xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO, + A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, + A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, + A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO, + A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO, + A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO, + A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO, + A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, + A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, + A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, + A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9, + A3XX_HLSQ_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, + A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10, + A3XX_HLSQ_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, + 
A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11, + A3XX_HLSQ_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, + A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12, + A3XX_HLSQ_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, + A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13, + A3XX_HLSQ_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, + A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14, + A3XX_HLSQ_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, + A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, + A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, + A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, + A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, + A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, + A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, + A3XX_RBBM_PERFCTR_UCHE_0_HI, 21, + A3XX_UCHE_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, + A3XX_RBBM_PERFCTR_UCHE_1_HI, 22, + A3XX_UCHE_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, + A3XX_RBBM_PERFCTR_UCHE_2_HI, 23, + A3XX_UCHE_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, + A3XX_RBBM_PERFCTR_UCHE_3_HI, 24, + A3XX_UCHE_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, + A3XX_RBBM_PERFCTR_UCHE_4_HI, 25, + A3XX_UCHE_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, + A3XX_RBBM_PERFCTR_UCHE_5_HI, 26, + A3XX_UCHE_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO, + A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO, + A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO, + A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO, + A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO, + A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO, + A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO, + A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO, + A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT }, 
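+ /*
+ * As in the other groups, each entry pairs the _LO/_HI result registers
+ * with a load bit (-1 for counters that have none) and the select
+ * register used to program the countable.
+ */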
+ { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO, + A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO, + A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO, + A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO, + A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO, + A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO, + A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO, + A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO, + A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, + A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO, + A3XX_RBBM_PERFCTR_PWR_1_HI, -1, 0 }, +}; + +static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO, + A3XX_VBIF_PERF_CNT0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO, + A3XX_VBIF_PERF_CNT1_HI, -1, 0 }, +}; +static struct adreno_perfcount_register a3xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO, + A3XX_VBIF_PERF_PWR_CNT0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO, + A3XX_VBIF_PERF_PWR_CNT1_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO, + A3XX_VBIF_PERF_PWR_CNT2_HI, -1, 0 }, +}; +static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0, + A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1, + A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2, + A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3, + A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 }, +}; +/* + * Placing EN register in select field since vbif perf counters + * dont have select register to program + */ +static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW0, + A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW1, + A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, + 0, A3XX_VBIF2_PERF_PWR_CNT_LOW2, + A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1, + A3XX_VBIF2_PERF_PWR_CNT_EN2 }, +}; + +#define A3XX_PERFCOUNTER_GROUP(offset, name) \ + ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name) + +#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags) + +static struct adreno_perfcount_group a3xx_perfcounter_groups[] = { + A3XX_PERFCOUNTER_GROUP(CP, cp), + 
A3XX_PERFCOUNTER_GROUP(RBBM, rbbm), + A3XX_PERFCOUNTER_GROUP(PC, pc), + A3XX_PERFCOUNTER_GROUP(VFD, vfd), + A3XX_PERFCOUNTER_GROUP(HLSQ, hlsq), + A3XX_PERFCOUNTER_GROUP(VPC, vpc), + A3XX_PERFCOUNTER_GROUP(TSE, tse), + A3XX_PERFCOUNTER_GROUP(RAS, ras), + A3XX_PERFCOUNTER_GROUP(UCHE, uche), + A3XX_PERFCOUNTER_GROUP(TP, tp), + A3XX_PERFCOUNTER_GROUP(SP, sp), + A3XX_PERFCOUNTER_GROUP(RB, rb), + A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A3XX_PERFCOUNTER_GROUP(VBIF, vbif), + A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), +}; + +static struct adreno_perfcounters a3xx_perfcounters = { + a3xx_perfcounter_groups, + ARRAY_SIZE(a3xx_perfcounter_groups), +}; + +static struct adreno_ft_perf_counters a3xx_ft_perf_counters[] = { + {KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES}, + {KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES}, + {KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS}, + {KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM}, +}; + +static void a3xx_perfcounter_init(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + + /* SP[3] counter is broken on a330 so disable it if a330 device */ + if (adreno_is_a330(adreno_dev)) + a3xx_perfcounters_sp[3].countable = KGSL_PERFCOUNTER_BROKEN; + + if (counters && + (adreno_is_a306(adreno_dev) || adreno_is_a304(adreno_dev) || + adreno_is_a306a(adreno_dev))) { + counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs = + a3xx_perfcounters_vbif2; + counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs = + a3xx_perfcounters_vbif2_pwr; + } + + /* + * Enable the GPU busy count counter. This is a fixed counter on + * A3XX so we don't need to bother checking the return value + */ + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + NULL, NULL, PERFCOUNTER_FLAG_KERNEL); +} + +static void a3xx_perfcounter_close(struct adreno_device *adreno_dev) +{ + adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + PERFCOUNTER_FLAG_KERNEL); +} + +/** + * a3xx_protect_init() - Initializes register protection on a3xx + * @adreno_dev: Pointer to the device structure + * Performs register writes to enable protected access to sensitive + * registers + */ +static void a3xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int index = 0; + struct kgsl_protected_registers *iommu_regs; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007); + + /* RBBM registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x18, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x20, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x33, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x42, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x50, 4); + adreno_set_protected_registers(adreno_dev, &index, 0x63, 0); + adreno_set_protected_registers(adreno_dev, &index, 0x100, 4); + + /* CP registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x1C0, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x1EC, 1); + adreno_set_protected_registers(adreno_dev, &index, 0x1F6, 1); + adreno_set_protected_registers(adreno_dev, &index, 0x1F8, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x45E, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x460, 4); + + /* RB registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0); + + /* VBIF registers */ + 
adreno_set_protected_registers(adreno_dev, &index, 0x3000, 6); + + /* SMMU registers */ + iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu); + if (iommu_regs) + adreno_set_protected_registers(adreno_dev, &index, + iommu_regs->base, iommu_regs->range); +} + +static void a3xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + adreno_vbif_start(adreno_dev, a3xx_vbif_platforms, + ARRAY_SIZE(a3xx_vbif_platforms)); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Tune the hystersis counters for SP and CP idle detection */ + kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10); + kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + + /* Enable the RBBM error reporting bits. This lets us get + useful information on failure */ + + kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000); + + /* Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang */ + if (adreno_is_a330v2(adreno_dev)) { + set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + gpudev->irq->mask |= (1 << A3XX_INT_MISC_HANG_DETECT); + kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 31) | 0xFFFF); + } else + kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 16) | 0xFFF); + + /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */ + kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001); + + /* Enable VFD to access most of the UCHE (7 ways out of 8) */ + kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07); + + /* Enable Clock gating */ + kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, + adreno_a3xx_rbbm_clock_ctl_default(adreno_dev)); + + if (adreno_is_a330v2(adreno_dev)) + kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL, + A330v2_RBBM_GPR0_CTL_DEFAULT); + else if (adreno_is_a330(adreno_dev)) + kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL, + A330_RBBM_GPR0_CTL_DEFAULT); + else if (adreno_is_a310(adreno_dev)) + kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL, + A310_RBBM_GPR0_CTL_DEFAULT); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_USES_OCMEM)) + kgsl_regwrite(device, A3XX_RB_GMEM_BASE_ADDR, + (unsigned int)(adreno_dev->gmem_base >> 14)); + + /* Turn on protection */ + a3xx_protect_init(adreno_dev); + + /* Turn on performance counters */ + kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01); + + kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT); +} + +static struct adreno_coresight_register a3xx_coresight_registers[] = { + { A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F }, + { A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff }, + { A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f }, + { A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff }, + { A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 }, + { A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 }, + { A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 }, + { A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE }, + { A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 }, +}; + +static ADRENO_CORESIGHT_ATTR(config_debug_bus, + &a3xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt, + &a3xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt, + &a3xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt, + 
&a3xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(config_trace_cmd, + &a3xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl, + &a3xx_coresight_registers[5]); + +static struct attribute *a3xx_coresight_attrs[] = { + &coresight_attr_config_debug_bus.attr.attr, + &coresight_attr_config_trace_start_cnt.attr.attr, + &coresight_attr_config_trace_stop_cnt.attr.attr, + &coresight_attr_config_trace_period_cnt.attr.attr, + &coresight_attr_config_trace_cmd.attr.attr, + &coresight_attr_config_trace_bus_ctl.attr.attr, + NULL, +}; + +static const struct attribute_group a3xx_coresight_group = { + .attrs = a3xx_coresight_attrs, +}; + +static const struct attribute_group *a3xx_coresight_groups[] = { + &a3xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a3xx_coresight = { + .registers = a3xx_coresight_registers, + .count = ARRAY_SIZE(a3xx_coresight_registers), + .groups = a3xx_coresight_groups, +}; + +/* Register offset defines for A3XX */ +static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_WADDR, A3XX_CP_ME_RAM_WADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_DATA, A3XX_CP_ME_RAM_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_DATA, A3XX_CP_PFP_UCODE_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_ADDR, A3XX_CP_PFP_UCODE_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A3XX_CP_WFI_PEND_CTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A3XX_CP_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_RADDR, A3XX_CP_ME_RAM_RADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A3XX_CP_ROQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A3XX_CP_ROQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A3XX_CP_MERCIU_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A3XX_CP_MERCIU_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, A3XX_CP_MERCIU_DATA2), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A3XX_CP_MEQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A3XX_CP_MEQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A3XX_RBBM_PERFCTR_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + A3XX_RBBM_PERFCTR_LOAD_CMD0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + A3XX_RBBM_PERFCTR_LOAD_CMD1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + A3XX_RBBM_PERFCTR_PWR_1_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK), + 
ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A3XX_RBBM_INT_0_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD, + A3XX_RBBM_INT_CLEAR_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_SEL, + A3XX_VPC_VPC_DEBUG_RAM_SEL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_READ, + A3XX_VPC_VPC_DEBUG_RAM_READ), + ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, A3XX_PA_SC_AA_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT), + ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGMENT, + A3XX_SQ_INST_STORE_MANAGMENT), + ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A3XX_RBBM_RBBM_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, + A3XX_UCHE_CACHE_INVALIDATE0_REG), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE1, + A3XX_UCHE_CACHE_INVALIDATE1_REG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + A3XX_RBBM_PERFCTR_LOAD_VALUE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + A3XX_RBBM_PERFCTR_LOAD_VALUE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0, + A3XX_VBIF_XIN_HALT_CTRL0), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1, + A3XX_VBIF_XIN_HALT_CTRL1), +}; + +static const struct adreno_reg_offsets a3xx_reg_offsets = { + .offsets = a3xx_register_offsets, + .offset_0 = ADRENO_REG_REGISTER_MAX, +}; + +/* + * Defined the size of sections dumped in snapshot, these values + * may change after initialization based on the specific core + */ +static struct adreno_snapshot_sizes a3xx_snap_sizes = { + .cp_pfp = 0x14, + .vpc_mem = 512, + .cp_meq = 16, + .shader_mem = 0x4000, + .cp_merciu = 0, + .roq = 128, +}; + +static struct adreno_snapshot_data a3xx_snapshot_data = { + .sect_sizes = &a3xx_snap_sizes, +}; + +static int _load_firmware(struct kgsl_device *device, const char *fwfile, + void **buf, int *len) +{ + const struct firmware *fw = NULL; + int ret; + + ret = request_firmware(&fw, fwfile, device->dev); + + if (ret) { + KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + if (fw) + *buf = kmalloc(fw->size, GFP_KERNEL); + else + return -EINVAL; + + if (*buf) { + memcpy(*buf, fw->data, fw->size); + *len = fw->size; + } + + release_firmware(fw); + return (*buf != NULL) ? 
0 : -ENOMEM; +} + +int a3xx_microcode_read(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_dev->pm4_fw == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + adreno_dev->gpucore->pm4fw_name, &ptr, &len); + + if (ret) { + KGSL_DRV_FATAL(device, "Failed to read pm4 ucode %s\n", + adreno_dev->gpucore->pm4fw_name); + return ret; + } + + /* PM4 size is 3 dword aligned plus 1 dword of version */ + if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) { + KGSL_DRV_ERR(device, "Bad pm4 microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + adreno_dev->pm4_fw_size = len / sizeof(uint32_t); + adreno_dev->pm4_fw = ptr; + adreno_dev->pm4_fw_version = adreno_dev->pm4_fw[1]; + } + + if (adreno_dev->pfp_fw == NULL) { + int len; + void *ptr; + + int ret = _load_firmware(device, + adreno_dev->gpucore->pfpfw_name, &ptr, &len); + if (ret) { + KGSL_DRV_FATAL(device, "Failed to read pfp ucode %s\n", + adreno_dev->gpucore->pfpfw_name); + return ret; + } + + /* PFP size shold be dword aligned */ + if (len % sizeof(uint32_t) != 0) { + KGSL_DRV_ERR(device, "Bad PFP microcode size: %d\n", + len); + kfree(ptr); + return -ENOMEM; + } + + adreno_dev->pfp_fw_size = len / sizeof(uint32_t); + adreno_dev->pfp_fw = ptr; + adreno_dev->pfp_fw_version = adreno_dev->pfp_fw[5]; + } + + return 0; +} + +/** + * adreno_ringbuffer_load_pm4_ucode() - Load pm4 ucode + * @device: Pointer to a KGSL device + * @start: Starting index in pm4 ucode to load + * @end: Ending index of pm4 ucode to load + * @addr: Address to load the pm4 ucode + * + * Load the pm4 ucode from @start at @addr. + */ +static inline int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device, + unsigned int start, unsigned int end, unsigned int addr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int i; + + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_WADDR, addr); + for (i = start; i < end; i++) + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA, + adreno_dev->pm4_fw[i]); + + return 0; +} + +/** + * adreno_ringbuffer_load_pfp_ucode() - Load pfp ucode + * @device: Pointer to a KGSL device + * @start: Starting index in pfp ucode to load + * @end: Ending index of pfp ucode to load + * @addr: Address to load the pfp ucode + * + * Load the pfp ucode from @start at @addr. 
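+ * + * Like the PM4 loader above, this is a plain AHB loader: the PFP ucode + * address register is written once and every ucode dword is then written + * to the PFP ucode data register. Always returns 0.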
+ */ +static inline int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device, + unsigned int start, unsigned int end, unsigned int addr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int i; + + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr); + for (i = start; i < end; i++) + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + adreno_dev->pfp_fw[i]); + + return 0; +} + +/** + * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode + * @rb: Pointer to adreno ringbuffer + * @load_jt: If non zero only load Jump tables + * + * Bootstrap ucode for GPU + * load_jt == 0, bootstrap full microcode + * load_jt == 1, bootstrap jump tables of microcode + * + * For example a bootstrap packet would like below + * Setup a type3 bootstrap packet + * PFP size to bootstrap + * PFP addr to write the PFP data + * PM4 size to bootstrap + * PM4 addr to write the PM4 data + * PFP dwords from microcode to bootstrap + * PM4 size dwords from microcode to bootstrap + */ +static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb, + unsigned int load_jt) +{ + unsigned int *cmds, bootstrap_size, rb_size; + int i = 0; + int ret; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr; + + /* Only bootstrap jump tables of ucode */ + if (load_jt) { + pm4_idx = adreno_dev->gpucore->pm4_jt_idx; + pm4_addr = adreno_dev->gpucore->pm4_jt_addr; + pfp_idx = adreno_dev->gpucore->pfp_jt_idx; + pfp_addr = adreno_dev->gpucore->pfp_jt_addr; + } else { + /* Bootstrap full ucode */ + pm4_idx = 1; + pm4_addr = 0; + pfp_idx = 1; + pfp_addr = 0; + } + + pm4_size = (adreno_dev->pm4_fw_size - pm4_idx); + pfp_size = (adreno_dev->pfp_fw_size - pfp_idx); + + bootstrap_size = (pm4_size + pfp_size + 5); + + /* + * Overwrite the first entry in the jump table with the special + * bootstrap opcode + */ + + if (adreno_is_a4xx(adreno_dev)) { + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, + 0x400); + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + 0x6f0009); + /* + * The support packets (the RMW and INTERRUPT) that are sent + * after the bootstrap packet should not be included in the size + * of the bootstrap packet but we do need to reserve enough + * space for those too + */ + rb_size = bootstrap_size + 6; + } else { + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, + 0x200); + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + 0x6f0005); + rb_size = bootstrap_size; + } + + /* clear ME_HALT to start micro engine */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); + + cmds = adreno_ringbuffer_allocspace(rb, rb_size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + /* Construct the packet that bootsraps the ucode */ + *cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1)); + *cmds++ = pfp_size; + *cmds++ = pfp_addr; + *cmds++ = pm4_size; + *cmds++ = pm4_addr; + + /** + * Theory of operation: + * + * In A4x, we cannot have the PFP executing instructions while its + * instruction RAM is loading. We load the PFP's instruction RAM + * using type-0 writes from the ME. + * + * To make sure the PFP is not fetching instructions at the same + * time, we put it in a one-instruction loop: + * mvc (ME), (ringbuffer) + * which executes repeatedly until all of the data has been moved + * from the ring buffer to the ME. 
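+ * + * Note the ordering of the payload built below: on A4xx the PM4 dwords + * are placed ahead of the PFP dwords and are followed by the CP_REG_RMW + * and CP_INTERRUPT support packets, while on older targets the PFP dwords + * come first and no support packets are appended.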
+ */ + if (adreno_is_a4xx(adreno_dev)) { + for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) + *cmds++ = adreno_dev->pm4_fw[i]; + for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) + *cmds++ = adreno_dev->pfp_fw[i]; + + *cmds++ = cp_type3_packet(CP_REG_RMW, 3); + *cmds++ = 0x20000000 + A4XX_CP_RB_WPTR; + *cmds++ = 0xffffffff; + *cmds++ = 0x00000002; + *cmds++ = cp_type3_packet(CP_INTERRUPT, 1); + *cmds++ = 0; + + rb->wptr = rb->wptr - 2; + adreno_ringbuffer_submit(rb, NULL); + rb->wptr = rb->wptr + 2; + } else { + for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++) + *cmds++ = adreno_dev->pfp_fw[i]; + for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++) + *cmds++ = adreno_dev->pm4_fw[i]; + adreno_ringbuffer_submit(rb, NULL); + } + + /* idle device to validate bootstrap */ + ret = adreno_spin_idle(device, 2000); + + if (ret) { + KGSL_DRV_ERR(rb->device, + "microcode bootstrap failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } + + /* Clear the chicken bit for speed up on A430 and its derivatives */ + if (!adreno_is_a420(adreno_dev)) + kgsl_regwrite(device, A4XX_CP_DEBUG, + A4XX_CP_DEBUG_DEFAULT & ~(1 << 14)); + + return ret; +} + +int a3xx_microcode_load(struct adreno_device *adreno_dev, + unsigned int start_type) +{ + int status; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + struct kgsl_device *device = rb->device; + + if (start_type == ADRENO_START_COLD) { + /* If bootstrapping is supported, use it to load the ucode */ + if (adreno_bootstrap_ucode(adreno_dev)) { + + /* + * Load the first pm4_bstrp_size + pfp_bstrp_size microcode + * dwords using AHB writes. This small microcode (the + * dispatcher + booter) enables the CP to understand the + * CP_BOOTSTRAP_UCODE packet in _ringbuffer_bootstrap_ucode; + * the CP_BOOTSTRAP_UCODE packet then loads the rest of the + * microcode.
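+ * + * The same _ringbuffer_bootstrap_ucode() helper is reused for the warm + * start path below, where only the CP jump tables are reloaded.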
+ */ + + status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, + adreno_dev->gpucore->pm4_bstrp_size+1, 0); + if (status != 0) + return status; + + status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, + adreno_dev->gpucore->pfp_bstrp_size+1, 0); + if (status != 0) + return status; + + /* Bootstrap rest of the ucode here */ + status = _ringbuffer_bootstrap_ucode(rb, 0); + if (status != 0) + return status; + + } else { + /* load the CP ucode using AHB writes */ + status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, + adreno_dev->pm4_fw_size, 0); + if (status != 0) + return status; + + /* load the prefetch parser ucode using AHB writes */ + status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, + adreno_dev->pfp_fw_size, 0); + if (status != 0) + return status; + } + } else if (start_type == ADRENO_START_WARM) { + /* If bootstrapping if supported to load jump tables */ + if (adreno_bootstrap_ucode(adreno_dev)) { + status = _ringbuffer_bootstrap_ucode(rb, 1); + if (status != 0) + return status; + + } else { + /* load the CP jump tables using AHB writes */ + status = adreno_ringbuffer_load_pm4_ucode(device, + adreno_dev->gpucore->pm4_jt_idx, + adreno_dev->pm4_fw_size, + adreno_dev->gpucore->pm4_jt_addr); + if (status != 0) + return status; + + /* + * load the prefetch parser jump tables using AHB writes + */ + status = adreno_ringbuffer_load_pfp_ucode(device, + adreno_dev->gpucore->pfp_jt_idx, + adreno_dev->pfp_fw_size, + adreno_dev->gpucore->pfp_jt_addr); + if (status != 0) + return status; + } + } else + return -EINVAL; + + return 0; +} + +struct adreno_gpudev adreno_a3xx_gpudev = { + .reg_offsets = &a3xx_reg_offsets, + .ft_perf_counters = a3xx_ft_perf_counters, + .ft_perf_counters_count = ARRAY_SIZE(a3xx_ft_perf_counters), + .perfcounters = &a3xx_perfcounters, + .irq = &a3xx_irq, + .irq_trace = trace_kgsl_a3xx_irq_status, + .snapshot_data = &a3xx_snapshot_data, + .num_prio_levels = 1, + .vbif_xin_halt_ctrl0_mask = A3XX_VBIF_XIN_HALT_CTRL0_MASK, + .platform_setup = a3xx_platform_setup, + .rb_init = a3xx_rb_init, + .microcode_read = a3xx_microcode_read, + .microcode_load = a3xx_microcode_load, + .perfcounter_init = a3xx_perfcounter_init, + .perfcounter_close = a3xx_perfcounter_close, + .start = a3xx_start, + .snapshot = a3xx_snapshot, + .coresight = &a3xx_coresight, +}; diff --git a/drivers/gpu/msm/adreno_a3xx.h b/drivers/gpu/msm/adreno_a3xx.h new file mode 100644 index 000000000000..4ab1236020e8 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __A3XX_H +#define __A3XX_H + +unsigned int a3xx_irq_pending(struct adreno_device *adreno_dev); + +int a3xx_microcode_read(struct adreno_device *adreno_dev); +int a3xx_microcode_load(struct adreno_device *adreno_dev, + unsigned int start_type); +int a3xx_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable); +uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter); + +void a3xx_a4xx_err_callback(struct adreno_device *adreno_dev, int bit); + +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +#endif /*__A3XX_H */ diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c b/drivers/gpu/msm/adreno_a3xx_snapshot.c new file mode 100644 index 000000000000..c4d415adc9f0 --- /dev/null +++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c @@ -0,0 +1,370 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/io.h> +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_snapshot.h" +#include "a3xx_reg.h" +#include "adreno_snapshot.h" +#include "adreno_a3xx.h" + +/* + * Set of registers to dump for A3XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a3xx_registers[] = { + 0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027, + 0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c, + 0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5, + 0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1, + 0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd, + 0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9, + 0x01fc, 0x01ff, + 0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f, + 0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f, + 0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e, + 0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f, + 0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7, + 0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, + 0x0e41, 0x0e45, 0x0e64, 0x0e65, + 0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7, + 0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09, + 0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069, + 0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075, + 0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109, + 0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115, + 0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0, + 0x2240, 0x227e, + 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, + 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, + 0x22ff, 0x22ff, 0x2340, 0x2343, + 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, + 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, + 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, + 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 
0x2510, 0x2511, + 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, + 0x25f0, 0x25f0, + 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, + 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, + 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, + 0x300C, 0x300E, 0x301C, 0x301D, + 0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036, + 0x303C, 0x303C, 0x305E, 0x305F, +}; + +/* Removed the following HLSQ register ranges from being read during + * fault tolerance since reading the registers may cause the device to hang: + */ +static const unsigned int a3xx_hlsq_registers[] = { + 0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23, + 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, + 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, +}; + +/* The set of additional registers to be dumped for A330 */ + +static const unsigned int a330_registers[] = { + 0x1d0, 0x1d0, 0x1d4, 0x1d4, 0x453, 0x453, +}; + +/* Shader memory size in words */ +#define SHADER_MEMORY_SIZE 0x4000 + +/** + * _rbbm_debug_bus_read - Helper function to read data from the RBBM + * debug bus. + * @device - GPU device to read/write registers + * @block_id - Debug bus block to read from + * @index - Index in the debug bus block to read + * @ret - Value of the register read + */ +static void _rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int block = (block_id << 8) | 1 << 16; + kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index); + kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val); +} + +/** + * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader + * memory to the snapshot buffer. + * @device: GPU device whose shader memory is to be dumped + * @buf: Pointer to binary snapshot data blob being made + * @remain: Number of remaining bytes in the snapshot blob + * @priv: Unused parameter + * + */ +static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int shader_read_len = SHADER_MEMORY_SIZE; + + if (shader_read_len > (device->shader_mem_len >> 2)) + shader_read_len = (device->shader_mem_len >> 2); + + if (remain < DEBUG_SECTION_SZ(shader_read_len)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; + header->size = shader_read_len; + + /* Map shader memory to kernel, for dumping */ + if (device->shader_mem_virt == NULL) + device->shader_mem_virt = devm_ioremap(device->dev, + device->shader_mem_phys, + device->shader_mem_len); + + if (device->shader_mem_virt == NULL) { + KGSL_DRV_ERR(device, + "Unable to map shader memory region\n"); + return 0; + } + + /* Now, dump shader memory to snapshot */ + for (i = 0; i < shader_read_len; i++) + adreno_shadermem_regread(device, i, &data[i]); + + + return DEBUG_SECTION_SZ(shader_read_len); +} + +static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + struct kgsl_snapshot_debugbus *header + = (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + /* + * For A305 and A320 all debug bus regions are the same 
size (0x40). For + * A330, they can be different sizes - most are still 0x40, but some + * like CP are larger + */ + + dwords = (adreno_is_a330(adreno_dev) || + adreno_is_a305b(adreno_dev)) ? + block->dwords : 0x40; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords; + + for (i = 0; i < dwords; i++) + _rbbm_debug_bus_read(device, block->block_id, i, &data[i]); + + return size; +} + +static struct adreno_debugbus_block debugbus_blocks[] = { + { RBBM_BLOCK_ID_CP, 0x52, }, + { RBBM_BLOCK_ID_RBBM, 0x40, }, + { RBBM_BLOCK_ID_VBIF, 0x40, }, + { RBBM_BLOCK_ID_HLSQ, 0x40, }, + { RBBM_BLOCK_ID_UCHE, 0x40, }, + { RBBM_BLOCK_ID_PC, 0x40, }, + { RBBM_BLOCK_ID_VFD, 0x40, }, + { RBBM_BLOCK_ID_VPC, 0x40, }, + { RBBM_BLOCK_ID_TSE, 0x40, }, + { RBBM_BLOCK_ID_RAS, 0x40, }, + { RBBM_BLOCK_ID_VSC, 0x40, }, + { RBBM_BLOCK_ID_SP_0, 0x40, }, + { RBBM_BLOCK_ID_SP_1, 0x40, }, + { RBBM_BLOCK_ID_SP_2, 0x40, }, + { RBBM_BLOCK_ID_SP_3, 0x40, }, + { RBBM_BLOCK_ID_TPL1_0, 0x40, }, + { RBBM_BLOCK_ID_TPL1_1, 0x40, }, + { RBBM_BLOCK_ID_TPL1_2, 0x40, }, + { RBBM_BLOCK_ID_TPL1_3, 0x40, }, + { RBBM_BLOCK_ID_RB_0, 0x40, }, + { RBBM_BLOCK_ID_RB_1, 0x40, }, + { RBBM_BLOCK_ID_RB_2, 0x40, }, + { RBBM_BLOCK_ID_RB_3, 0x40, }, + { RBBM_BLOCK_ID_MARB_0, 0x40, }, + { RBBM_BLOCK_ID_MARB_1, 0x40, }, + { RBBM_BLOCK_ID_MARB_2, 0x40, }, + { RBBM_BLOCK_ID_MARB_3, 0x40, }, +}; + +static void a3xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, + a3xx_snapshot_debugbus_block, + (void *) &debugbus_blocks[i]); + } +} + +static void _snapshot_hlsq_regs(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* + * Trying to read HLSQ registers when the HLSQ block is busy + * will cause the device to hang. The RBBM_DEBUG_BUS has information + * that will tell us if the HLSQ block is busy or not. Read values + * from the debug bus to ensure the HLSQ block is not busy (this + * is hardware dependent). If the HLSQ block is busy do not + * dump the registers, otherwise dump the HLSQ registers. 
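+ * + * The debug bus index and bit mask used for this check depend on the + * target: A330 samples the stall_ctxt_full status bit, while earlier + * parts sample the tpif and spif state machine values.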
+ */ + + if (adreno_is_a330(adreno_dev)) { + /* + * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27] + * + * if (!stall_context_full) + * then dump HLSQ registers + */ + unsigned int stall_context_full = 0; + + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49, + &stall_context_full); + stall_context_full &= 0x08000000; + + if (stall_context_full) + return; + } else { + /* + * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0] + * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0] + * + * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10)) + * then dump HLSQ registers + */ + unsigned int next_pif = 0; + + /* check tpif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif); + next_pif &= 0x1f; + if (next_pif != 0 && next_pif != 1 && next_pif != 28) + return; + + /* check spif */ + _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif); + next_pif &= 0x3f; + if (next_pif != 0 && next_pif != 1 && next_pif != 10) + return; + } + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers); +} + +/* + * a3xx_snapshot() - A3XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Snapshot meta data + * @remain: Amount of space left in snapshot memory + * + * This is where all of the A3XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a3xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; + unsigned int reg; + + /* Disable Clock gating temporarily for the debug bus to work */ + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_CLOCK_CTL, 0x00); + + SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers); + + _snapshot_hlsq_regs(device, snapshot); + + if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a330_registers); + + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, + 0x0, snap_data->sect_sizes->cp_pfp); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44); + + /* VPC memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_vpc_memory, + &snap_data->sect_sizes->vpc_mem); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, + adreno_snapshot_cp_meq, &snap_data->sect_sizes->cp_meq); + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a3xx_snapshot_shader_memory, + &snap_data->sect_sizes->shader_mem); + + + /* CP PFP and PM4 */ + + /* + * Reading the microcode while the CP is running will + * basically move the CP instruction pointer to + * whatever address we read. Big badaboom ensues. Stop the CP + * (if it isn't already stopped) to ensure that we are safe. + * We do this here and not earlier to avoid corrupting the RBBM + * status and CP registers - by the time we get here we don't + * care about the contents of the CP anymore. 
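+ * + * The stop is done with a read-modify-write of CP_ME_CNTL below, setting + * the two halt bits while leaving the rest of the register intact.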
+ */ + + adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pfp_ram, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pm4_ram, NULL); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_roq, &snap_data->sect_sizes->roq); + + if (snap_data->sect_sizes->cp_merciu) { + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_merciu, + &snap_data->sect_sizes->cp_merciu); + } + + a3xx_snapshot_debugbus(device, snapshot); +} diff --git a/drivers/gpu/msm/adreno_a4xx.c b/drivers/gpu/msm/adreno_a4xx.c new file mode 100644 index 000000000000..99a331d7a470 --- /dev/null +++ b/drivers/gpu/msm/adreno_a4xx.c @@ -0,0 +1,2167 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/msm_kgsl.h> + +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "a4xx_reg.h" +#include "adreno_a3xx.h" +#include "adreno_a4xx.h" +#include "adreno_cp_parser.h" +#include "adreno_trace.h" +#include "adreno_pm4types.h" +#include "adreno_perfcounter.h" + +#define SP_TP_PWR_ON BIT(20) + +/* + * Define registers for a4xx that contain addresses used by the + * cp parser logic + */ +const unsigned int a4xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = { + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + A4XX_VSC_PIPE_DATA_ADDRESS_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + A4XX_VSC_PIPE_DATA_LENGTH_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + A4XX_VSC_PIPE_DATA_ADDRESS_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + A4XX_VSC_PIPE_DATA_LENGTH_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + A4XX_VSC_PIPE_DATA_ADDRESS_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + A4XX_VSC_PIPE_DATA_LENGTH_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + A4XX_VSC_PIPE_DATA_ADDRESS_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + A4XX_VSC_PIPE_DATA_LENGTH_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + A4XX_VSC_PIPE_DATA_ADDRESS_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + A4XX_VSC_PIPE_DATA_LENGTH_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + A4XX_VSC_PIPE_DATA_ADDRESS_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + A4XX_VSC_PIPE_DATA_LENGTH_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + A4XX_VSC_PIPE_DATA_ADDRESS_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + A4XX_VSC_PIPE_DATA_LENGTH_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + A4XX_VSC_PIPE_DATA_ADDRESS_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + A4XX_VSC_PIPE_DATA_LENGTH_7), + 
ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + A4XX_VFD_FETCH_INSTR_1_0), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + A4XX_VFD_FETCH_INSTR_1_1), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + A4XX_VFD_FETCH_INSTR_1_2), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + A4XX_VFD_FETCH_INSTR_1_3), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + A4XX_VFD_FETCH_INSTR_1_4), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + A4XX_VFD_FETCH_INSTR_1_5), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + A4XX_VFD_FETCH_INSTR_1_6), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + A4XX_VFD_FETCH_INSTR_1_7), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + A4XX_VFD_FETCH_INSTR_1_8), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + A4XX_VFD_FETCH_INSTR_1_9), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + A4XX_VFD_FETCH_INSTR_1_10), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + A4XX_VFD_FETCH_INSTR_1_11), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + A4XX_VFD_FETCH_INSTR_1_12), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + A4XX_VFD_FETCH_INSTR_1_13), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + A4XX_VFD_FETCH_INSTR_1_14), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + A4XX_VFD_FETCH_INSTR_1_15), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, + A4XX_VFD_FETCH_INSTR_1_16), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17, + A4XX_VFD_FETCH_INSTR_1_17), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18, + A4XX_VFD_FETCH_INSTR_1_18), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19, + A4XX_VFD_FETCH_INSTR_1_19), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20, + A4XX_VFD_FETCH_INSTR_1_20), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21, + A4XX_VFD_FETCH_INSTR_1_21), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22, + A4XX_VFD_FETCH_INSTR_1_22), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23, + A4XX_VFD_FETCH_INSTR_1_23), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24, + A4XX_VFD_FETCH_INSTR_1_24), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25, + A4XX_VFD_FETCH_INSTR_1_25), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26, + A4XX_VFD_FETCH_INSTR_1_26), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27, + A4XX_VFD_FETCH_INSTR_1_27), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28, + A4XX_VFD_FETCH_INSTR_1_28), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29, + A4XX_VFD_FETCH_INSTR_1_29), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30, + A4XX_VFD_FETCH_INSTR_1_30), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31, + A4XX_VFD_FETCH_INSTR_1_31), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + A4XX_VSC_SIZE_ADDRESS), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + A4XX_SP_VS_PVT_MEM_ADDR), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + A4XX_SP_FS_PVT_MEM_ADDR), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + A4XX_SP_VS_OBJ_START), + ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + A4XX_SP_FS_OBJ_START), + ADRENO_REG_DEFINE(ADRENO_CP_UCHE_INVALIDATE0, + A4XX_UCHE_INVALIDATE0), + ADRENO_REG_DEFINE(ADRENO_CP_UCHE_INVALIDATE1, + A4XX_UCHE_INVALIDATE1), +}; + +static const struct adreno_vbif_data a405_vbif[] = { + { A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a420_vbif[] = { + { A4XX_VBIF_ABIT_SORT, 0x0001001F }, + { 
A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4 }, + { A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 }, + { A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 }, + {0, 0}, +}; + +static const struct adreno_vbif_data a430_vbif[] = { + { A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 }, + { A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 }, + { A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018 }, + { A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 }, + {0, 0}, +}; + +static const struct adreno_vbif_platform a4xx_vbif_platforms[] = { + { adreno_is_a405, a405_vbif }, + { adreno_is_a420, a420_vbif }, + { adreno_is_a430, a430_vbif }, + { adreno_is_a418, a430_vbif }, +}; + +/* a4xx_preemption_start() - Setup state to start preemption */ +static void a4xx_preemption_start(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = &adreno_dev->dev; + uint32_t val; + + /* + * Setup scratch registers from which the GPU will program the + * registers required to start execution of new ringbuffer + * set ringbuffer address + */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG8, + rb->buffer_desc.gpuaddr); + kgsl_regread(device, A4XX_CP_RB_CNTL, &val); + /* scratch REG9 corresponds to CP_RB_CNTL register */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG9, val); + /* scratch REG10 corresponds to rptr address */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG10, 0); + /* scratch REG11 corresponds to rptr */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG11, rb->rptr); + /* scratch REG12 corresponds to wptr */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG12, rb->wptr); + /* + * scratch REG13 corresponds to IB1_BASE, + * 0 since we do not do switches in between IB's + */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG13, 0); + /* scratch REG14 corresponds to IB1_BUFSZ */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG14, 0); + /* scratch REG15 corresponds to IB2_BASE */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG15, 0); + /* scratch REG16 corresponds to IB2_BUFSZ */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG16, 0); + /* scratch REG17 corresponds to GPR11 */ + kgsl_regwrite(device, A4XX_CP_SCRATCH_REG17, rb->gpr11); +} + +/* a4xx_preemption_save() - Save the state after preemption is done */ +static void a4xx_preemption_save(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_regread(device, A4XX_CP_SCRATCH_REG18, &rb->rptr); + kgsl_regread(device, A4XX_CP_SCRATCH_REG23, &rb->gpr11); +} + +static int a4xx_preemption_token(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + uint64_t gpuaddr) +{ + unsigned int *cmds_orig = cmds; + + /* Turn on preemption flag */ + /* preemption token - fill when pt switch command size is known */ + *cmds++ = cp_type3_packet(CP_PREEMPT_TOKEN, 3); + *cmds++ = (uint)gpuaddr; + *cmds++ = 1; + /* generate interrupt on preemption completion */ + *cmds++ = 1 << CP_PREEMPT_ORDINAL_INTERRUPT; + + return cmds - cmds_orig; + +} + +static int a4xx_preemption_pre_ibsubmit( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context, uint64_t cond_addr, + struct kgsl_memobj_node *ib) +{ + unsigned int *cmds_orig = cmds; + int exec_ib = 0; + + cmds += a4xx_preemption_token(adreno_dev, rb, cmds, + 
rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(context->id, preempted)); + + if (ib) + exec_ib = 1; + + *cmds++ = cp_type3_packet(CP_COND_EXEC, 4); + *cmds++ = cond_addr; + *cmds++ = cond_addr; + *cmds++ = 1; + *cmds++ = 7 + exec_ib * 3; + if (exec_ib) { + *cmds++ = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2); + *cmds++ = ib->gpuaddr; + *cmds++ = (unsigned int) ib->size >> 2; + } + /* clear preemption flag */ + *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2); + *cmds++ = cond_addr; + *cmds++ = 0; + *cmds++ = cp_type3_packet(CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); + *cmds++ = 0; + + return cmds - cmds_orig; +} + +/* + * a4xx_is_sptp_idle() - A430 SP/TP should be off to be considered idle + * @adreno_dev: The adreno device pointer + */ +static bool a4xx_is_sptp_idle(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC)) + return true; + + /* If SP/TP pc isn't enabled, don't worry about power */ + kgsl_regread(device, A4XX_CP_POWER_COLLAPSE_CNTL, ®); + if (!(reg & 0x10)) + return true; + + /* Check that SP/TP is off */ + kgsl_regread(device, A4XX_RBBM_POWER_STATUS, ®); + return !(reg & SP_TP_PWR_ON); +} + +/* + * a4xx_regulator_enable() - Enable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly enabled + * on a restart. Clocks must be on during this call. + */ +static int a4xx_regulator_enable(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + if (!(adreno_is_a430(adreno_dev) || adreno_is_a418(adreno_dev))) + return 0; + + /* Set the default register values; set SW_COLLAPSE to 0 */ + kgsl_regwrite(device, A4XX_RBBM_POWER_CNTL_IP, 0x778000); + do { + udelay(5); + kgsl_regread(device, A4XX_RBBM_POWER_STATUS, ®); + } while (!(reg & SP_TP_PWR_ON)); + return 0; +} + +/* + * a4xx_regulator_disable() - Disable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly disabled + * on a power down to prevent current spikes. Clocks must be on + * during this call. + */ +static void a4xx_regulator_disable(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + if (!(adreno_is_a430(adreno_dev) || adreno_is_a418(adreno_dev))) + return; + + /* Set the default register values; set SW_COLLAPSE to 1 */ + kgsl_regwrite(device, A4XX_RBBM_POWER_CNTL_IP, 0x778001); +} + +/* + * a4xx_enable_pc() - Enable the SP/TP block power collapse + * @adreno_dev: The adreno device pointer + */ +static void a4xx_enable_pc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + kgsl_regwrite(device, A4XX_CP_POWER_COLLAPSE_CNTL, 0x00400010); + trace_adreno_sp_tp((unsigned long) __builtin_return_address(0)); +}; + +/* + * a4xx_enable_ppd() - Enable the Peak power detect logic in the h/w + * @adreno_dev: The adreno device pointer + * + * A430 can detect peak current conditions inside h/w and throttle + * the workload to ALUs to mitigate it. 
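+ * + * The thresholds below are only programmed on A430v2 when the ADRENO_PPD + * feature and the ADRENO_PPD_CTRL pwrctrl flag are set; on any other + * configuration this function is a no-op.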
+ */ +static void a4xx_enable_ppd(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD) || + !test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag) || + !adreno_is_a430v2(adreno_dev)) + return; + + /* Program thresholds */ + kgsl_regwrite(device, A4XX_RBBM_PPD_EPOCH_INTER_TH_HIGH_CLEAR_THR, + 0x003F0101); + kgsl_regwrite(device, A4XX_RBBM_PPD_EPOCH_INTER_TH_LOW, 0x00000101); + kgsl_regwrite(device, A4XX_RBBM_PPD_V2_SP_PWR_WEIGHTS, 0x00085014); + kgsl_regwrite(device, A4XX_RBBM_PPD_V2_SP_RB_EPOCH_TH, 0x00000B46); + kgsl_regwrite(device, A4XX_RBBM_PPD_V2_TP_CONFIG, 0xE4525111); + kgsl_regwrite(device, A4XX_RBBM_PPD_RAMP_V2_CONTROL, 0x0000000B); + + /* Enable PPD*/ + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40C); +}; + +/* + * a4xx_pwrlevel_change_settings() - Program the hardware during power level + * transitions + * @adreno_dev: The adreno device pointer + * @prelevel: The previous power level + * @postlevel: The new power level + * @post: True if called after the clock change has taken effect + */ +static void a4xx_pwrlevel_change_settings(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post) +{ + struct kgsl_device *device = &adreno_dev->dev; + static int pre; + + /* PPD programming only for A430v2 */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD) || + !test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag) || + !adreno_is_a430v2(adreno_dev)) + return; + + /* if this is a real pre, or a post without a previous pre, set pre */ + if ((post == 0) || (pre == 0 && post == 1)) + pre = 1; + else if (post == 1) + pre = 0; + + if ((prelevel == 0) && pre) { + /* Going to Non-Turbo mode - mask the throttle and reset */ + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40E); + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40C); + } else if ((postlevel == 0) && post) { + /* Going to Turbo mode - unmask the throttle and reset */ + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40A); + kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E408); + } + + if (post) + pre = 0; +} + +/* + * a4xx_enable_hwcg() - Program the clock control registers + * @device: The adreno device pointer + */ +static void a4xx_enable_hwcg(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP0, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP1, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP2, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP3, 0x02222202); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP0, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP1, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP2, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP3, 0x00002222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP0, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP1, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP2, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP3, 0x0E739CE7); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP0, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP1, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP2, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP3, 0x00111111); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP0, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP1, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP2, 0x22222222); + 
kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP3, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP0, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP1, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP2, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP3, 0x00222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP0, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP1, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP2, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP3, 0x00000104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP0, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP1, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP2, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP3, 0x00000081); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB0, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB1, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB2, 0x22222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB3, 0x22222222); + /* Disable L1 clocking in A420 due to CCU issues with it */ + if (adreno_is_a420(adreno_dev)) { + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB0, 0x00002020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB1, 0x00002020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB2, 0x00002020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB3, 0x00002020); + } else { + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB0, 0x00022020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB1, 0x00022020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB2, 0x00022020); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB3, 0x00022020); + } + /* No CCU for A405 */ + if (!adreno_is_a405(adreno_dev)) { + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU0, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU1, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU2, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_CTL_MARB_CCU3, 0x00000922); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU0, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU1, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU2, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU3, 0x00000000); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_0, + 0x00000001); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_1, + 0x00000001); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_2, + 0x00000001); + kgsl_regwrite(device, + A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_3, + 0x00000001); + } + kgsl_regwrite(device, A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222); + kgsl_regwrite(device, 
A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); + /* + * Due to a HW timing issue, top level HW clock gating is causing + * register read/writes to be dropped in adreno a430. + * This timing issue started happening because of SP/TP power collapse. + * On targets that do not have SP/TP PC there is no timing issue. + * The HW timing issue could be fixed by + * a) disabling SP/TP power collapse + * b) or disabling HW clock gating. + * Disabling HW clock gating + NAP enabled combination has + * minimal power impact. So this option is chosen over disabling + * SP/TP power collapse. + * Revisions of A430 which chipid 2 and above do not have the issue. + */ + if (adreno_is_a430(adreno_dev) && + (ADRENO_CHIPID_PATCH(adreno_dev->chipid) < 2)) + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0); + else + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2, 0); +} + +/** + * a4xx_protect_init() - Initializes register protection on a4xx + * @adreno_dev: Pointer to the device structure + * Performs register writes to enable protected access to sensitive + * registers + */ +static void a4xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int index = 0; + struct kgsl_protected_registers *iommu_regs; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A4XX_CP_PROTECT_CTRL, 0x00000007); + /* RBBM registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x4, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x8, 3); + adreno_set_protected_registers(adreno_dev, &index, 0x10, 4); + adreno_set_protected_registers(adreno_dev, &index, 0x20, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x40, 6); + adreno_set_protected_registers(adreno_dev, &index, 0x80, 4); + + /* Content protection registers */ + if (kgsl_mmu_is_secured(&device->mmu)) { + adreno_set_protected_registers(adreno_dev, &index, + A4XX_RBBM_SECVID_TSB_TRUSTED_BASE, 3); + adreno_set_protected_registers(adreno_dev, &index, + A4XX_RBBM_SECVID_TRUST_CONTROL, 1); + } + + /* CP registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x200, 7); + adreno_set_protected_registers(adreno_dev, &index, 0x580, 4); + adreno_set_protected_registers(adreno_dev, &index, A4XX_CP_PREEMPT, 1); + /* RB registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0); + + /* HLSQ registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE00, 0); + + /* VPC registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE60, 1); + + if (adreno_is_a430(adreno_dev) || adreno_is_a420(adreno_dev) || + adreno_is_a418(adreno_dev)) { + /* + * Protect registers that might cause XPU violation if + * accessed by GPU + */ + adreno_set_protected_registers(adreno_dev, &index, 0x2c00, 10); + adreno_set_protected_registers(adreno_dev, &index, 0x3300, 8); + adreno_set_protected_registers(adreno_dev, &index, 0x3400, 10); + } + + /* SMMU registers */ + iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu); + if (iommu_regs) + adreno_set_protected_registers(adreno_dev, &index, + iommu_regs->base, iommu_regs->range); +} + +static struct adreno_snapshot_sizes a4xx_snap_sizes = { + .cp_pfp = 0x14, + .vpc_mem = 2048, + .cp_meq = 64, + .shader_mem = 
0x4000, + .cp_merciu = 64, + .roq = 512, +}; + + +static void a4xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int cp_debug = A4XX_CP_DEBUG_DEFAULT; + + adreno_vbif_start(adreno_dev, a4xx_vbif_platforms, + ARRAY_SIZE(a4xx_vbif_platforms)); + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A4XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* Tune the hystersis counters for SP and CP idle detection */ + kgsl_regwrite(device, A4XX_RBBM_SP_HYST_CNT, 0x10); + kgsl_regwrite(device, A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + if (adreno_is_a430(adreno_dev)) + kgsl_regwrite(device, A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); + + /* + * Enable the RBBM error reporting bits. This lets us get + * useful information on failure + */ + + kgsl_regwrite(device, A4XX_RBBM_AHB_CTL0, 0x00000001); + + /* Enable AHB error reporting */ + kgsl_regwrite(device, A4XX_RBBM_AHB_CTL1, 0xA6FFFFFF); + + /* Turn on the power counters */ + kgsl_regwrite(device, A4XX_RBBM_RBBM_CTL, 0x00000030); + + /* + * Turn on hang detection - this spews a lot of useful information + * into the RBBM registers on a hang + */ + set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + gpudev->irq->mask |= (1 << A4XX_INT_MISC_HANG_DETECT); + kgsl_regwrite(device, A4XX_RBBM_INTERFACE_HANG_INT_CTL, + (1 << 30) | 0xFFFF); + + /* Set the GMEM/OCMEM base address for A4XX */ + kgsl_regwrite(device, A4XX_RB_GMEM_BASE_ADDR, + (unsigned int)(adreno_dev->gmem_base >> 14)); + + /* Turn on performance counters */ + kgsl_regwrite(device, A4XX_RBBM_PERFCTR_CTL, 0x01); + + /* Enable VFD to access most of the UCHE (7 ways out of 8) */ + kgsl_regwrite(device, A4XX_UCHE_CACHE_WAYS_VFD, 0x07); + + /* Disable L2 bypass to avoid UCHE out of bounds errors */ + kgsl_regwrite(device, UCHE_TRAP_BASE_LO, 0xffff0000); + kgsl_regwrite(device, UCHE_TRAP_BASE_HI, 0xffff0000); + + /* On A420 cores turn on SKIP_IB2_DISABLE in addition to the default */ + if (adreno_is_a420(adreno_dev)) + cp_debug |= (1 << 29); + /* + * Set chicken bit to disable the speed up of bootstrap on A430 + * and its derivatives + */ + else + cp_debug |= (1 << 14); + + kgsl_regwrite(device, A4XX_CP_DEBUG, cp_debug); + + /* On A430 enable SP regfile sleep for power savings */ + if (!adreno_is_a420(adreno_dev)) { + kgsl_regwrite(device, A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, + 0x00000441); + kgsl_regwrite(device, A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, + 0x00000441); + } + + a4xx_enable_hwcg(device); + /* + * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2 + * due to timing issue with HLSQ_TP_CLK_EN + */ + if (adreno_is_a420(adreno_dev)) { + unsigned int val; + kgsl_regread(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, &val); + val &= ~A4XX_CGC_HLSQ_TP_EARLY_CYC_MASK; + val |= 2 << A4XX_CGC_HLSQ_TP_EARLY_CYC_SHIFT; + kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, val); + } + + /* A430 and derivatives offers bigger chunk of CP_STATE_DEBUG regs */ + if (!adreno_is_a420(adreno_dev)) + a4xx_snap_sizes.cp_pfp = 0x34; + + if (adreno_is_a405(adreno_dev)) + gpudev->vbif_xin_halt_ctrl0_mask = + A405_VBIF_XIN_HALT_CTRL0_MASK; + + a4xx_protect_init(adreno_dev); +} + +/* + * a4xx_err_callback() - Callback for a4xx error interrupts + * @adreno_dev: Pointer to device + * @bit: Interrupt bit + */ +static void a4xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + switch (bit) { + case 
A4XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A4XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A4XX_RBBM_AHB_CMD, (1 << 4)); + return; + } + case A4XX_INT_RBBM_REG_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: AHB register timeout\n"); + break; + case A4XX_INT_RBBM_ME_MS_TIMEOUT: + kgsl_regread(device, A4XX_RBBM_AHB_ME_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | ME master split timeout | status=%x\n", reg); + break; + case A4XX_INT_RBBM_PFP_MS_TIMEOUT: + kgsl_regread(device, A4XX_RBBM_AHB_PFP_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | PFP master split timeout | status=%x\n", reg); + break; + case A4XX_INT_RBBM_ETS_MS_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM: ME master split timeout\n"); + break; + case A4XX_INT_RBBM_ASYNC_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ASYNC overflow\n"); + break; + case A4XX_INT_CP_OPCODE_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer opcode error interrupt\n"); + break; + case A4XX_INT_CP_RESERVED_BIT_ERROR: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer reserved bit error interrupt\n"); + break; + case A4XX_INT_CP_HW_FAULT: + { + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + kgsl_regread(device, A4XX_CP_HW_FAULT, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Ringbuffer HW fault | status=%x\n", reg); + /* + * mask off this interrupt since it can spam, it will be + * turned on again when device resets + */ + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, + gpudev->irq->mask & ~(1 << A4XX_INT_CP_HW_FAULT)); + break; + } + case A4XX_INT_CP_REG_PROTECT_FAULT: + kgsl_regread(device, A4XX_CP_PROTECT_STATUS, ®); + KGSL_DRV_CRIT(device, + "CP | Protected mode error| %s | addr=%x\n", + reg & (1 << 24) ? 
"WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + return; + case A4XX_INT_CP_AHB_ERROR_HALT: + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer AHB error interrupt\n"); + break; + case A4XX_INT_RBBM_ATB_BUS_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n"); + break; + case A4XX_INT_UCHE_OOB_ACCESS: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n"); + break; + case A4XX_INT_RBBM_DPM_CALC_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm calc error\n"); + break; + case A4XX_INT_RBBM_DPM_EPOCH_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm epoch error\n"); + break; + case A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm thermal yellow\n"); + break; + case A4XX_INT_RBBM_DPM_THERMAL_RED_ERR: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm thermal red\n"); + break; + default: + KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt\n"); + } +} + +/* Register offset defines for A4XX, in order of enum adreno_regs */ +static unsigned int a4xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_WADDR, A4XX_CP_ME_RAM_WADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_DATA, A4XX_CP_ME_RAM_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_DATA, A4XX_CP_PFP_UCODE_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_ADDR, A4XX_CP_PFP_UCODE_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A4XX_CP_WFI_PEND_CTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A4XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A4XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A4XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A4XX_CP_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A4XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A4XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A4XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A4XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A4XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A4XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_RADDR, A4XX_CP_ME_RAM_RADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A4XX_CP_ROQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A4XX_CP_ROQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A4XX_CP_MERCIU_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A4XX_CP_MERCIU_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, A4XX_CP_MERCIU_DATA2), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A4XX_CP_MEQ_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A4XX_CP_MEQ_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_HW_FAULT, A4XX_CP_HW_FAULT), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_STATUS, A4XX_CP_PROTECT_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A4XX_CP_SCRATCH_REG6), + ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A4XX_CP_SCRATCH_REG7), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A4XX_CP_PREEMPT), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, A4XX_CP_PREEMPT_DEBUG), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, + A4XX_CP_PREEMPT_DISABLE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A4XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A4XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A4XX_RBBM_PERFCTR_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + A4XX_RBBM_PERFCTR_LOAD_CMD0), + 
ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + A4XX_RBBM_PERFCTR_LOAD_CMD1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + A4XX_RBBM_PERFCTR_LOAD_CMD2), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO, + A4XX_RBBM_PERFCTR_PWR_1_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A4XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A4XX_RBBM_INT_0_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A4XX_RBBM_CLOCK_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_SEL, + A4XX_VPC_DEBUG_RAM_SEL), + ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_READ, + A4XX_VPC_DEBUG_RAM_READ), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD, + A4XX_RBBM_INT_CLEAR_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A4XX_RBBM_RBBM_CTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A4XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A4XX_UCHE_INVALIDATE0), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE1, A4XX_UCHE_INVALIDATE1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + A4XX_RBBM_PERFCTR_LOAD_VALUE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + A4XX_RBBM_PERFCTR_LOAD_VALUE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONTROL, + A4XX_RBBM_SECVID_TRUST_CONTROL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + A4XX_RBBM_ALWAYSON_COUNTER_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + A4XX_RBBM_ALWAYSON_COUNTER_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, + A4XX_RBBM_SECVID_TRUST_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_CONTROL, + A4XX_RBBM_SECVID_TSB_CONTROL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + A4XX_RBBM_SECVID_TSB_TRUSTED_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + A4XX_RBBM_SECVID_TSB_TRUSTED_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0, + A4XX_VBIF_XIN_HALT_CTRL0), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1, + A4XX_VBIF_XIN_HALT_CTRL1), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_VERSION, + A4XX_VBIF_VERSION), +}; + +static const struct adreno_reg_offsets a4xx_reg_offsets = { + .offsets = a4xx_register_offsets, + .offset_0 = ADRENO_REG_REGISTER_MAX, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_0_LO, + A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO, + A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_2_LO, + A4XX_RBBM_PERFCTR_CP_2_HI, 2, A4XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO, + A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_4_LO, + A4XX_RBBM_PERFCTR_CP_4_HI, 4, A4XX_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO, + A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_6_LO, + A4XX_RBBM_PERFCTR_CP_6_HI, 6, A4XX_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO, + A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_7 }, +}; + +/* + * Special list of CP registers for 420 to account for flaws. 
This array is + * inserted into the tables during perfcounter init + */ +static struct adreno_perfcount_register a420_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_0_LO, + A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO, + A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 }, + /* + * The selector registers for 3, 5, and 7 are swizzled on the hardware. + * CP_4 and CP_6 are duped to SEL_2 and SEL_3 so we don't enable them + * here + */ + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO, + A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO, + A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO, + A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_4 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_0_LO, + A4XX_RBBM_PERFCTR_RBBM_0_HI, 8, A4XX_RBBM_PERFCTR_RBBM_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_1_LO, + A4XX_RBBM_PERFCTR_RBBM_1_HI, 9, A4XX_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_2_LO, + A4XX_RBBM_PERFCTR_RBBM_2_HI, 10, A4XX_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_3_LO, + A4XX_RBBM_PERFCTR_RBBM_3_HI, 11, A4XX_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_0_LO, + A4XX_RBBM_PERFCTR_PC_0_HI, 12, A4XX_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_1_LO, + A4XX_RBBM_PERFCTR_PC_1_HI, 13, A4XX_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_2_LO, + A4XX_RBBM_PERFCTR_PC_2_HI, 14, A4XX_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_3_LO, + A4XX_RBBM_PERFCTR_PC_3_HI, 15, A4XX_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_4_LO, + A4XX_RBBM_PERFCTR_PC_4_HI, 16, A4XX_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_5_LO, + A4XX_RBBM_PERFCTR_PC_5_HI, 17, A4XX_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_6_LO, + A4XX_RBBM_PERFCTR_PC_6_HI, 18, A4XX_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_7_LO, + A4XX_RBBM_PERFCTR_PC_7_HI, 19, A4XX_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_0_LO, + A4XX_RBBM_PERFCTR_VFD_0_HI, 20, A4XX_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_1_LO, + A4XX_RBBM_PERFCTR_VFD_1_HI, 21, A4XX_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_2_LO, + A4XX_RBBM_PERFCTR_VFD_2_HI, 22, A4XX_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_3_LO, + A4XX_RBBM_PERFCTR_VFD_3_HI, 23, A4XX_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_4_LO, + A4XX_RBBM_PERFCTR_VFD_4_HI, 24, A4XX_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_5_LO, + A4XX_RBBM_PERFCTR_VFD_5_HI, 25, A4XX_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_6_LO, + A4XX_RBBM_PERFCTR_VFD_6_HI, 26, 
A4XX_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_7_LO, + A4XX_RBBM_PERFCTR_VFD_7_HI, 27, A4XX_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_0_LO, + A4XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_1_LO, + A4XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_2_LO, + A4XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_3_LO, + A4XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_4_LO, + A4XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_5_LO, + A4XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_6_LO, + A4XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_7_LO, + A4XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_0_LO, + A4XX_RBBM_PERFCTR_VPC_0_HI, 36, A4XX_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_1_LO, + A4XX_RBBM_PERFCTR_VPC_1_HI, 37, A4XX_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_2_LO, + A4XX_RBBM_PERFCTR_VPC_2_HI, 38, A4XX_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_3_LO, + A4XX_RBBM_PERFCTR_VPC_3_HI, 39, A4XX_VPC_PERFCTR_VPC_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_0_LO, + A4XX_RBBM_PERFCTR_CCU_0_HI, 40, A4XX_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_1_LO, + A4XX_RBBM_PERFCTR_CCU_1_HI, 41, A4XX_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_2_LO, + A4XX_RBBM_PERFCTR_CCU_2_HI, 42, A4XX_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_3_LO, + A4XX_RBBM_PERFCTR_CCU_3_HI, 43, A4XX_RB_PERFCTR_CCU_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_0_LO, + A4XX_RBBM_PERFCTR_TSE_0_HI, 44, A4XX_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_1_LO, + A4XX_RBBM_PERFCTR_TSE_1_HI, 45, A4XX_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_2_LO, + A4XX_RBBM_PERFCTR_TSE_2_HI, 46, A4XX_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_3_LO, + A4XX_RBBM_PERFCTR_TSE_3_HI, 47, A4XX_GRAS_PERFCTR_TSE_SEL_3 }, +}; + + +static struct adreno_perfcount_register a4xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_0_LO, + A4XX_RBBM_PERFCTR_RAS_0_HI, 48, A4XX_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_1_LO, + A4XX_RBBM_PERFCTR_RAS_1_HI, 49, A4XX_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_2_LO, + A4XX_RBBM_PERFCTR_RAS_2_HI, 50, A4XX_GRAS_PERFCTR_RAS_SEL_2 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_3_LO, + A4XX_RBBM_PERFCTR_RAS_3_HI, 51, A4XX_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_0_LO, + A4XX_RBBM_PERFCTR_UCHE_0_HI, 52, A4XX_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_1_LO, + A4XX_RBBM_PERFCTR_UCHE_1_HI, 53, A4XX_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_2_LO, + A4XX_RBBM_PERFCTR_UCHE_2_HI, 54, A4XX_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_3_LO, + A4XX_RBBM_PERFCTR_UCHE_3_HI, 55, A4XX_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_4_LO, + A4XX_RBBM_PERFCTR_UCHE_4_HI, 56, A4XX_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_5_LO, + A4XX_RBBM_PERFCTR_UCHE_5_HI, 57, A4XX_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_6_LO, + A4XX_RBBM_PERFCTR_UCHE_6_HI, 58, A4XX_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_7_LO, + A4XX_RBBM_PERFCTR_UCHE_7_HI, 59, A4XX_UCHE_PERFCTR_UCHE_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_0_LO, + A4XX_RBBM_PERFCTR_TP_0_HI, 60, A4XX_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_1_LO, + A4XX_RBBM_PERFCTR_TP_1_HI, 61, A4XX_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_2_LO, + A4XX_RBBM_PERFCTR_TP_2_HI, 62, A4XX_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_3_LO, + A4XX_RBBM_PERFCTR_TP_3_HI, 63, A4XX_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_4_LO, + A4XX_RBBM_PERFCTR_TP_4_HI, 64, A4XX_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_5_LO, + A4XX_RBBM_PERFCTR_TP_5_HI, 65, A4XX_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_6_LO, + A4XX_RBBM_PERFCTR_TP_6_HI, 66, A4XX_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_7_LO, + A4XX_RBBM_PERFCTR_TP_7_HI, 67, A4XX_TPL1_PERFCTR_TP_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_0_LO, + A4XX_RBBM_PERFCTR_SP_0_HI, 68, A4XX_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_1_LO, + A4XX_RBBM_PERFCTR_SP_1_HI, 69, A4XX_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_2_LO, + A4XX_RBBM_PERFCTR_SP_2_HI, 70, A4XX_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_3_LO, + A4XX_RBBM_PERFCTR_SP_3_HI, 71, A4XX_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_4_LO, + A4XX_RBBM_PERFCTR_SP_4_HI, 72, A4XX_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_5_LO, + A4XX_RBBM_PERFCTR_SP_5_HI, 73, A4XX_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_6_LO, + A4XX_RBBM_PERFCTR_SP_6_HI, 74, A4XX_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_7_LO, + A4XX_RBBM_PERFCTR_SP_7_HI, 75, A4XX_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_8_LO, + A4XX_RBBM_PERFCTR_SP_8_HI, 76, A4XX_SP_PERFCTR_SP_SEL_8 }, 
+ { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_9_LO, + A4XX_RBBM_PERFCTR_SP_9_HI, 77, A4XX_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_10_LO, + A4XX_RBBM_PERFCTR_SP_10_HI, 78, A4XX_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_11_LO, + A4XX_RBBM_PERFCTR_SP_11_HI, 79, A4XX_SP_PERFCTR_SP_SEL_11 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_0_LO, + A4XX_RBBM_PERFCTR_RB_0_HI, 80, A4XX_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_1_LO, + A4XX_RBBM_PERFCTR_RB_1_HI, 81, A4XX_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_2_LO, + A4XX_RBBM_PERFCTR_RB_2_HI, 82, A4XX_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_3_LO, + A4XX_RBBM_PERFCTR_RB_3_HI, 83, A4XX_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_4_LO, + A4XX_RBBM_PERFCTR_RB_4_HI, 84, A4XX_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_5_LO, + A4XX_RBBM_PERFCTR_RB_5_HI, 85, A4XX_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_6_LO, + A4XX_RBBM_PERFCTR_RB_6_HI, 86, A4XX_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_7_LO, + A4XX_RBBM_PERFCTR_RB_7_HI, 87, A4XX_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VSC_0_LO, + A4XX_RBBM_PERFCTR_VSC_0_HI, 88, A4XX_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VSC_1_LO, + A4XX_RBBM_PERFCTR_VSC_1_HI, 89, A4XX_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PWR_0_LO, + A4XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PWR_1_LO, + A4XX_RBBM_PERFCTR_PWR_1_HI, -1, 0}, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW0, + A4XX_VBIF_PERF_CNT_HIGH0, -1, A4XX_VBIF_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW1, + A4XX_VBIF_PERF_CNT_HIGH1, -1, A4XX_VBIF_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW2, + A4XX_VBIF_PERF_CNT_HIGH2, -1, A4XX_VBIF_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW3, + A4XX_VBIF_PERF_CNT_HIGH3, -1, A4XX_VBIF_PERF_CNT_SEL3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW0, + A4XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A4XX_VBIF_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW1, + A4XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A4XX_VBIF_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW2, + A4XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A4XX_VBIF_PERF_PWR_CNT_EN2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW3, + A4XX_VBIF_PERF_PWR_CNT_HIGH3, -1, A4XX_VBIF_PERF_PWR_CNT_EN3 }, +}; + +static struct adreno_perfcount_register a4xx_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_ALWAYSON_COUNTER_LO, + A4XX_RBBM_ALWAYSON_COUNTER_HI, -1 }, +}; + +#define A4XX_PERFCOUNTER_GROUP(offset, name) \ + ADRENO_PERFCOUNTER_GROUP(a4xx, offset, name) + +#define 
A4XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a4xx, offset, name, flags) + +static struct adreno_perfcount_group a4xx_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + A4XX_PERFCOUNTER_GROUP(CP, cp), + A4XX_PERFCOUNTER_GROUP(RBBM, rbbm), + A4XX_PERFCOUNTER_GROUP(PC, pc), + A4XX_PERFCOUNTER_GROUP(VFD, vfd), + A4XX_PERFCOUNTER_GROUP(HLSQ, hlsq), + A4XX_PERFCOUNTER_GROUP(VPC, vpc), + A4XX_PERFCOUNTER_GROUP(CCU, ccu), + A4XX_PERFCOUNTER_GROUP(TSE, tse), + A4XX_PERFCOUNTER_GROUP(RAS, ras), + A4XX_PERFCOUNTER_GROUP(UCHE, uche), + A4XX_PERFCOUNTER_GROUP(TP, tp), + A4XX_PERFCOUNTER_GROUP(SP, sp), + A4XX_PERFCOUNTER_GROUP(RB, rb), + A4XX_PERFCOUNTER_GROUP(VSC, vsc), + A4XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A4XX_PERFCOUNTER_GROUP(VBIF, vbif), + A4XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A4XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED), +}; + +static struct adreno_perfcounters a4xx_perfcounters = { + a4xx_perfcounter_groups, + ARRAY_SIZE(a4xx_perfcounter_groups), +}; + +static struct adreno_ft_perf_counters a4xx_ft_perf_counters[] = { + {KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP_ALU_ACTIVE_CYCLES}, + {KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP0_ICL1_MISSES}, + {KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP_FS_CFLOW_INSTRUCTIONS}, + {KGSL_PERFCOUNTER_GROUP_TSE, A4XX_TSE_INPUT_PRIM_NUM}, +}; + +/* + * On A420 a number of perfcounters are un-usable. The following defines the + * array of countables that do not work and should not be used + */ +static const unsigned int a420_pc_invalid_countables[] = { + PC_INSTANCES, PC_VERTEX_HITS, PC_GENERATED_FIBERS, PC_GENERATED_WAVES, +}; + +static const unsigned int a420_vfd_invalid_countables[] = { + VFD_VPC_BYPASS_TRANS, VFD_UPPER_SHADER_FIBERS, VFD_LOWER_SHADER_FIBERS, +}; + +static const unsigned int a420_hlsq_invalid_countables[] = { + HLSQ_SP_VS_STAGE_CONSTANT, HLSQ_SP_VS_STAGE_INSTRUCTIONS, + HLSQ_SP_FS_STAGE_CONSTANT, HLSQ_SP_FS_STAGE_INSTRUCTIONS, + HLSQ_FS_STAGE_16_WAVES, HLSQ_FS_STAGE_32_WAVES, HLSQ_FS_STAGE_64_WAVES, + HLSQ_VS_STAGE_16_WAVES, HLSQ_VS_STAGE_32_WAVES, +}; + +static const unsigned int a420_uche_invalid_countables[] = { + UCHE_READ_REQUESTS_MARB, UCHE_READ_REQUESTS_SP, + UCHE_WRITE_REQUESTS_MARB, UCHE_WRITE_REQUESTS_SP, + UCHE_WRITE_REQUESTS_VPC +}; + +static const unsigned int a420_tp_invalid_countables[] = { + TP_OUTPUT_TEXELS_POINT, TP_OUTPUT_TEXELS_BILINEAR, TP_OUTPUT_TEXELS_MIP, + TP_OUTPUT_TEXELS_ANISO, TP_OUTPUT_TEXELS_OPS16, TP_OUTPUT_TEXELS_OPS32, + TP_ZERO_LOD, TP_LATENCY, TP_LATENCY_TRANS, +}; + +static const unsigned int a420_sp_invalid_countables[] = { + SP_FS_STAGE_BARY_INSTRUCTIONS, +}; + +static const unsigned int a420_rb_invalid_countables[] = { + RB_VALID_SAMPLES, RB_Z_FAIL, RB_S_FAIL, +}; + +static const unsigned int a420_ccu_invalid_countables[] = { + CCU_VBIF_STALL, CCU_VBIF_LATENCY_CYCLES, CCU_VBIF_LATENCY_SAMPLES, + CCU_Z_READ, CCU_Z_WRITE, CCU_C_READ, CCU_C_WRITE, +}; + +static const struct adreno_invalid_countables + a420_perfctr_invalid_countables[KGSL_PERFCOUNTER_GROUP_MAX] = { + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_pc, PC), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_vfd, VFD), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_hlsq, HLSQ), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_tp, TP), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_sp, SP), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_rb, RB), + ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_ccu, CCU), + 
ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_uche, UCHE), +}; + +static struct adreno_coresight_register a4xx_coresight_registers[] = { + { A4XX_RBBM_CFG_DEBBUS_CTLT }, + { A4XX_RBBM_CFG_DEBBUS_SEL_A }, + { A4XX_RBBM_CFG_DEBBUS_SEL_B }, + { A4XX_RBBM_CFG_DEBBUS_SEL_C }, + { A4XX_RBBM_CFG_DEBBUS_SEL_D }, + { A4XX_RBBM_CFG_DEBBUS_OPL }, + { A4XX_RBBM_CFG_DEBBUS_OPE }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_0 }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_1 }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_2 }, + { A4XX_RBBM_CFG_DEBBUS_IVTL_3 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_0 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_1 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_2 }, + { A4XX_RBBM_CFG_DEBBUS_MASKL_3 }, + { A4XX_RBBM_CFG_DEBBUS_BYTEL_0 }, + { A4XX_RBBM_CFG_DEBBUS_BYTEL_1 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_0 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_1 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_2 }, + { A4XX_RBBM_CFG_DEBBUS_IVTE_3 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_0 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_1 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_2 }, + { A4XX_RBBM_CFG_DEBBUS_MASKE_3 }, + { A4XX_RBBM_CFG_DEBBUS_NIBBLEE }, + { A4XX_RBBM_CFG_DEBBUS_PTRC0 }, + { A4XX_RBBM_CFG_DEBBUS_PTRC1 }, + { A4XX_RBBM_CFG_DEBBUS_CLRC }, + { A4XX_RBBM_CFG_DEBBUS_LOADIVT }, + { A4XX_RBBM_CFG_DEBBUS_IDX }, + { A4XX_RBBM_CFG_DEBBUS_LOADREG }, + { A4XX_RBBM_EXT_TRACE_BUS_CTL }, + { A4XX_RBBM_CFG_DEBBUS_CTLM }, +}; + +static void a4xx_perfcounter_init(struct adreno_device *adreno_dev) +{ + if (adreno_is_a420(adreno_dev)) { + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_perfcounters *counters = gpudev->perfcounters; + + /* + * The CP counters on A420 are... special. Some of the counters + * are swizzled so only a subset of them are usable + */ + + if (counters != NULL) { + counters->groups[KGSL_PERFCOUNTER_GROUP_CP].regs = + a420_perfcounters_cp; + counters->groups[KGSL_PERFCOUNTER_GROUP_CP].reg_count = + ARRAY_SIZE(a420_perfcounters_cp); + } + + /* + * Also on A420 a number of the countables are not functional so + * we maintain a blacklist of countables to protect the user + */ + + gpudev->invalid_countables = a420_perfctr_invalid_countables; + } + + /* + * Enable the GPU busy count counter. 
This is a fixed counter on + * A4XX so we don't need to bother checking the return value + */ + + adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + NULL, NULL, PERFCOUNTER_FLAG_KERNEL); +} + +static void a4xx_perfcounter_close(struct adreno_device *adreno_dev) +{ + adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1, + PERFCOUNTER_FLAG_KERNEL); +} + +static const unsigned int _a4xx_pwron_fixup_fs_instructions[] = { + 0x00000000, 0x304CC300, 0x00000000, 0x304CC304, + 0x00000000, 0x304CC308, 0x00000000, 0x304CC30C, + 0x00000000, 0x304CC310, 0x00000000, 0x304CC314, + 0x00000000, 0x304CC318, 0x00000000, 0x304CC31C, + 0x00000000, 0x304CC320, 0x00000000, 0x304CC324, + 0x00000000, 0x304CC328, 0x00000000, 0x304CC32C, + 0x00000000, 0x304CC330, 0x00000000, 0x304CC334, + 0x00000000, 0x304CC338, 0x00000000, 0x304CC33C, + 0x00000000, 0x00000400, 0x00020000, 0x63808003, + 0x00060004, 0x63828007, 0x000A0008, 0x6384800B, + 0x000E000C, 0x6386800F, 0x00120010, 0x63888013, + 0x00160014, 0x638A8017, 0x001A0018, 0x638C801B, + 0x001E001C, 0x638E801F, 0x00220020, 0x63908023, + 0x00260024, 0x63928027, 0x002A0028, 0x6394802B, + 0x002E002C, 0x6396802F, 0x00320030, 0x63988033, + 0x00360034, 0x639A8037, 0x003A0038, 0x639C803B, + 0x003E003C, 0x639E803F, 0x00000000, 0x00000400, + 0x00000003, 0x80D00003, 0x00000007, 0x80D00007, + 0x0000000B, 0x80D0000B, 0x0000000F, 0x80D0000F, + 0x00000013, 0x80D00013, 0x00000017, 0x80D00017, + 0x0000001B, 0x80D0001B, 0x0000001F, 0x80D0001F, + 0x00000023, 0x80D00023, 0x00000027, 0x80D00027, + 0x0000002B, 0x80D0002B, 0x0000002F, 0x80D0002F, + 0x00000033, 0x80D00033, 0x00000037, 0x80D00037, + 0x0000003B, 0x80D0003B, 0x0000003F, 0x80D0003F, + 0x00000000, 0x00000400, 0xFFFFFFFF, 0x304CC300, + 0xFFFFFFFF, 0x304CC304, 0xFFFFFFFF, 0x304CC308, + 0xFFFFFFFF, 0x304CC30C, 0xFFFFFFFF, 0x304CC310, + 0xFFFFFFFF, 0x304CC314, 0xFFFFFFFF, 0x304CC318, + 0xFFFFFFFF, 0x304CC31C, 0xFFFFFFFF, 0x304CC320, + 0xFFFFFFFF, 0x304CC324, 0xFFFFFFFF, 0x304CC328, + 0xFFFFFFFF, 0x304CC32C, 0xFFFFFFFF, 0x304CC330, + 0xFFFFFFFF, 0x304CC334, 0xFFFFFFFF, 0x304CC338, + 0xFFFFFFFF, 0x304CC33C, 0x00000000, 0x00000400, + 0x00020000, 0x63808003, 0x00060004, 0x63828007, + 0x000A0008, 0x6384800B, 0x000E000C, 0x6386800F, + 0x00120010, 0x63888013, 0x00160014, 0x638A8017, + 0x001A0018, 0x638C801B, 0x001E001C, 0x638E801F, + 0x00220020, 0x63908023, 0x00260024, 0x63928027, + 0x002A0028, 0x6394802B, 0x002E002C, 0x6396802F, + 0x00320030, 0x63988033, 0x00360034, 0x639A8037, + 0x003A0038, 0x639C803B, 0x003E003C, 0x639E803F, + 0x00000000, 0x00000400, 0x00000003, 0x80D00003, + 0x00000007, 0x80D00007, 0x0000000B, 0x80D0000B, + 0x0000000F, 0x80D0000F, 0x00000013, 0x80D00013, + 0x00000017, 0x80D00017, 0x0000001B, 0x80D0001B, + 0x0000001F, 0x80D0001F, 0x00000023, 0x80D00023, + 0x00000027, 0x80D00027, 0x0000002B, 0x80D0002B, + 0x0000002F, 0x80D0002F, 0x00000033, 0x80D00033, + 0x00000037, 0x80D00037, 0x0000003B, 0x80D0003B, + 0x0000003F, 0x80D0003F, 0x00000000, 0x03000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/** + * adreno_a4xx_pwron_fixup_init() - Initalize a special command buffer to run a + * post-power collapse shader workaround + * @adreno_dev: Pointer to a adreno_device struct + * + * Some targets require a special workaround shader to be executed after + * power-collapse. 
Construct the IB once at init time and keep it + * handy + * + * Returns: 0 on success or negative on error + */ +int adreno_a4xx_pwron_fixup_init(struct adreno_device *adreno_dev) +{ + unsigned int *cmds; + unsigned int count = ARRAY_SIZE(_a4xx_pwron_fixup_fs_instructions); + unsigned int num_units = count >> 5; + int ret; + + /* Return if the fixup is already in place */ + if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + return 0; + + ret = kgsl_allocate_global(&adreno_dev->dev, + &adreno_dev->pwron_fixup, PAGE_SIZE, + KGSL_MEMFLAGS_GPUREADONLY, 0); + + if (ret) + return ret; + + cmds = adreno_dev->pwron_fixup.hostptr; + + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_SP_MODE_CONTROL, 1); + *cmds++ = 0x00000018; + *cmds++ = cp_type0_packet(A4XX_TPL1_TP_MODE_CONTROL, 1); + *cmds++ = 0x00000002; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4xx_HLSQ_CONTROL_0, 5); + *cmds++ = 0x800001a0; + *cmds++ = 0xfcfc0000; + *cmds++ = 0xcff3f3f0; + *cmds++ = 0xfcfcfcfc; + *cmds++ = 0xccfcfcfc; + *cmds++ = cp_type0_packet(A4XX_SP_FS_CTRL_1, 1); + *cmds++ = 0x80000000; + *cmds++ = cp_type0_packet(A4XX_HLSQ_UPDATE_CONTROL, 1); + *cmds++ = 0x00000038; + *cmds++ = cp_type0_packet(A4XX_HLSQ_MODE_CONTROL, 1); + *cmds++ = 0x00000003; + *cmds++ = cp_type0_packet(A4XX_HLSQ_UPDATE_CONTROL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_TPL1_TP_TEX_TSIZE_1, 1); + *cmds++ = 0x00008000; + *cmds++ = cp_type0_packet(A4xx_HLSQ_CONTROL_0, 2); + *cmds++ = 0x800001a0; + *cmds++ = 0xfcfc0000; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CS_CONTROL, 1); + *cmds++ = 0x00018030 | (num_units << 24); + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_NDRANGE_0, 7); + *cmds++ = 0x000000fd; + *cmds++ = 0x00000040; + *cmds++ = 0x00000000; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_CONTROL_0, 2); + *cmds++ = 0x0001201f; + *cmds++ = 0x0000f003; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_KERNEL_CONST, 1); + *cmds++ = 0x0001800b; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_KERNEL_GROUP_X, 3); + *cmds++ = 0x00000001; + *cmds++ = 0x00000001; + *cmds++ = 0x00000001; + *cmds++ = cp_type0_packet(A4XX_HLSQ_CL_WG_OFFSET, 1); + *cmds++ = 0x00000022; + *cmds++ = cp_type0_packet(A4XX_UCHE_INVALIDATE0, 2); + *cmds++ = 0x00000000; + *cmds++ = 0x00000012; + *cmds++ = cp_type0_packet(A4XX_HLSQ_MODE_CONTROL, 1); + *cmds++ = 0x00000003; + *cmds++ = cp_type0_packet(A4XX_SP_SP_CTRL, 1); + *cmds++ = 0x00920000; + *cmds++ = cp_type0_packet(A4XX_SP_INSTR_CACHE_CTRL, 1); + *cmds++ = 0x00000260; + *cmds++ = cp_type0_packet(A4XX_SP_CS_CTRL_0, 1); + *cmds++ = 0x00200400; + *cmds++ = cp_type0_packet(A4XX_SP_CS_OBJ_OFFSET, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_SP_CS_OBJ_START, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type0_packet(A4XX_SP_CS_LENGTH, 1); + *cmds++ = num_units; + *cmds++ = cp_type0_packet(A4XX_SP_MODE_CONTROL, 1); + *cmds++ = 0x00000018; + *cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count); + *cmds++ = 0x00340000 | (num_units << CP_LOADSTATE_NUMOFUNITS_SHIFT); + *cmds++ = 0x00000000; + + memcpy(cmds, _a4xx_pwron_fixup_fs_instructions, count << 2); + cmds += count; + + *cmds++ = cp_type3_packet(CP_EXEC_CL, 1); + *cmds++ = 0x00000000; + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0x00000000; + + /* + * Remember the number of dwords in the command buffer for when we + * program the indirect buffer 
call in the ringbuffer + */ + adreno_dev->pwron_fixup_dwords = + (cmds - (unsigned int *) adreno_dev->pwron_fixup.hostptr); + + /* Mark the flag in ->priv to show that we have the fix */ + set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv); + return 0; +} + +static int a4xx_hw_init(struct adreno_device *adreno_dev) +{ + a4xx_enable_pc(adreno_dev); + a4xx_enable_ppd(adreno_dev); + + return 0; +} + +/* + * a4xx_rb_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, common function shared between + * a4xx devices + */ +static int a4xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 20); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + *cmds++ = cp_type3_packet(CP_ME_INIT, 17); + + /* + * Ordinal 2 of ME_INIT packet, the bits being set are: + * Ordinal 3, 4, 5-12, 14, 15, 16, 17, 18 are present + * Microcode Default Reset Control = 3 + */ + *cmds++ = 0x000003f7; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + *cmds++ = 0x00000080; + *cmds++ = 0x00000100; + *cmds++ = 0x00000180; + *cmds++ = 0x00006600; + *cmds++ = 0x00000150; + *cmds++ = 0x0000014e; + *cmds++ = 0x00000154; + /* MAX Context */ + *cmds++ = 0x00000001; + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + /* Enable protected mode registers for A3XX/A4XX */ + *cmds++ = 0x20000000; + + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + *cmds++ = cp_type3_packet(CP_PREEMPT_ENABLE, 1); + *cmds++ = 1; + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "CP initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } + + return ret; +} + +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrlt, &a4xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_sela, &a4xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_selb, &a4xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_selc, &a4xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_seld, &a4xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_opl, &a4xx_coresight_registers[5]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ope, &a4xx_coresight_registers[6]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl0, &a4xx_coresight_registers[7]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl1, &a4xx_coresight_registers[8]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl2, &a4xx_coresight_registers[9]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl3, &a4xx_coresight_registers[10]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl0, &a4xx_coresight_registers[11]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl1, &a4xx_coresight_registers[12]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl2, &a4xx_coresight_registers[13]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl3, &a4xx_coresight_registers[14]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_bytel0, &a4xx_coresight_registers[15]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_bytel1, &a4xx_coresight_registers[16]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte0, &a4xx_coresight_registers[17]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte1, &a4xx_coresight_registers[18]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte2, &a4xx_coresight_registers[19]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte3, 
&a4xx_coresight_registers[20]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske0, &a4xx_coresight_registers[21]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske1, &a4xx_coresight_registers[22]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske2, &a4xx_coresight_registers[23]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske3, &a4xx_coresight_registers[24]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_nibblee, &a4xx_coresight_registers[25]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ptrc0, &a4xx_coresight_registers[26]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ptrc1, &a4xx_coresight_registers[27]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_clrc, &a4xx_coresight_registers[28]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_loadivt, &a4xx_coresight_registers[29]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_idx, &a4xx_coresight_registers[30]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_loadreg, &a4xx_coresight_registers[31]); +static ADRENO_CORESIGHT_ATTR(ext_tracebus_ctl, &a4xx_coresight_registers[32]); +static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrlm, &a4xx_coresight_registers[33]); + + +static struct attribute *a4xx_coresight_attrs[] = { + &coresight_attr_cfg_debbus_ctrlt.attr.attr, + &coresight_attr_cfg_debbus_sela.attr.attr, + &coresight_attr_cfg_debbus_selb.attr.attr, + &coresight_attr_cfg_debbus_selc.attr.attr, + &coresight_attr_cfg_debbus_seld.attr.attr, + &coresight_attr_cfg_debbus_opl.attr.attr, + &coresight_attr_cfg_debbus_ope.attr.attr, + &coresight_attr_cfg_debbus_ivtl0.attr.attr, + &coresight_attr_cfg_debbus_ivtl1.attr.attr, + &coresight_attr_cfg_debbus_ivtl2.attr.attr, + &coresight_attr_cfg_debbus_ivtl3.attr.attr, + &coresight_attr_cfg_debbus_maskl0.attr.attr, + &coresight_attr_cfg_debbus_maskl1.attr.attr, + &coresight_attr_cfg_debbus_maskl2.attr.attr, + &coresight_attr_cfg_debbus_maskl3.attr.attr, + &coresight_attr_cfg_debbus_bytel0.attr.attr, + &coresight_attr_cfg_debbus_bytel1.attr.attr, + &coresight_attr_cfg_debbus_ivte0.attr.attr, + &coresight_attr_cfg_debbus_ivte1.attr.attr, + &coresight_attr_cfg_debbus_ivte2.attr.attr, + &coresight_attr_cfg_debbus_ivte3.attr.attr, + &coresight_attr_cfg_debbus_maske0.attr.attr, + &coresight_attr_cfg_debbus_maske1.attr.attr, + &coresight_attr_cfg_debbus_maske2.attr.attr, + &coresight_attr_cfg_debbus_maske3.attr.attr, + &coresight_attr_cfg_debbus_nibblee.attr.attr, + &coresight_attr_cfg_debbus_ptrc0.attr.attr, + &coresight_attr_cfg_debbus_ptrc1.attr.attr, + &coresight_attr_cfg_debbus_clrc.attr.attr, + &coresight_attr_cfg_debbus_loadivt.attr.attr, + &coresight_attr_cfg_debbus_idx.attr.attr, + &coresight_attr_cfg_debbus_loadreg.attr.attr, + &coresight_attr_ext_tracebus_ctl.attr.attr, + &coresight_attr_cfg_debbus_ctrlm.attr.attr, + NULL, +}; + +static const struct attribute_group a4xx_coresight_group = { + .attrs = a4xx_coresight_attrs, +}; + +static const struct attribute_group *a4xx_coresight_groups[] = { + &a4xx_coresight_group, + NULL, +}; + +static struct adreno_coresight a4xx_coresight = { + .registers = a4xx_coresight_registers, + .count = ARRAY_SIZE(a4xx_coresight_registers), + .groups = a4xx_coresight_groups, +}; + +#define A4XX_INT_MASK \ + ((1 << A4XX_INT_RBBM_AHB_ERROR) | \ + (1 << A4XX_INT_RBBM_REG_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_ETS_MS_TIMEOUT) | \ + (1 << A4XX_INT_RBBM_ASYNC_OVERFLOW) | \ + (1 << A4XX_INT_CP_SW) | \ + (1 << A4XX_INT_CP_OPCODE_ERROR) | \ + (1 << A4XX_INT_CP_RESERVED_BIT_ERROR) | \ + (1 << A4XX_INT_CP_HW_FAULT) | \ + (1 << A4XX_INT_CP_IB1_INT) | \ + 
(1 << A4XX_INT_CP_IB2_INT) | \ + (1 << A4XX_INT_CP_RB_INT) | \ + (1 << A4XX_INT_CACHE_FLUSH_TS) | \ + (1 << A4XX_INT_CP_REG_PROTECT_FAULT) | \ + (1 << A4XX_INT_CP_AHB_ERROR_HALT) | \ + (1 << A4XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A4XX_INT_UCHE_OOB_ACCESS) | \ + (1 << A4XX_INT_RBBM_DPM_CALC_ERR) | \ + (1 << A4XX_INT_RBBM_DPM_EPOCH_ERR) | \ + (1 << A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR) |\ + (1 << A4XX_INT_RBBM_DPM_THERMAL_RED_ERR)) + + +static struct adreno_irq_funcs a4xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */ + /* 3 - RBBM_ME_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + /* 4 - RBBM_PFP_MS_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 5 - RBBM_ETS_MS_TIMEOUT */ + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 7 - RBBM_GPC_ERR */ + ADRENO_IRQ_CALLBACK(adreno_dispatcher_preempt_callback), /* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 9 - CP_OPCODE_ERROR */ + /* 10 - CP_RESERVED_BIT_ERROR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 11 - CP_HW_FAULT */ + ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CP_REG_PROTECT_FAULT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - CP_AHB_ERROR_FAULT */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */ + /* 24 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 25 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - Unused */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - RBBM_TRACE_MISR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 28 - RBBM_DPM_CALC_ERR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 29 - RBBM_DPM_EPOCH_ERR */ + /* 30 - RBBM_DPM_THERMAL_YELLOW_ERR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), + /* 31 - RBBM_DPM_THERMAL_RED_ERR */ + ADRENO_IRQ_CALLBACK(a4xx_err_callback), +}; + +static struct adreno_irq a4xx_irq = { + .funcs = a4xx_irq_funcs, + .mask = A4XX_INT_MASK, +}; + +static struct adreno_snapshot_data a4xx_snapshot_data = { + .sect_sizes = &a4xx_snap_sizes, +}; + +/** + * a4xx_preempt_trig_state() - Schedule preemption in TRIGGERRED + * state + * @adreno_dev: Device which is in TRIGGERRED state + */ +static void a4xx_preempt_trig_state( + struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + unsigned int rbbase, val; + + /* + * Hardware not yet idle means that preemption interrupt + * may still occur, nothing to do here until interrupt signals + * completion of preemption, just return here + */ + if (!adreno_hw_isidle(adreno_dev)) + return; + + /* + * We just changed states, reschedule dispatcher to change + * preemption states + */ + if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED != 
+ atomic_read(&dispatcher->preemption_state)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * H/W is idle and we did not get a preemption interrupt, may + * be device went idle w/o encountering any preempt token or + * we already preempted w/o interrupt + */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase); + /* Did preemption occur, if so then change states and return */ + if (rbbase != adreno_dev->cur_rb->buffer_desc.gpuaddr) { + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val); + if (val && rbbase == adreno_dev->next_rb->buffer_desc.gpuaddr) { + KGSL_DRV_INFO(device, + "Preemption completed without interrupt\n"); + trace_adreno_hw_preempt_trig_to_comp(adreno_dev->cur_rb, + adreno_dev->next_rb); + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(device); + return; + } + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + /* reschedule dispatcher to take care of the fault */ + adreno_dispatcher_schedule(device); + return; + } + /* + * Check if preempt token was submitted after preemption trigger, if so + * then preemption should have occurred, since device is already idle it + * means something went wrong - trigger FT + */ + if (dispatcher->preempt_token_submit) { + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + /* reschedule dispatcher to take care of the fault */ + adreno_dispatcher_schedule(device); + return; + } + /* + * Preempt token was not submitted after preemption trigger so device + * may have gone idle before preemption could occur, if there are + * commands that got submitted to current RB after triggering preemption + * then submit them as those commands may have a preempt token in them + */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &adreno_dev->cur_rb->rptr); + if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) { + /* + * Memory barrier before informing the + * hardware of new commands + */ + mb(); + kgsl_pwrscale_busy(device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, + adreno_dev->cur_rb->wptr); + return; + } + + /* Submit preempt token to make preemption happen */ + if (adreno_drawctxt_switch(adreno_dev, adreno_dev->cur_rb, NULL, 0)) + BUG(); + if (adreno_ringbuffer_submit_preempt_token(adreno_dev->cur_rb, + adreno_dev->next_rb)) + BUG(); + dispatcher->preempt_token_submit = 1; + adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr; + trace_adreno_hw_preempt_token_submit(adreno_dev->cur_rb, + adreno_dev->next_rb); +} + +/** + * a4xx_preempt_clear_state() - Schedule preemption in + * CLEAR state. Preemption can be issued in this state. 
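+ *
+ * A rough sketch of the flow implemented below: pick the highest
+ * priority ringbuffer that has work pending, program the CP for the
+ * incoming ringbuffer and fire the preemption trigger, which moves
+ * the state machine to TRIGGERED:
+ *
+ *	rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev);
+ *	if (rb && rb != adreno_dev->cur_rb) {
+ *		a4xx_preemption_start(adreno_dev, rb);
+ *		adreno_dev->next_rb = rb;
+ *		adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
+ *	}
+ *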
+ * @adreno_dev: Device which is in CLEAR state + */ +static void a4xx_preempt_clear_state( + struct adreno_device *adreno_dev) + +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher_cmdqueue *dispatch_tempq; + struct kgsl_cmdbatch *cmdbatch; + struct adreno_ringbuffer *highest_busy_rb; + int switch_low_to_high; + int ret; + + /* Device not awake means there is nothing to do */ + if (!kgsl_state_is_awake(device)) + return; + + /* keep updating the current rptr when preemption is clear */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &(adreno_dev->cur_rb->rptr)); + + highest_busy_rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev); + if (!highest_busy_rb) + return; + + switch_low_to_high = adreno_compare_prio_level( + highest_busy_rb->id, + adreno_dev->cur_rb->id); + + /* already current then return */ + if (!switch_low_to_high) + return; + + if (switch_low_to_high < 0) { + /* + * if switching to lower priority make sure that the rptr and + * wptr are equal, when the lower rb is not starved + */ + if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) + return; + /* + * switch to default context because when we switch back + * to higher context then its not known which pt will + * be current, so by making it default here the next + * commands submitted will set the right pt + */ + ret = adreno_drawctxt_switch(adreno_dev, + adreno_dev->cur_rb, + NULL, 0); + /* + * lower priority RB has to wait until space opens up in + * higher RB + */ + if (ret) + return; + + adreno_writereg(adreno_dev, + ADRENO_REG_CP_PREEMPT_DISABLE, 1); + } + + /* + * setup registers to do the switch to highest priority RB + * which is not empty or may be starving away(poor thing) + */ + a4xx_preemption_start(adreno_dev, highest_busy_rb); + + /* turn on IOMMU as the preemption may trigger pt switch */ + kgsl_mmu_enable_clk(&device->mmu); + + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_TRIGGERED); + + adreno_dev->next_rb = highest_busy_rb; + mod_timer(&dispatcher->preempt_timer, jiffies + + msecs_to_jiffies(ADRENO_DISPATCH_PREEMPT_TIMEOUT)); + + trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb, + adreno_dev->next_rb); + /* issue PREEMPT trigger */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1); + /* + * IOMMU clock can be safely switched off after the timestamp + * of the first command in the new rb + */ + dispatch_tempq = &adreno_dev->next_rb->dispatch_q; + if (dispatch_tempq->head != dispatch_tempq->tail) + cmdbatch = dispatch_tempq->cmd_q[dispatch_tempq->head]; + else + cmdbatch = NULL; + if (cmdbatch) + adreno_ringbuffer_mmu_disable_clk_on_ts(device, + adreno_dev->next_rb, + cmdbatch->global_ts); + else + adreno_ringbuffer_mmu_disable_clk_on_ts(device, + adreno_dev->next_rb, adreno_dev->next_rb->timestamp); + /* submit preempt token packet to ensure preemption */ + if (switch_low_to_high < 0) { + ret = adreno_ringbuffer_submit_preempt_token( + adreno_dev->cur_rb, adreno_dev->next_rb); + /* + * unexpected since we are submitting this when rptr = wptr, + * this was checked above already + */ + BUG_ON(ret); + dispatcher->preempt_token_submit = 1; + adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr; + } else { + dispatcher->preempt_token_submit = 0; + adreno_dispatcher_schedule(device); + adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF; + } +} + +/** + * a4xx_preempt_complete_state() - Schedule preemption in + * COMPLETE state + * @adreno_dev: Device which 
is in COMPLETE state + */ +static void a4xx_preempt_complete_state( + struct adreno_device *adreno_dev) + +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher_cmdqueue *dispatch_q; + unsigned int wptr, rbbase; + unsigned int val, val1; + + del_timer_sync(&dispatcher->preempt_timer); + + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &val); + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val1); + + if (val || !val1) { + KGSL_DRV_ERR(device, + "Invalid state after preemption CP_PREEMPT: %08x, CP_PREEMPT_DEBUG: %08x\n", + val, val1); + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_dispatcher_schedule(device); + return; + } + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase); + if (rbbase != adreno_dev->next_rb->buffer_desc.gpuaddr) { + KGSL_DRV_ERR(device, + "RBBASE incorrect after preemption, expected %x got %016llx\b", + rbbase, + adreno_dev->next_rb->buffer_desc.gpuaddr); + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_dispatcher_schedule(device); + return; + } + + a4xx_preemption_save(adreno_dev, adreno_dev->cur_rb); + + dispatch_q = &(adreno_dev->cur_rb->dispatch_q); + /* new RB is the current RB */ + trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb, + adreno_dev->cur_rb); + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = adreno_dev->next_rb; + adreno_dev->cur_rb->preempted_midway = 0; + adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF; + adreno_dev->next_rb = NULL; + if (adreno_disp_preempt_fair_sched) { + /* starved rb is now scheduled so unhalt dispatcher */ + if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED == + adreno_dev->cur_rb->starve_timer_state) + adreno_put_gpu_halt(adreno_dev); + adreno_dev->cur_rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED; + adreno_dev->cur_rb->sched_timer = jiffies; + /* + * If the outgoing RB is has commands then set the + * busy time for it + */ + if (adreno_dev->prev_rb->rptr != adreno_dev->prev_rb->wptr) { + adreno_dev->prev_rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT; + adreno_dev->prev_rb->sched_timer = jiffies; + } else { + adreno_dev->prev_rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + } + } + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_CLEAR); + if (adreno_compare_prio_level(adreno_dev->prev_rb->id, + adreno_dev->cur_rb->id) < 0) { + if (adreno_dev->prev_rb->wptr_preempt_end != + adreno_dev->prev_rb->rptr) + adreno_dev->prev_rb->preempted_midway = 1; + } else if (adreno_dev->prev_rb->wptr_preempt_end != + adreno_dev->prev_rb->rptr) { + BUG(); + } + /* submit wptr if required for new rb */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + if (adreno_dev->cur_rb->wptr != wptr) { + kgsl_pwrscale_busy(device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, + adreno_dev->cur_rb->wptr); + } + /* clear preemption register */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, 0); + adreno_preempt_process_dispatch_queue(adreno_dev, dispatch_q); +} + +static void a4xx_preemption_schedule( + struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + switch (atomic_read(&dispatcher->preemption_state)) { + case ADRENO_DISPATCHER_PREEMPT_CLEAR: + 
a4xx_preempt_clear_state(adreno_dev); + break; + case ADRENO_DISPATCHER_PREEMPT_TRIGGERED: + a4xx_preempt_trig_state(adreno_dev); + /* + * if we transitioned to next state then fall-through + * processing to next state + */ + if (!adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_COMPLETE)) + break; + case ADRENO_DISPATCHER_PREEMPT_COMPLETE: + a4xx_preempt_complete_state(adreno_dev); + break; + default: + BUG(); + } + + mutex_unlock(&device->mutex); +} + +struct adreno_gpudev adreno_a4xx_gpudev = { + .reg_offsets = &a4xx_reg_offsets, + .ft_perf_counters = a4xx_ft_perf_counters, + .ft_perf_counters_count = ARRAY_SIZE(a4xx_ft_perf_counters), + .perfcounters = &a4xx_perfcounters, + .irq = &a4xx_irq, + .irq_trace = trace_kgsl_a4xx_irq_status, + .snapshot_data = &a4xx_snapshot_data, + .num_prio_levels = ADRENO_PRIORITY_MAX_RB_LEVELS, + .vbif_xin_halt_ctrl0_mask = A4XX_VBIF_XIN_HALT_CTRL0_MASK, + + .perfcounter_init = a4xx_perfcounter_init, + .perfcounter_close = a4xx_perfcounter_close, + .rb_init = a4xx_rb_init, + .hw_init = a4xx_hw_init, + .microcode_read = a3xx_microcode_read, + .microcode_load = a3xx_microcode_load, + .coresight = &a4xx_coresight, + .start = a4xx_start, + .snapshot = a4xx_snapshot, + .is_sptp_idle = a4xx_is_sptp_idle, + .pwrlevel_change_settings = a4xx_pwrlevel_change_settings, + .regulator_enable = a4xx_regulator_enable, + .regulator_disable = a4xx_regulator_disable, + .preemption_pre_ibsubmit = a4xx_preemption_pre_ibsubmit, + .preemption_token = a4xx_preemption_token, + .preemption_schedule = a4xx_preemption_schedule, +}; diff --git a/drivers/gpu/msm/adreno_a4xx.h b/drivers/gpu/msm/adreno_a4xx.h new file mode 100644 index 000000000000..93e54e82a48c --- /dev/null +++ b/drivers/gpu/msm/adreno_a4xx.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _ADRENO_A4XX_H_ +#define _ADRENO_A4XX_H_ + +void a4xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +#endif diff --git a/drivers/gpu/msm/adreno_a4xx_snapshot.c b/drivers/gpu/msm/adreno_a4xx_snapshot.c new file mode 100644 index 000000000000..96080d3b6d9f --- /dev/null +++ b/drivers/gpu/msm/adreno_a4xx_snapshot.c @@ -0,0 +1,605 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/io.h> +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_snapshot.h" +#include "a4xx_reg.h" +#include "adreno_snapshot.h" +#include "adreno_a4xx.h" + +/* + * Set of registers to dump for A4XX on snapshot. 
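+ * (For example, the first pair below, 0x0000 and 0x0002, dumps
+ * registers 0x0000 through 0x0002.)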
+ * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a4xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026, + 0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066, + 0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF, + /* CP */ + 0x0200, 0x0226, 0x0228, 0x0233, 0x0240, 0x0258, 0x04C0, 0x04D0, + 0x04D2, 0x04DD, 0x0500, 0x050B, 0x0578, 0x058F, + /* VSC */ + 0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51, + /* GRAS */ + 0x0C80, 0x0C81, 0x0C88, 0x0C8F, + /* RB */ + 0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2, + /* PC */ + 0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23, + /* VFD */ + 0x0E40, 0x0E4A, + /* VPC */ + 0x0E60, 0x0E61, 0x0E63, 0x0E68, + /* UCHE */ + 0x0E80, 0x0E84, 0x0E88, 0x0E95, + /* GRAS CTX 0 */ + 0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E, + /* PC CTX 0 */ + 0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7, + /* VFD CTX 0 */ + 0x2200, 0x2204, 0x2208, 0x22A9, + /* GRAS CTX 1 */ + 0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E, + /* PC CTX 1 */ + 0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7, + /* VFD CTX 1 */ + 0x2600, 0x2604, 0x2608, 0x26A9, +}; + +static const unsigned int a4xx_sp_tp_registers[] = { + /* SP */ + 0x0EC0, 0x0ECF, + /* TPL1 */ + 0x0F00, 0x0F0B, + /* SP CTX 0 */ + 0x22C0, 0x22C1, 0x22C4, 0x22E5, 0x22E8, 0x22F8, 0x2300, 0x2306, + 0x230C, 0x2312, 0x2318, 0x2339, 0x2340, 0x2360, + /* TPL1 CTX 0 */ + 0x2380, 0x2382, 0x2384, 0x238F, 0x23A0, 0x23A6, + /* SP CTX 1 */+ + 0x26C0, 0x26C1, 0x26C4, 0x26E5, 0x26E8, 0x26F8, 0x2700, 0x2706, + 0x270C, 0x2712, 0x2718, 0x2739, 0x2740, 0x2760, + /* TPL1 CTX 1 */ + 0x2780, 0x2782, 0x2784, 0x278F, 0x27A0, 0x27A6, +}; + +static const unsigned int a4xx_ppd_registers[] = { + /* V2 Thresholds */ + 0x01B2, 0x01B5, + /* Control and Status */ + 0x01B9, 0x01BE, +}; + +static const unsigned int a4xx_xpu_registers[] = { + /* XPU */ + 0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20, + 0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40, + 0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95, +}; + +static const unsigned int a4xx_vbif_ver_20000000_registers[] = { + /* VBIF version 0x20000000 & IOMMU V1 */ + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022, + 0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, + 0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, + 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, + 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, + 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, + 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C, + 0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416, + 0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436, + 0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480, + 0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004, + 0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016, + 0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200, + 0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802, + 0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816, + 0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF, + 0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 
0x4925, + 0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E, + 0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00, + 0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10, + 0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60, + 0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3, + 0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B, + 0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0, + 0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6, + 0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416, + 0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780, + 0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4, + 0x67D6, 0x67D6, 0x67EE, 0x67EE, +}; + +static const unsigned int a4xx_vbif_ver_20020000_registers[] = { + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022, + 0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, + 0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, + 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, + 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, + 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, + 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x4800, 0x4802, + 0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816, + 0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF, + 0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925, + 0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E, + 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00, 0x4E80, 0x4E80, + 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10, 0x4F18, 0x4F18, + 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60, 0x4F80, 0x4F81, + 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3, 0x6000, 0x6001, + 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B, 0x61FD, 0x61FD, + 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0, 0x63C0, 0x63C1, + 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6, 0x63EE, 0x63EE, + 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416, 0x6418, 0x641B, + 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780, 0x67A0, 0x67A0, + 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4, 0x67D6, 0x67D6, + 0x67EE, 0x67EE, +}; + +static const unsigned int a4xx_vbif_ver_20050000_registers[] = { + /* VBIF version 0x20050000 and 0x20090000 */ + 0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036, + 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049, + 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D, + 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098, + 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0, + 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108, + 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125, + 0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410, +}; + +static const struct adreno_vbif_snapshot_registers + a4xx_vbif_snapshot_registers[] = { + { 0x20000000, a4xx_vbif_ver_20000000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20000000_registers)/2}, + { 0x20020000, a4xx_vbif_ver_20020000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2}, + { 0x20050000, a4xx_vbif_ver_20050000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2}, + { 0x20070000, a4xx_vbif_ver_20020000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2}, + { 0x20090000, 
a4xx_vbif_ver_20050000_registers, + ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2}, +}; + +#define A4XX_NUM_SHADER_BANKS 4 +#define A405_NUM_SHADER_BANKS 1 +/* Shader memory size in words */ +#define A4XX_SHADER_MEMORY_SIZE 0x4000 + +static const struct adreno_debugbus_block a4xx_debugbus_blocks[] = { + { A4XX_RBBM_DEBBUS_CP_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RBBM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VBIF_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_HLSQ_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_UCHE_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_DPM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TESS_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_PC_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VFD_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VPC_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TSE_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RAS_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_VSC_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_COM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_DCOM_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_SP_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_0_ID, 0x100 }, +}; + +static const struct adreno_debugbus_block a420_debugbus_blocks[] = { + { A4XX_RBBM_DEBBUS_SP_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_SP_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_SP_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_TPL1_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_RB_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_MARB_3_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_0_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_1_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_2_ID, 0x100, }, + { A4XX_RBBM_DEBBUS_CCU_3_ID, 0x100, }, +}; + +/** + * a4xx_snapshot_shader_memory - Helper function to dump the GPU shader + * memory to the snapshot buffer. 
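+ * The section payload is one block of shader_read_len words per
+ * shader bank (A4XX_NUM_SHADER_BANKS banks, or a single bank on
+ * A405); each bank is selected through A4XX_HLSQ_SPTP_RDSEL before
+ * its contents are read out word by word.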
+ * @device: GPU device whose shader memory is to be dumped + * @buf: Pointer to binary snapshot data blob being made + * @remain: Number of remaining bytes in the snapshot blob + * @priv: Unused parameter + * + */ +static size_t a4xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int i, j; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int shader_read_len = A4XX_SHADER_MEMORY_SIZE; + unsigned int shader_banks = A4XX_NUM_SHADER_BANKS; + + if (shader_read_len > (device->shader_mem_len >> 2)) + shader_read_len = (device->shader_mem_len >> 2); + + if (adreno_is_a405(adreno_dev)) + shader_banks = A405_NUM_SHADER_BANKS; + + if (remain < DEBUG_SECTION_SZ(shader_read_len * + shader_banks)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_SHADER_MEMORY; + header->size = shader_read_len * shader_banks; + + /* Map shader memory to kernel, for dumping */ + if (device->shader_mem_virt == NULL) + device->shader_mem_virt = devm_ioremap(device->dev, + device->shader_mem_phys, + device->shader_mem_len); + + if (device->shader_mem_virt == NULL) { + KGSL_DRV_ERR(device, + "Unable to map shader memory region\n"); + return 0; + } + + for (j = 0; j < shader_banks; j++) { + unsigned int val; + /* select the SPTP */ + kgsl_regread(device, A4XX_HLSQ_SPTP_RDSEL, &val); + val &= ~0x3; + val |= j; + kgsl_regwrite(device, A4XX_HLSQ_SPTP_RDSEL, val); + /* Now, dump shader memory to snapshot */ + for (i = 0; i < shader_read_len; i++) + adreno_shadermem_regread(device, i, + &data[i + j * shader_read_len]); + } + + + return DEBUG_SECTION_SZ(shader_read_len * shader_banks); +} + +/* + * a4xx_rbbm_debug_bus_read() - Read data from trace bus + * @device: Device whose data bus is read + * @block_id: Trace bus block ID + * @index: Index of data to read + * @val: Output parameter where data is read + */ +static void a4xx_rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg = 0; + + reg |= (block_id << A4XX_RBBM_CFG_DEBBUS_SEL_PING_BLK_SEL_SHIFT); + reg |= (index << A4XX_RBBM_CFG_DEBBUS_SEL_PING_INDEX_SHIFT); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_A, reg); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_B, reg); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_C, reg); + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_D, reg); + + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_IDX, 0x3020000); + kgsl_regread(device, A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4, val); + val++; + kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_IDX, 0x1000000); + kgsl_regread(device, A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4, val); +} + +/* + * a4xx_snapshot_vbif_debugbus() - Dump the VBIF debug data + * @device: Device pointer for which the debug data is dumped + * @buf: Pointer to the memory where the data is dumped + * @remain: Amout of bytes remaining in snapshot + * @priv: Pointer to debug bus block + * + * Returns the number of bytes dumped + */ +static size_t a4xx_snapshot_vbif_debugbus(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i, j; + /* + * Total number of VBIF data words considering 3 sections: + * 2 arbiter blocks of 16 words + * 5 AXI XIN blocks of 4 dwords 
each + * 5 core clock side XIN blocks of 5 dwords each + */ + unsigned int dwords = (16 * A4XX_NUM_AXI_ARB_BLOCKS) + + (4 * A4XX_NUM_XIN_BLOCKS) + (5 * A4XX_NUM_XIN_BLOCKS); + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + unsigned int reg_clk; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + header->id = block->block_id; + header->count = dwords; + + kgsl_regread(device, A4XX_VBIF_CLKON, ®_clk); + kgsl_regwrite(device, A4XX_VBIF_CLKON, reg_clk | + (A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK << + A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT)); + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL0, 0); + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS_OUT_CTRL, + (A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK << + A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT)); + for (i = 0; i < A4XX_NUM_AXI_ARB_BLOCKS; i++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL0, + (1 << (i + 16))); + for (j = 0; j < 16; j++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL1, + ((j & A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks AXI side */ + for (i = 0; i < A4XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 4; j++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL1, + ((j & A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks core clock side */ + for (i = 0; i < A4XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 5; j++) { + kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL1, + ((j & A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK) + << A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + /* restore the clock of VBIF */ + kgsl_regwrite(device, A4XX_VBIF_CLKON, reg_clk); + return size; +} + +/* + * a4xx_snapshot_debugbus_block() - Capture debug data for a gpu block + * @device: Pointer to device + * @buf: Memory where data is captured + * @remain: Number of bytes left in snapshot + * @priv: Pointer to debug bus block + * + * Returns the number of bytes written + */ +static size_t a4xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + dwords = block->dwords; + + /* For a4xx each debug bus data unit is 2 DWRODS */ + size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords * 2; + + for (i = 0; i < dwords; i++) + a4xx_rbbm_debug_bus_read(device, block->block_id, i, + &data[i*2]); + + return size; +} + +/* + * a4xx_snapshot_debugbus() - Capture debug bus data + * @device: The device for which data is captured + * @snapshot: Pointer to the snapshot instance + */ +static void a4xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int i; + + kgsl_regwrite(device, 
A4XX_RBBM_CFG_DEBBUS_CTLM, + 0xf << A4XX_RBBM_CFG_DEBBUS_CTLT_ENABLE_SHIFT); + + for (i = 0; i < ARRAY_SIZE(a4xx_debugbus_blocks); i++) { + if (A4XX_RBBM_DEBBUS_VBIF_ID == + a4xx_debugbus_blocks[i].block_id) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a4xx_snapshot_vbif_debugbus, + (void *) &a4xx_debugbus_blocks[i]); + else + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a4xx_snapshot_debugbus_block, + (void *) &a4xx_debugbus_blocks[i]); + } + + if (!adreno_is_a405(adreno_dev)) { + for (i = 0; i < ARRAY_SIZE(a420_debugbus_blocks); i++) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a4xx_snapshot_debugbus_block, + (void *) &a420_debugbus_blocks[i]); + + } +} + +static void a4xx_reset_hlsq(struct kgsl_device *device) +{ + unsigned int val, dummy = 0; + + /* reset cp */ + kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 1 << 20); + kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy); + + /* reset hlsq */ + kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 1 << 25); + kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy); + + /* clear reset bits */ + kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 0); + kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy); + + + /* set HLSQ_TIMEOUT_THRESHOLD.cycle_timeout_limit_sp to 26 */ + kgsl_regread(device, A4XX_HLSQ_TIMEOUT_THRESHOLD, &val); + val &= (0x1F << 24); + val |= (26 << 24); + kgsl_regwrite(device, A4XX_HLSQ_TIMEOUT_THRESHOLD, val); +} + +/* + * a4xx_snapshot() - A4XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the A4XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a4xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; + + /* Disable SP clock gating for the debug bus to work */ + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP0, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP1, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP2, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP3, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP0, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP1, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP2, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP3, 0); + + /* Disable top level clock gating the debug bus to work */ + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0); + kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2, 0); + + /* Turn on MMU clocks since we read MMU registers */ + kgsl_mmu_enable_clk(&device->mmu); + + /* Master set of (non debug) registers */ + + SNAPSHOT_REGISTERS(device, snapshot, a4xx_registers); + + if (adreno_is_a430(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a4xx_sp_tp_registers); + + if (adreno_is_a420(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a4xx_xpu_registers); + + if (adreno_is_a430v2(adreno_dev)) + SNAPSHOT_REGISTERS(device, snapshot, a4xx_ppd_registers); + + adreno_snapshot_vbif_registers(device, snapshot, + a4xx_vbif_snapshot_registers, + ARRAY_SIZE(a4xx_vbif_snapshot_registers)); + + kgsl_mmu_disable_clk(&device->mmu); + + kgsl_snapshot_indexed_registers(device, snapshot, + A4XX_CP_STATE_DEBUG_INDEX, A4XX_CP_STATE_DEBUG_DATA, + 0, snap_data->sect_sizes->cp_pfp); + + /* CP_ME indexed registers */ + 
kgsl_snapshot_indexed_registers(device, snapshot, + A4XX_CP_ME_CNTL, A4XX_CP_ME_STATUS, 64, 44); + + /* VPC memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_vpc_memory, + &snap_data->sect_sizes->vpc_mem); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_meq, + &snap_data->sect_sizes->cp_meq); + + /* CP PFP and PM4 */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pfp_ram, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_pm4_ram, NULL); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_roq, + &snap_data->sect_sizes->roq); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_merciu, + &snap_data->sect_sizes->cp_merciu); + + /* Debug bus */ + a4xx_snapshot_debugbus(device, snapshot); + + if (!adreno_is_a430(adreno_dev)) { + a4xx_reset_hlsq(device); + SNAPSHOT_REGISTERS(device, snapshot, a4xx_sp_tp_registers); + } + + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a4xx_snapshot_shader_memory, + &snap_data->sect_sizes->shader_mem); +} diff --git a/drivers/gpu/msm/adreno_a5xx.c b/drivers/gpu/msm/adreno_a5xx.c new file mode 100644 index 000000000000..2a461700f46a --- /dev/null +++ b/drivers/gpu/msm/adreno_a5xx.c @@ -0,0 +1,3403 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/firmware.h> +#include <soc/qcom/subsystem_restart.h> +#include <soc/qcom/scm.h> +#include <linux/pm_opp.h> + +#include "adreno.h" +#include "a5xx_reg.h" +#include "adreno_a5xx.h" +#include "adreno_cp_parser.h" +#include "adreno_trace.h" +#include "adreno_pm4types.h" +#include "adreno_perfcounter.h" +#include "adreno_ringbuffer.h" +#include "kgsl_sharedmem.h" +#include "kgsl_log.h" +#include "kgsl.h" + +static int zap_ucode_loaded; + +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); + +static const struct adreno_vbif_data a530_vbif[] = { + {A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003}, + {0, 0}, +}; + +static const struct adreno_vbif_platform a5xx_vbif_platforms[] = { + { adreno_is_a530, a530_vbif }, + { adreno_is_a510, a530_vbif }, + { adreno_is_a505, a530_vbif }, + { adreno_is_a506, a530_vbif }, +}; + +#define PREEMPT_RECORD(_field) \ + offsetof(struct a5xx_cp_preemption_record, _field) + +#define PREEMPT_SMMU_RECORD(_field) \ + offsetof(struct a5xx_cp_smmu_info, _field) +static void a5xx_gpmu_reset(struct work_struct *work); +static int _read_fw2_block_header(uint32_t *header, uint32_t id, + uint32_t major, uint32_t minor); + +/** + * Number of times to check if the regulator enabled before + * giving up and returning failure. + */ +#define PWR_RETRY 100 + +/** + * Number of times to check if the GPMU firmware is initialized before + * giving up and returning failure. 
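+ * Each retry in a5xx_gpmu_start() is separated by a udelay(1), so 100
+ * retries gives the firmware on the order of 100 usec to report
+ * 0xBABEFACE in A5XX_GPMU_GENERAL_0, comfortably above its expected
+ * 5 to 24 usec initialization latency.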
+ */ +#define GPMU_FW_INIT_RETRY 100 + +#define GPMU_HEADER_ID 1 +#define GPMU_FIRMWARE_ID 2 +#define GPMU_SEQUENCE_ID 3 +#define GPMU_INST_RAM_SIZE 0xFFF + +#define HEADER_MAJOR 1 +#define HEADER_MINOR 2 +#define HEADER_DATE 3 +#define HEADER_TIME 4 +#define HEADER_SEQUENCE 5 + +#define MAX_HEADER_SIZE 10 + +#define LM_SEQUENCE_ID 1 +#define HWCG_SEQUENCE_ID 2 +#define MAX_SEQUENCE_ID 3 + +/* GPMU communication protocal AGC */ +#define AGC_INIT_BASE A5XX_GPMU_DATA_RAM_BASE +#define AGC_RVOUS_MAGIC (AGC_INIT_BASE + 0) +#define AGC_KMD_GPMU_ADDR (AGC_INIT_BASE + 1) +#define AGC_KMD_GPMU_BYTES (AGC_INIT_BASE + 2) +#define AGC_GPMU_KMD_ADDR (AGC_INIT_BASE + 3) +#define AGC_GPMU_KMD_BYTES (AGC_INIT_BASE + 4) +#define AGC_INIT_MSG_MAGIC (AGC_INIT_BASE + 5) +#define AGC_RESERVED (AGC_INIT_BASE + 6) +#define AGC_MSG_BASE (AGC_INIT_BASE + 7) + +#define AGC_MSG_STATE (AGC_MSG_BASE + 0) +#define AGC_MSG_COMMAND (AGC_MSG_BASE + 1) +#define AGC_MSG_RETURN (AGC_MSG_BASE + 2) +#define AGC_MSG_PAYLOAD_SIZE (AGC_MSG_BASE + 3) +#define AGC_MSG_MAX_RETURN_SIZE (AGC_MSG_BASE + 4) +#define AGC_MSG_PAYLOAD (AGC_MSG_BASE + 5) + +#define AGC_INIT_MSG_VALUE 0xBABEFACE + +#define AGC_POWER_CONFIG_PRODUCTION_ID 1 + +#define LM_DEFAULT_LIMIT 6000 + +#define A530_DEFAULT_LEAKAGE 0x004E001A + +#define A530_QFPROM_RAW_PTE_ROW0_MSB 0x134 +#define A530_QFPROM_RAW_PTE_ROW2_MSB 0x144 +#define A530_QFPROM_CORR_PTE_ROW0_LSB 0x4130 + +static void a530_efuse_leakage(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int row0, row2; + unsigned int multiplier, gfx_active, leakage_pwr_on, coeff; + + adreno_efuse_read_u32(adreno_dev, + A530_QFPROM_RAW_PTE_ROW0_MSB, &row0); + + adreno_efuse_read_u32(adreno_dev, + A530_QFPROM_RAW_PTE_ROW2_MSB, &row2); + + multiplier = (row0 >> 1) & 0x3; + gfx_active = (row2 >> 2) & 0xFF; + + if (of_property_read_u32(device->pdev->dev.of_node, + "qcom,base-leakage-coefficient", &coeff)) + return; + + leakage_pwr_on = gfx_active * (1 << multiplier); + + adreno_dev->lm_leakage = (leakage_pwr_on << 16) | + ((leakage_pwr_on * coeff) / 100); +} + +static void a530_efuse_speed_bin(struct adreno_device *adreno_dev) +{ + unsigned int val; + + adreno_efuse_read_u32(adreno_dev, + A530_QFPROM_CORR_PTE_ROW0_LSB, &val); + + adreno_dev->speed_bin = + (val & 0xE0000000) >> 29; +} + +static const struct { + int (*check)(struct adreno_device *adreno_dev); + void (*func)(struct adreno_device *adreno_dev); +} a5xx_efuse_funcs[] = { + { adreno_is_a530, a530_efuse_leakage }, + { adreno_is_a530v3, a530_efuse_speed_bin }, +}; + +static void a5xx_check_features(struct adreno_device *adreno_dev) +{ + unsigned int i; + + if (adreno_efuse_map(adreno_dev)) + return; + + for (i = 0; i < ARRAY_SIZE(a5xx_efuse_funcs); i++) { + if (a5xx_efuse_funcs[i].check(adreno_dev)) + a5xx_efuse_funcs[i].func(adreno_dev); + } + + adreno_efuse_unmap(adreno_dev); +} + +/* + * a5xx_preemption_start() - Setup state to start preemption + */ +static void a5xx_preemption_start(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = &(adreno_dev->dev); + struct kgsl_iommu *iommu = device->mmu.priv; + uint64_t ttbr0; + uint32_t contextidr; + struct kgsl_pagetable *pt; + bool switch_default_pt = true; + + kgsl_sharedmem_writel(device, &rb->preemption_desc, + PREEMPT_RECORD(wptr), rb->wptr); + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, + lower_32_bits(rb->preemption_desc.gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, + 
upper_32_bits(rb->preemption_desc.gpuaddr)); + kgsl_sharedmem_readq(&rb->pagetable_desc, &ttbr0, + offsetof(struct adreno_ringbuffer_pagetable_info, ttbr0)); + kgsl_sharedmem_readl(&rb->pagetable_desc, &contextidr, + offsetof(struct adreno_ringbuffer_pagetable_info, contextidr)); + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == ttbr0) { + switch_default_pt = false; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + if (switch_default_pt) { + ttbr0 = kgsl_mmu_pagetable_get_ttbr0( + device->mmu.defaultpagetable); + contextidr = kgsl_mmu_pagetable_get_contextidr( + device->mmu.defaultpagetable); + } + + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + offsetof(struct a5xx_cp_smmu_info, ttbr0), ttbr0); + kgsl_sharedmem_writel(device, &iommu->smmu_info, + offsetof(struct a5xx_cp_smmu_info, context_idr), contextidr); +} + +/* + * a5xx_preemption_save() - Save the state after preemption is done + */ +static void a5xx_preemption_save(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + /* save the rptr from ctxrecord here */ + kgsl_sharedmem_readl(&rb->preemption_desc, &rb->rptr, + PREEMPT_RECORD(rptr)); +} + +static int a5xx_preemption_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_iommu *iommu = device->mmu.priv; + struct adreno_ringbuffer *rb; + int ret; + unsigned int i; + uint64_t addr; + + /* We are dependent on IOMMU to make preemption go on the CP side */ + if (kgsl_mmu_get_mmutype() != KGSL_MMU_TYPE_IOMMU) + return -ENODEV; + + /* Allocate mem for storing preemption counters */ + ret = kgsl_allocate_global(device, &adreno_dev->preemption_counters, + adreno_dev->num_ringbuffers * + A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0); + if (ret) + return ret; + + addr = adreno_dev->preemption_counters.gpuaddr; + + /* Allocate mem for storing preemption switch record */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + ret = kgsl_allocate_global(&adreno_dev->dev, + &rb->preemption_desc, A5XX_CP_CTXRECORD_SIZE_IN_BYTES, + 0, KGSL_MEMDESC_PRIVILEGED); + if (ret) + return ret; + + /* Initialize the context switch record here */ + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(info), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(data), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(cntl), 0x0800000C); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + kgsl_sharedmem_writeq(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(rbase), + adreno_dev->ringbuffers[i].buffer_desc.gpuaddr); + kgsl_sharedmem_writeq(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(counter), addr); + + addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + } + + /* Allocate mem for storing preemption smmu record */ + return kgsl_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, + KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED); +} + +/* + * a5xx_preemption_token() - Preempt token on a5xx + * PM4 commands for preempt token on a5xx. These commands are + * submitted to ringbuffer to trigger preemption. 
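+ *
+ * The token is two packets: CP_YIELD_ENABLE to restrict yielding to
+ * ringbuffer level, followed by CP_CONTEXT_SWITCH_YIELD carrying the
+ * address to signal and a flag asking for an interrupt once the
+ * preemption completes. The return value is the number of dwords
+ * written, so the caller can advance its command pointer by that
+ * amount.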
+ */ +static int a5xx_preemption_token(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + uint64_t gpuaddr) +{ + unsigned int *cmds_orig = cmds; + + /* Enable yield in RB only */ + *cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + *cmds++ = 1; + /* generate interrupt on preemption completion */ + *cmds++ = 1; + + return cmds - cmds_orig; + +} + +/* + * a5xx_preemption_pre_ibsubmit() - Below PM4 commands are + * added at the beginning of every cmdbatch submission. + */ +static int a5xx_preemption_pre_ibsubmit( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context, uint64_t cond_addr, + struct kgsl_memobj_node *ib) +{ + unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = rb->preemption_desc.gpuaddr; + unsigned int preempt_style = 0; + + if (context) + preempt_style = ADRENO_PREEMPT_STYLE(context->flags); + + /* + * CP_PREEMPT_ENABLE_GLOBAL(global preemption) can only be set by KMD + * in ringbuffer. + * 1) set global preemption to 0x0 to disable global preemption. + * Only RB level preemption is allowed in this mode + * 2) Set global preemption to defer(0x2) for finegrain preemption. + * when global preemption is set to defer(0x2), + * CP_PREEMPT_ENABLE_LOCAL(local preemption) determines the + * preemption point. Local preemption + * can be enabled by both UMD(within IB) and KMD. + */ + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1); + *cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) + ? 2 : 0); + + /* Turn CP protection OFF */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + + /* + * CP during context switch will save context switch info to + * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR + */ + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1); + *cmds++ = upper_32_bits(gpuaddr); + + /* Turn CP protection ON */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + /* + * Enable local preemption for finegrain preemption in case of + * a misbehaving IB + */ + if (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) { + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 1; + } else { + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 0; + } + + return cmds - cmds_orig; +} + +/* + * a5xx_preemption_post_ibsubmit() - Below PM4 commands are + * added after every cmdbatch submission. + */ +static int a5xx_preemption_post_ibsubmit( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + unsigned int ctx_id = context ? context->id : 0; + + /* + * SRM -- set render mode (ex binning, direct render etc) + * SRM is set by UMD usually at start of IB to tell CP the type of + * preemption. + * KMD needs to set SRM to NULL to indicate CP that rendering is + * done by IB. 
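+ * The zero-filled CP_SET_RENDER_MODE packet below does exactly that,
+ * and is followed by a preemption token that yields on the context's
+ * "preempted" memstore slot.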
+ */ + *cmds++ = cp_type7_packet(CP_SET_RENDER_MODE, 5); + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + *cmds++ = 0; + + cmds += a5xx_preemption_token(adreno_dev, rb, cmds, + rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(ctx_id, preempted)); + + return cmds - cmds_orig; +} + +static void a5xx_platform_setup(struct adreno_device *adreno_dev) +{ + uint64_t addr; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (adreno_is_a505_or_a506(adreno_dev)) { + gpudev->snapshot_data->sect_sizes->cp_meq = 32; + gpudev->snapshot_data->sect_sizes->cp_merciu = 1024; + gpudev->snapshot_data->sect_sizes->roq = 256; + + /* A505 & A506 having 3 XIN ports in VBIF */ + gpudev->vbif_xin_halt_ctrl0_mask = + A510_VBIF_XIN_HALT_CTRL0_MASK; + } else if (adreno_is_a510(adreno_dev)) { + gpudev->snapshot_data->sect_sizes->cp_meq = 32; + gpudev->snapshot_data->sect_sizes->cp_merciu = 32; + gpudev->snapshot_data->sect_sizes->roq = 256; + + /* A510 has 3 XIN ports in VBIF */ + gpudev->vbif_xin_halt_ctrl0_mask = + A510_VBIF_XIN_HALT_CTRL0_MASK; + } + + /* Calculate SP local and private mem addresses */ + addr = ALIGN(ADRENO_UCHE_GMEM_BASE + adreno_dev->gmem_size, SZ_64K); + adreno_dev->sp_local_gpuaddr = addr; + adreno_dev->sp_pvt_gpuaddr = addr + SZ_64K; + + /* Setup defaults that might get changed by the fuse bits */ + adreno_dev->lm_leakage = A530_DEFAULT_LEAKAGE; + adreno_dev->speed_bin = 0; + + /* Check efuse bits for various capabilties */ + a5xx_check_features(adreno_dev); +} + +static void a5xx_init(struct adreno_device *adreno_dev) +{ + if (adreno_is_a530(adreno_dev) && !adreno_is_a530v1(adreno_dev)) + INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset); + + a5xx_crashdump_init(adreno_dev); +} + +/** + * a5xx_protect_init() - Initializes register protection on a5xx + * @device: Pointer to the device structure + * Performs register writes to enable protected access to sensitive + * registers + */ +static void a5xx_protect_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + int index = 0; + struct kgsl_protected_registers *iommu_regs; + + /* enable access protection to privileged registers */ + kgsl_regwrite(device, A5XX_CP_PROTECT_CNTL, 0x00000007); + + /* RBBM registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x4, 2); + adreno_set_protected_registers(adreno_dev, &index, 0x8, 3); + adreno_set_protected_registers(adreno_dev, &index, 0x10, 4); + adreno_set_protected_registers(adreno_dev, &index, 0x20, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x40, 6); + adreno_set_protected_registers(adreno_dev, &index, 0x80, 6); + + /* Content protection registers */ + adreno_set_protected_registers(adreno_dev, &index, + A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 4); + adreno_set_protected_registers(adreno_dev, &index, + A5XX_RBBM_SECVID_TRUST_CNTL, 1); + + /* CP registers */ + adreno_set_protected_registers(adreno_dev, &index, 0x800, 6); + adreno_set_protected_registers(adreno_dev, &index, 0x840, 3); + adreno_set_protected_registers(adreno_dev, &index, 0x880, 5); + adreno_set_protected_registers(adreno_dev, &index, 0x0AA0, 0); + + /* RB registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0); + adreno_set_protected_registers(adreno_dev, &index, 0xCF0, 1); + + /* VPC registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE68, 3); + adreno_set_protected_registers(adreno_dev, &index, 0xE70, 4); + + /* UCHE registers */ + adreno_set_protected_registers(adreno_dev, &index, 0xE87, 4); + + /* SMMU 
registers */ + iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu); + if (iommu_regs) + adreno_set_protected_registers(adreno_dev, &index, + iommu_regs->base, iommu_regs->range); +} + +/* + * a5xx_is_sptp_idle() - A530 SP/TP/RAC should be power collapsed to be + * considered idle + * @adreno_dev: The adreno_device pointer + */ +static bool a5xx_is_sptp_idle(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + + /* If feature is not supported or enabled, no worry */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) + return true; + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + return false; + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + return !(reg & BIT(20)); +} + +/* + * _poll_gdsc_status() - Poll the GDSC status register + * @adreno_dev: The adreno device pointer + * @status_reg: Offset of the status register + * @status_value: The expected bit value + * + * Poll the status register till the power-on bit is equal to the + * expected value or the max retries are exceeded. + */ +static int _poll_gdsc_status(struct adreno_device *adreno_dev, + unsigned int status_reg, + unsigned int status_value) +{ + unsigned int reg, retry = PWR_RETRY; + struct kgsl_device *device = &adreno_dev->dev; + + /* Bit 20 is the power on bit of SPTP and RAC GDSC status register */ + do { + udelay(1); + kgsl_regread(device, status_reg, ®); + } while (((reg & BIT(20)) != (status_value << 20)) && retry--); + if ((reg & BIT(20)) != (status_value << 20)) + return -ETIMEDOUT; + return 0; +} + +/* + * a5xx_regulator_enable() - Enable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly enabled + * on a restart. Clocks must be on during this call. + */ +static int a5xx_regulator_enable(struct adreno_device *adreno_dev) +{ + unsigned int ret; + struct kgsl_device *device = &adreno_dev->dev; + if (!adreno_is_a530(adreno_dev)) + return 0; + + /* + * Turn on smaller power domain first to reduce voltage droop. + * Set the default register values; set SW_COLLAPSE to 0. + */ + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000); + /* Insert a delay between RAC and SPTP GDSC to reduce voltage droop */ + udelay(3); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1); + if (ret) { + KGSL_PWR_ERR(device, "RBCCU GDSC enable failed\n"); + return ret; + } + + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778000); + ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_SP_PWR_CLK_STATUS, 1); + if (ret) { + KGSL_PWR_ERR(device, "SPTP GDSC enable failed\n"); + return ret; + } + + return 0; +} + +/* + * a5xx_regulator_disable() - Disable any necessary HW regulators + * @adreno_dev: The adreno device pointer + * + * Some HW blocks may need their regulators explicitly disabled + * on a power down to prevent current spikes. Clocks must be on + * during this call. + */ +static void a5xx_regulator_disable(struct adreno_device *adreno_dev) +{ + unsigned int reg; + struct kgsl_device *device = &adreno_dev->dev; + + /* If feature is not supported or not enabled */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) { + /* Set the default register values; set SW_COLLAPSE to 1 */ + kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778001); + /* + * Insert a delay between SPTP and RAC GDSC to reduce voltage + * droop. 
+ */ + udelay(3); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_SP_PWR_CLK_STATUS, 0)) + KGSL_PWR_WARN(device, "SPTP GDSC disable failed\n"); + + kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778001); + if (_poll_gdsc_status(adreno_dev, + A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 0)) + KGSL_PWR_WARN(device, "RBCCU GDSC disable failed\n"); + } else if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* GPMU firmware is supposed to turn off SPTP & RAC GDSCs. */ + kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + KGSL_PWR_WARN(device, "SPTP GDSC is not disabled\n"); + kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, ®); + if (reg & BIT(20)) + KGSL_PWR_WARN(device, "RBCCU GDSC is not disabled\n"); + /* + * GPMU firmware is supposed to set GMEM to non-retention. + * Bit 14 is the memory core force on bit. + */ + kgsl_regread(device, A5XX_GPMU_RBCCU_CLOCK_CNTL, ®); + if (reg & BIT(14)) + KGSL_PWR_WARN(device, "GMEM is forced on\n"); + } + + if (adreno_is_a530(adreno_dev)) { + /* Reset VBIF before PC to avoid popping bogus FIFO entries */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, + 0x003C0000); + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, 0); + } +} + +/* + * a5xx_enable_pc() - Enable the GPMU based power collapse of the SPTP and RAC + * blocks + * @adreno_dev: The adreno device pointer + */ +static void a5xx_enable_pc(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) || + !test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL, 0x0000007F); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_BINNING_CTRL, 0); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_HYST, 0x000A0080); + kgsl_regwrite(device, A5XX_GPMU_PWR_COL_STAGGER_DELAY, 0x00600040); + + trace_adreno_sp_tp((unsigned long) __builtin_return_address(0)); +}; + +/* + * The maximum payload of a type4 packet is the max size minus one for the + * opcode + */ +#define TYPE4_MAX_PAYLOAD (PM4_TYPE4_PKT_SIZE_MAX - 1) + +static int _gpmu_create_load_cmds(struct adreno_device *adreno_dev, + uint32_t *ucode, uint32_t size) +{ + uint32_t *start, *cmds; + uint32_t offset = 0; + uint32_t cmds_size = size; + + /* Add a dword for each PM4 packet */ + cmds_size += (size / TYPE4_MAX_PAYLOAD) + 1; + + /* Add 4 dwords for the protected mode */ + cmds_size += 4; + + if (adreno_dev->gpmu_cmds != NULL) + return 0; + + adreno_dev->gpmu_cmds = kmalloc(cmds_size << 2, GFP_KERNEL); + if (adreno_dev->gpmu_cmds == NULL) + return -ENOMEM; + + cmds = adreno_dev->gpmu_cmds; + start = cmds; + + /* Turn CP protection OFF */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + + /* + * Prebuild the cmd stream to send to the GPU to load + * the GPMU firmware + */ + while (size > 0) { + int tmp_size = size; + + if (size >= TYPE4_MAX_PAYLOAD) + tmp_size = TYPE4_MAX_PAYLOAD; + + *cmds++ = cp_type4_packet( + A5XX_GPMU_INST_RAM_BASE + offset, + tmp_size); + + memcpy(cmds, &ucode[offset], tmp_size << 2); + + cmds += tmp_size; + offset += tmp_size; + size -= tmp_size; + } + + /* Turn CP protection ON */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + adreno_dev->gpmu_cmds_size = (size_t) (cmds - start); + + return 0; +} + + +/* + * _load_gpmu_firmware() - Load the ucode into the GPMU RAM + * @adreno_dev: Pointer to adreno device + */ +static int _load_gpmu_firmware(struct adreno_device *adreno_dev) +{ + uint32_t 
*data; + const struct firmware *fw = NULL; + struct kgsl_device *device = &adreno_dev->dev; + const struct adreno_gpu_core *gpucore = adreno_dev->gpucore; + uint32_t *cmds, cmd_size; + int ret = -EINVAL; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) + return 0; + + /* gpmu fw already saved and verified so do nothing new */ + if (adreno_dev->gpmu_cmds_size != 0) + return 0; + + if (gpucore->gpmufw_name == NULL) + return 0; + + ret = request_firmware(&fw, gpucore->gpmufw_name, device->dev); + if (ret || fw == NULL) { + KGSL_CORE_ERR("request_firmware (%s) failed: %d\n", + gpucore->gpmufw_name, ret); + return ret; + } + + data = (uint32_t *)fw->data; + + if (data[0] >= (fw->size / sizeof(uint32_t)) || data[0] < 2) + goto err; + + if (data[1] != GPMU_FIRMWARE_ID) + goto err; + ret = _read_fw2_block_header(&data[2], + GPMU_FIRMWARE_ID, + adreno_dev->gpucore->gpmu_major, + adreno_dev->gpucore->gpmu_minor); + if (ret) + goto err; + + cmds = data + data[2] + 3; + cmd_size = data[0] - data[2] - 2; + + if (cmd_size > GPMU_INST_RAM_SIZE) { + KGSL_CORE_ERR( + "GPMU firmware block size is larger than RAM size\n"); + goto err; + } + + /* Everything is cool, so create some commands */ + ret = _gpmu_create_load_cmds(adreno_dev, cmds, cmd_size); +err: + if (fw) + release_firmware(fw); + + return ret; +} + +static int _gpmu_send_init_cmds(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + uint32_t *cmds; + uint32_t size = adreno_dev->gpmu_cmds_size; + + if (size == 0 || adreno_dev->gpmu_cmds == NULL) + return -EINVAL; + + cmds = adreno_ringbuffer_allocspace(rb, size); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + /* Copy to the RB the predefined fw sequence cmds */ + memcpy(cmds, adreno_dev->gpmu_cmds, size << 2); + return adreno_ringbuffer_submit_spin(rb, NULL, 2000); +} + +/* + * a5xx_gpmu_start() - Initialize and start the GPMU + * @adreno_dev: Pointer to adreno device + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of reset. + */ +static int a5xx_gpmu_start(struct adreno_device *adreno_dev) +{ + int ret; + unsigned int reg, retry = GPMU_FW_INIT_RETRY; + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) + return 0; + + ret = _gpmu_send_init_cmds(adreno_dev); + if (ret) { + KGSL_CORE_ERR("Failed to program the GPMU: %d\n", ret); + return ret; + } + + /* GPMU clock gating setup */ + kgsl_regwrite(device, A5XX_GPMU_WFI_CONFIG, 0x00004014); + + /* Kick off GPMU firmware */ + kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 0); + /* + * The hardware team's estimation of GPMU firmware initialization + * latency is about 3000 cycles, that's about 5 to 24 usec. + */ + do { + udelay(1); + kgsl_regread(device, A5XX_GPMU_GENERAL_0, ®); + } while ((reg != 0xBABEFACE) && retry--); + if (reg != 0xBABEFACE) { + KGSL_CORE_ERR("GPMU firmware initialization timed out\n"); + ret = -ETIMEDOUT; + } else { + set_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv); + /* + * We are in AWARE state and IRQ line from GPU to host is + * disabled. + * Read pending GPMU interrupts and clear GPMU_RBBM_INTR_INFO. + */ + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + /* + * Clear RBBM interrupt mask if any of GPMU interrupts + * are pending. 
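+ * The A5XX_RBBM_INT_CLEAR_CMD write below acks the pending
+ * A5XX_INT_GPMU_FIRMWARE interrupt so that it does not fire as soon
+ * as the GPU IRQ line is enabled again.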
+ */ + if (reg) + kgsl_regwrite(device, + A5XX_RBBM_INT_CLEAR_CMD, + 1 << A5XX_INT_GPMU_FIRMWARE); + } + return ret; +} + +struct kgsl_hwcg_reg { + unsigned int off; + unsigned int val; +}; + +static const struct kgsl_hwcg_reg a50x_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00555555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct kgsl_hwcg_reg a510_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 
0x00555555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct kgsl_hwcg_reg a530_hwcg_regs[] = { + {A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222}, + {A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220}, + {A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220}, + {A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF}, + {A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, + {A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, + {A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222}, + {A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, + {A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777}, + {A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777}, + {A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, + {A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111}, + {A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111}, + {A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, + {A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444}, + {A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, + {A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222}, + 
{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222}, + {A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220}, + {A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222}, + {A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00555555}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404}, + {A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002}, + {A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011}, + {A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, + {A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, + {A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, + {A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, + {A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, + {A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, + {A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, + {A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, + {A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} +}; + +static const struct { + int (*devfunc)(struct adreno_device *adreno_dev); + const struct kgsl_hwcg_reg *regs; + unsigned int count; +} a5xx_hwcg_registers[] = { + { adreno_is_a530v3, a530_hwcg_regs, ARRAY_SIZE(a530_hwcg_regs) }, + { adreno_is_a530v2, a530_hwcg_regs, ARRAY_SIZE(a530_hwcg_regs) }, + { adreno_is_a510, a510_hwcg_regs, ARRAY_SIZE(a510_hwcg_regs) }, + { adreno_is_a505, a50x_hwcg_regs, ARRAY_SIZE(a50x_hwcg_regs) }, + { adreno_is_a506, a50x_hwcg_regs, ARRAY_SIZE(a50x_hwcg_regs) }, +}; + +static void a5xx_hwcg_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + const struct kgsl_hwcg_reg *regs; + int i, j; + + for (i = 0; i < ARRAY_SIZE(a5xx_hwcg_registers); i++) { + if (a5xx_hwcg_registers[i].devfunc(adreno_dev)) + break; + } + + if (i == ARRAY_SIZE(a5xx_hwcg_registers)) + return; + + regs = a5xx_hwcg_registers[i].regs; + + for (j = 0; j < a5xx_hwcg_registers[i].count; j++) + kgsl_regwrite(device, regs[j].off, regs[j].val); + + /* enable top level HWCG */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0xAAA8AA00); + kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x00000182); +} + +static int _read_fw2_block_header(uint32_t *header, uint32_t id, + uint32_t major, uint32_t minor) +{ + uint32_t header_size; + int i = 1; + + if (header == NULL) + return -ENOMEM; + + header_size = header[0]; + /* Headers have limited size and always occur as pairs of words */ + if (header_size > MAX_HEADER_SIZE || header_size % 2) + return -EINVAL; + /* Sequences must have an identifying id first thing in their header */ + if (id == GPMU_SEQUENCE_ID) { + if (header[i] != HEADER_SEQUENCE || + (header[i + 1] >= MAX_SEQUENCE_ID)) + return -EINVAL; + i += 2; + } + for (; i < header_size; i += 2) { + switch (header[i]) { + /* Major Version */ + case HEADER_MAJOR: + if ((major > header[i + 1]) && + header[i + 1]) { + KGSL_CORE_ERR( + "GPMU major version mis-match %d, %d\n", + major, header[i + 1]); + return -EINVAL; + } + break; + case HEADER_MINOR: + if (minor > header[i + 1]) + KGSL_CORE_ERR( + "GPMU minor version mis-match %d %d\n", + minor, header[i + 1]); + break; + case HEADER_DATE: + case HEADER_TIME: + break; + default: + KGSL_CORE_ERR("GPMU unknown header ID %d\n", + header[i]); + } + } + return 0; +} + +/* + * Read 
in the register sequence file and save pointers to the + * necessary sequences. + * + * GPU sequence file format (one dword per field unless noted): + * Block 1 length (length dword field not inclusive) + * Block 1 type = Sequence = 3 + * Block Header length (length dword field not inclusive) + * BH field ID = Sequence field ID + * BH field data = Sequence ID + * BH field ID + * BH field data + * ... + * Opcode 0 ID + * Opcode 0 data M words + * Opcode 1 ID + * Opcode 1 data N words + * ... + * Opcode X ID + * Opcode X data O words + * Block 2 length... + */ +static void _load_regfile(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + const struct firmware *fw; + uint32_t block_size = 0, block_total = 0, fw_size; + uint32_t *block; + int ret = -EINVAL; + + if (!adreno_dev->gpucore->regfw_name) + return; + + ret = request_firmware(&fw, adreno_dev->gpucore->regfw_name, + device->dev); + if (ret) { + KGSL_PWR_ERR(device, "request firmware failed %d, %s\n", + ret, adreno_dev->gpucore->regfw_name); + return; + } + + fw_size = fw->size / sizeof(uint32_t); + /* Min valid file of size 6, see file description */ + if (fw_size < 6) + goto err; + block = (uint32_t *)fw->data; + /* All offset numbers calculated from file description */ + while (block_total < fw_size) { + block_size = block[0]; + if (block_size >= fw_size || block_size < 2) + goto err; + if (block[1] != GPMU_SEQUENCE_ID) + goto err; + + /* For now ignore blocks other than the LM sequence */ + if (block[4] == LM_SEQUENCE_ID) { + ret = _read_fw2_block_header(&block[2], + GPMU_SEQUENCE_ID, + adreno_dev->gpucore->lm_major, + adreno_dev->gpucore->lm_minor); + if (ret) + goto err; + + adreno_dev->lm_fw = fw; + adreno_dev->lm_sequence = block + block[2] + 3; + adreno_dev->lm_size = block_size - block[2] - 2; + } + block_total += (block_size + 1); + block += (block_size + 1); + } + if (adreno_dev->lm_sequence) + return; + +err: + release_firmware(fw); + KGSL_PWR_ERR(device, + "Register file failed to load sz=%d bsz=%d header=%d\n", + fw_size, block_size, ret); + return; +} + +static int _execute_reg_sequence(struct adreno_device *adreno_dev, + uint32_t *opcode, uint32_t length) +{ + struct kgsl_device *device = &adreno_dev->dev; + uint32_t *cur = opcode; + uint64_t reg, val; + + /* todo double check the reg writes */ + while ((cur - opcode) < length) { + switch (cur[0]) { + /* Write a 32 bit value to a 64 bit reg */ + case 1: + reg = cur[2]; + reg = (reg << 32) | cur[1]; + kgsl_regwrite(device, reg, cur[3]); + cur += 4; + break; + /* Write a 64 bit value to a 64 bit reg */ + case 2: + reg = cur[2]; + reg = (reg << 32) | cur[1]; + val = cur[4]; + val = (val << 32) | cur[3]; + kgsl_regwrite(device, reg, val); + cur += 5; + break; + /* Delay for X usec */ + case 3: + udelay(cur[1]); + cur += 2; + break; + default: + return -EINVAL; + } } + return 0; +} + +static void _write_voltage_table(struct adreno_device *adreno_dev, + unsigned int addr, uint32_t *length) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + struct dev_pm_opp *opp; + int levels = pwr->num_pwrlevels - 1; + unsigned int mvolt = 0; + + kgsl_regwrite(device, addr, adreno_dev->gpucore->max_power); + kgsl_regwrite(device, addr + 1, levels); + + /* Write voltage in mV and frequency in MHz */ + for (i = 0; i < levels; i++) { + opp = dev_pm_opp_find_freq_exact(&device->pdev->dev, + pwr->pwrlevels[i].gpu_freq, true); + /* _opp_get returns uV, convert to mV */ + if (!IS_ERR(opp)) + mvolt = 
dev_pm_opp_get_voltage(opp) / 1000; + kgsl_regwrite(device, addr + 2 + i * 2, mvolt); + kgsl_regwrite(device, addr + 3 + i * 2, + pwr->pwrlevels[i].gpu_freq / 1000000); + } + *length = levels * 2 + 2; +} + +static uint32_t lm_limit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (adreno_dev->lm_limit) + return adreno_dev->lm_limit; + + if (of_property_read_u32(device->pdev->dev.of_node, "qcom,lm-limit", + &adreno_dev->lm_limit)) + adreno_dev->lm_limit = LM_DEFAULT_LIMIT; + + return adreno_dev->lm_limit; +} +/* + * a5xx_lm_init() - Initialize LM/DPM on the GPMU + * @adreno_dev: The adreno device pointer + */ +static void a5xx_lm_init(struct adreno_device *adreno_dev) +{ + uint32_t length; + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || + !test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + /* If something was wrong with the sequence file, return */ + if (adreno_dev->lm_sequence == NULL) + return; + + /* Write LM registers including DPM ucode, coefficients, and config */ + if (_execute_reg_sequence(adreno_dev, adreno_dev->lm_sequence, + adreno_dev->lm_size)) { + /* If the sequence is invalid, it's not getting better */ + adreno_dev->lm_sequence = NULL; + KGSL_PWR_WARN(device, + "Invalid LM sequence\n"); + return; + } + + kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_ID, + adreno_dev->gpucore->gpmu_tsens); + kgsl_regwrite(device, A5XX_GPMU_DELTA_TEMP_THRESHOLD, 0x1); + kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, 0x1); + + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE, + (0x80000000 | device->pwrctrl.active_pwrlevel)); + /* use the leakage to set this value at runtime */ + kgsl_regwrite(device, A5XX_GPMU_BASE_LEAKAGE, + adreno_dev->lm_leakage); + + /* Enable the power threshold and set it to 6000m */ + kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD, + 0x80000000 | lm_limit(adreno_dev)); + + kgsl_regwrite(device, A5XX_GPMU_BEC_ENABLE, 0x10001FFF); + kgsl_regwrite(device, A5XX_GDPM_CONFIG1, 0x00201FF1); + + /* Send an initial message to the GPMU with the LM voltage table */ + kgsl_regwrite(device, AGC_MSG_STATE, 0x1); + kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID); + _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD, &length); + length *= sizeof(uint32_t); + kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, length); + kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE); +} + +/* + * a5xx_lm_enable() - Enable the LM/DPM feature on the GPMU + * @adreno_dev: The adreno device pointer + */ +static void a5xx_lm_enable(struct adreno_device *adreno_dev) +{ + uint32_t val; + struct kgsl_device *device = &adreno_dev->dev; + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) || + !test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) + return; + + /* If no sequence properly initialized, return */ + if (adreno_dev->lm_sequence == NULL) + return; + + kgsl_regwrite(device, A5XX_GDPM_INT_MASK, 0x00000000); + kgsl_regwrite(device, A5XX_GDPM_INT_EN, 0x0000000A); + kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, 0x00000001); + kgsl_regwrite(device, A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK, + 0x00050000); + kgsl_regwrite(device, A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL, + 0x00030000); + if (adreno_is_a530v2(adreno_dev)) + val = 0x00060011; + /* v3 value */ + else + val = 0x00000011; + kgsl_regwrite(device, A5XX_GPMU_CLOCK_THROTTLE_CTRL, val); +} + +static int gpmu_set_level(struct kgsl_device *device, unsigned int val) +{ + unsigned int reg; + int retry = 20; + + 
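+ /*
+ * Bit 31 of the value written to A5XX_GPMU_GPMU_VOLTAGE acts as a
+ * busy flag set by the caller; the GPMU firmware clears it once the
+ * request has been taken, so poll for it to drop before declaring
+ * the level change complete.
+ */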
kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE, val); + + do { + kgsl_regread(device, A5XX_GPMU_GPMU_VOLTAGE, ®); + } while ((reg & 0x80000000) && retry--); + + return (reg & 0x80000000) ? -ETIMEDOUT : 0; +} + +/* + * a5xx_pwrlevel_change_settings() - Program the hardware during power level + * transitions + * @adreno_dev: The adreno device pointer + * @prelevel: The previous power level + * @postlevel: The new power level + * @post: True if called after the clock change has taken effect + */ +static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev, + unsigned int prelevel, unsigned int postlevel, + bool post) +{ + struct kgsl_device *device = &adreno_dev->dev; + static int pre; + int on = 0; + + /* Only call through if PPD or LM is supported and enabled */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_PPD) && + test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag)) + on = ADRENO_PPD; + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LM) && + test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) + on = ADRENO_LM; + + if (!on) + return; + + /* if this is a real pre, or a post without a previous pre, set pre */ + if ((post == 0) || (pre == 0 && post == 1)) + pre = 1; + else if (post == 1) + pre = 0; + + if (pre) { + if (gpmu_set_level(device, (0x80000010 | postlevel))) + KGSL_CORE_ERR( + "GPMU pre powerlevel did not stabilize\n"); + } + + if (post) { + if (gpmu_set_level(device, (0x80000000 | postlevel))) + KGSL_CORE_ERR( + "GPMU post powerlevel did not stabilize\n"); + pre = 0; + } +} + +static void a5xx_enable_64bit(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_regwrite(device, A5XX_CP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VSC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_GRAS_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_RB_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_PC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_HLSQ_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VFD_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_VPC_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_UCHE_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_SP_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_TPL1_ADDR_MODE_CNTL, 0x1); + kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1); +} + +/* + * a5xx_gpmu_reset() - Re-enable GPMU based power features and restart GPMU + * @work: Pointer to the work struct for gpmu reset + * + * Load the GPMU microcode, set up any features such as hardware clock gating + * or IFPC, and take the GPMU out of reset. + */ +static void a5xx_gpmu_reset(struct work_struct *work) +{ + struct adreno_device *adreno_dev = container_of(work, + struct adreno_device, gpmu_work); + struct kgsl_device *device = &adreno_dev->dev; + + if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv)) + return; + + /* + * If GPMU has already experienced a restart or is in the process of it + * after the watchdog timeout, then there is no need to reset GPMU + * again. 
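+ * The state check below also skips the reset when the device has
+ * already powered down, since the GPMU is reprogrammed on the next
+ * start anyway.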
+ */ + if (device->state != KGSL_STATE_NAP && + device->state != KGSL_STATE_AWARE && + device->state != KGSL_STATE_ACTIVE) + return; + + mutex_lock(&device->mutex); + + if (device->state == KGSL_STATE_NAP) + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + + if (a5xx_regulator_enable(adreno_dev)) + goto out; + + /* Soft reset of the GPMU block */ + kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, BIT(16)); + + a5xx_lm_init(adreno_dev); + + a5xx_enable_pc(adreno_dev); + + a5xx_gpmu_start(adreno_dev); + + a5xx_lm_enable(adreno_dev); + +out: + mutex_unlock(&device->mutex); +} + +/* + * a5xx_start() - Device start + * @adreno_dev: Pointer to adreno device + * + * a5xx device start + */ +static void a5xx_start(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_iommu *iommu = device->mmu.priv; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + unsigned int i; + struct adreno_ringbuffer *rb; + uint64_t def_ttbr0; + uint32_t contextidr; + + adreno_vbif_start(adreno_dev, a5xx_vbif_platforms, + ARRAY_SIZE(a5xx_vbif_platforms)); + + /* Make all blocks contribute to the GPU BUSY perf counter */ + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF); + + /* + * Enable the RBBM error reporting bits. This lets us get + * useful information on failure + */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL0, 0x00000001); + + /* + * Turn on hang detection for a530 v2 and beyond. This spews a + * lot of useful information into the RBBM registers on a hang. + */ + if (!adreno_is_a530v1(adreno_dev)) { + /* + * We have 4 RB units, and only RB0 activity signals are working + * correctly. Mask out RB1-3 activity signals from the HW hang + * detection logic as per recommendation of hardware team. + */ + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11, + 0xF0000000); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17, + 0xFFFFFFFF); + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18, + 0xFFFFFFFF); + + set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + gpudev->irq->mask |= (1 << A5XX_INT_MISC_HANG_DETECT); + /* + * Set hang detection threshold to 1 million cycles + * (0xFFFF*16) + */ + kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_INT_CNTL, + (1 << 30) | 0xFFFF); + } + + + /* Turn on performance counters */ + kgsl_regwrite(device, A5XX_RBBM_PERFCTR_CNTL, 0x01); + + /* + * This is to increase performance by restricting VFD's cache access, + * so that LRZ and other data get evicted less. 
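+ * (The A5XX_UCHE_CACHE_WAYS write below applies that restriction,
+ * presumably by limiting the number of ways VFD may allocate into.)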
+ */ + kgsl_regwrite(device, A5XX_UCHE_CACHE_WAYS, 0x02); + + /* + * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively + * disabling L2 bypass + */ + kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_LO, 0xffff0000); + kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_HI, 0x0001ffff); + kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_LO, 0xffff0000); + kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff); + + /* Program the GMEM VA range for the UCHE path */ + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_LO, + ADRENO_UCHE_GMEM_BASE); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x0); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_LO, + ADRENO_UCHE_GMEM_BASE + + adreno_dev->gmem_size - 1); + kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x0); + + /* + * Below CP registers are 0x0 by default, program init + * values based on a5xx flavor. + */ + if (adreno_is_a505_or_a506(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + } else if (adreno_is_a510(adreno_dev)) { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x20); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A); + } else { + kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40); + kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x40); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060); + kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16); + } + + /* + * vtxFifo and primFifo thresholds default values + * are different. + */ + if (adreno_is_a505_or_a506(adreno_dev)) + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x100 << 11 | 0x100 << 22)); + else if (adreno_is_a510(adreno_dev)) + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x200 << 11 | 0x200 << 22)); + else + kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL, + (0x400 << 11 | 0x300 << 22)); + + /* + * A5x USP LDST non valid pixel wrongly update read combine offset + * In A5xx we added optimization for read combine. There could be cases + * on a530 v1 there is no valid pixel but the active masks is not + * cleared and the offset can be wrongly updated if the invalid address + * can be combined. The wrongly latched value will make the returning + * data got shifted at wrong offset. 
workaround this issue by disabling + * LD combine, bit[25] of SP_DBG_ECO_CNTL (sp chicken bit[17]) need to + * be set to 1, default is 0(enable) + */ + if (adreno_is_a530v1(adreno_dev)) + kgsl_regrmw(device, A5XX_SP_DBG_ECO_CNTL, 0, (1 << 25)); + + if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) { + /* + * Set TWOPASSUSEWFI in A5XX_PC_DBG_ECO_CNTL for + * microcodes after v77 + */ + if ((adreno_compare_pfp_version(adreno_dev, 0x5FF077) >= 0)) + kgsl_regrmw(device, A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); + } + + /* Set the USE_RETENTION_FLOPS chicken bit */ + kgsl_regwrite(device, A5XX_CP_CHICKEN_DBG, 0x02000000); + + /* Enable ISDB mode if requested */ + if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) { + if (!kgsl_active_count_get(device)) { + /* + * Disable ME/PFP split timeouts when the debugger is + * enabled because the CP doesn't know when a shader is + * in active debug + */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0x06FFFFFF); + + /* Force the SP0/SP1 clocks on to enable ISDB */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP0, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP1, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP2, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP3, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP0, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP1, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP2, 0x0); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP3, 0x0); + + /* disable HWCG */ + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x0); + kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x0); + } else + KGSL_CORE_ERR( + "Active count failed while turning on ISDB."); + } else { + /* if not in ISDB mode enable ME/PFP split notification */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); + /* enable HWCG */ + a5xx_hwcg_init(adreno_dev); + } + + kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL2, 0x0000003F); + + if (adreno_is_preemption_enabled(adreno_dev)) { + struct kgsl_pagetable *pt = device->mmu.defaultpagetable; + + def_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt); + contextidr = kgsl_mmu_pagetable_get_contextidr(pt); + + /* Initialize the context switch record here */ + kgsl_sharedmem_writel(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(magic), + A5XX_CP_SMMU_INFO_MAGIC_REF); + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(ttbr0), def_ttbr0); + /* + * The CP doesn't actually use the asid field, so + * put a bad value into it until it is removed from + * the preemption record. 
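+ * (0xdecafbad written below is that placeholder value.)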
+ */ + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(asid), + 0xdecafbad); + kgsl_sharedmem_writeq(device, &iommu->smmu_info, + PREEMPT_SMMU_RECORD(context_idr), + contextidr); + adreno_writereg64(adreno_dev, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + iommu->smmu_info.gpuaddr); + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(rptr), 0); + kgsl_sharedmem_writel(rb->device, &rb->preemption_desc, + PREEMPT_RECORD(wptr), 0); + kgsl_sharedmem_writeq(rb->device, &rb->pagetable_desc, + offsetof(struct adreno_ringbuffer_pagetable_info, + ttbr0), def_ttbr0); + } + } + + a5xx_protect_init(adreno_dev); +} + +static int _preemption_init( + struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, unsigned int *cmds, + struct kgsl_context *context) +{ + unsigned int *cmds_orig = cmds; + uint64_t gpuaddr = rb->preemption_desc.gpuaddr; + uint64_t gpuaddr_token = rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(0, preempted); + + /* Turn CP protection OFF */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + /* + * CP during context switch will save context switch info to + * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR + */ + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1); + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1); + *cmds++ = upper_32_bits(gpuaddr); + + /* Turn CP protection ON */ + *cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1); + *cmds++ = 0; + + *cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1); + *cmds++ = 1; + + /* Enable yield in RB only */ + *cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1); + *cmds++ = 1; + + *cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr_token); + *cmds++ = 1; + /* generate interrupt on preemption completion */ + *cmds++ = 1; + + return cmds - cmds_orig; +} + +/* Print some key registers if a spin-for-idle times out */ +static void spin_idle_debug(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int rptr, wptr; + unsigned int status, status3, intstatus; + unsigned int hwfault; + + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + + kgsl_regread(device, A5XX_RBBM_STATUS, &status); + kgsl_regread(device, A5XX_RBBM_STATUS3, &status3); + kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &intstatus); + kgsl_regread(device, A5XX_CP_HW_FAULT, &hwfault); + + dev_err(device->dev, + " rb=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n", + rptr, wptr, status, status3, intstatus); + dev_err(device->dev, " hwfault=%8.8X\n", hwfault); +} + +static void a5xx_post_start(struct adreno_device *adreno_dev) +{ + unsigned int *cmds, *start; + struct adreno_ringbuffer *rb = adreno_dev->cur_rb; + + cmds = adreno_ringbuffer_allocspace(rb, 42); + if (IS_ERR_OR_NULL(cmds)) + return; + + start = cmds; + + /* + * Send a pipeline stat event whenever the GPU gets powered up + * to cause misbehaving perf counters to start ticking + */ + if (adreno_is_a530(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1); + *cmds++ = 0xF; + } + + if (adreno_is_preemption_enabled(adreno_dev)) + cmds += _preemption_init(adreno_dev, rb, cmds, NULL); + + rb->wptr = rb->wptr - (42 - (cmds 
- start)); + + if (cmds == start) + return; + + if (adreno_ringbuffer_submit_spin(rb, NULL, 2000)) { + struct kgsl_device *device = &adreno_dev->dev; + + KGSL_DRV_ERR(device, "hw initialization failed to idle\n"); + kgsl_device_snapshot(device, NULL); + } +} + +/* + * a5xx_hw_init() - Initialize GPU HW using PM4 cmds + * @adreno_dev: Pointer to adreno device + * + * Submit PM4 commands for HW initialization, + */ +static int a5xx_hw_init(struct adreno_device *adreno_dev) +{ + int ret; + struct kgsl_device *device = &adreno_dev->dev; + + /* GPU comes up in secured mode, make it unsecured by default */ + if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + + /* Set up LM before initializing the GPMU */ + a5xx_lm_init(adreno_dev); + + /* Enable SPTP based power collapse before enabling GPMU */ + a5xx_enable_pc(adreno_dev); + + /* Program the GPMU */ + ret = a5xx_gpmu_start(adreno_dev); + if (ret) + return ret; + + /* Enable limits management */ + a5xx_lm_enable(adreno_dev); + + a5xx_post_start(adreno_dev); + + return 0; +} + +static int a5xx_switch_to_unsecure_mode(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 2); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + cmds += cp_secure_mode(adreno_dev, cmds, 0); + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret != 0) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "Switch to unsecure failed to idle\n"); + spin_idle_debug(device); + kgsl_device_snapshot(device, NULL); + } + + return ret; +} + +/* + * a5xx_rb_init() - Initialize ringbuffer + * @adreno_dev: Pointer to adreno device + * @rb: Pointer to the ringbuffer of device + * + * Submit commands for ME initialization, + */ +static int a5xx_rb_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb) +{ + unsigned int *cmds; + int ret; + + cmds = adreno_ringbuffer_allocspace(rb, 8); + if (IS_ERR(cmds)) + return PTR_ERR(cmds); + if (cmds == NULL) + return -ENOSPC; + + *cmds++ = cp_type7_packet(CP_ME_INIT, 7); + /* + * Mask -- look for all ordinals but drawcall + * range and reset ucode scratch memory. 
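+ * (0x0000000f below sets only bits 0-3; bit 4 stays clear, which is
+ * why the CP ignores the final two dwords of this packet, as noted
+ * further down.)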
+ */ + *cmds++ = 0x0000000f; + /* Multiple HW ctxs are unreliable on a530v1, use single hw context */ + if (adreno_is_a530v1(adreno_dev)) + *cmds++ = 0x00000000; + else + /* Use both contexts for 3D (bit0) 2D (bit1) */ + *cmds++ = 0x00000003; + /* Enable register protection */ + *cmds++ = 0x20000000; + /* Header dump address */ + *cmds++ = 0x00000000; + /* Header dump enable and dump size */ + *cmds++ = 0x00000000; + /* Below will be ignored by the CP unless bit4 in Mask is set */ + *cmds++ = 0x00000000; + *cmds++ = 0x00000000; + + ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000); + if (ret != 0) { + struct kgsl_device *device = &adreno_dev->dev; + + dev_err(device->dev, "CP initialization failed to idle\n"); + spin_idle_debug(device); + kgsl_device_snapshot(device, NULL); + } + + /* GPU comes up in secured mode, make it unsecured by default */ + if (ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION)) + ret = a5xx_switch_to_unsecure_mode(adreno_dev, rb); + + return ret; +} + +static int _load_firmware(struct adreno_device *adreno_dev, const char *fwfile, + struct kgsl_memdesc *ucode, size_t *ucode_size, + unsigned int *ucode_version) +{ + struct kgsl_device *device = &adreno_dev->dev; + const struct firmware *fw = NULL; + int ret; + + ret = request_firmware(&fw, fwfile, device->dev); + + if (ret) { + KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n", + fwfile, ret); + return ret; + } + + ret = kgsl_allocate_global(device, ucode, fw->size - 4, + KGSL_MEMFLAGS_GPUREADONLY, 0); + + if (ret) + goto done; + + memcpy(ucode->hostptr, &fw->data[4], fw->size - 4); + *ucode_size = (fw->size - 4) / sizeof(uint32_t); + *ucode_version = *(unsigned int *)&fw->data[4]; + +done: + release_firmware(fw); + + return ret; +} + +/* + * a5xx_microcode_read() - Read microcode + * @adreno_dev: Pointer to adreno device + */ +static int a5xx_microcode_read(struct adreno_device *adreno_dev) +{ + int ret; + + ret = _load_firmware(adreno_dev, + adreno_dev->gpucore->pm4fw_name, &adreno_dev->pm4, + &adreno_dev->pm4_fw_size, &adreno_dev->pm4_fw_version); + if (ret) + return ret; + + ret = _load_firmware(adreno_dev, + adreno_dev->gpucore->pfpfw_name, &adreno_dev->pfp, + &adreno_dev->pfp_fw_size, &adreno_dev->pfp_fw_version); + if (ret) + return ret; + + ret = _load_gpmu_firmware(adreno_dev); + if (ret) + return ret; + + _load_regfile(adreno_dev); + + return ret; +} + +/* + * a5xx_microcode_load() - Load microcode + * @adreno_dev: Pointer to adreno device + * @start_type: type of device start cold/warm + */ +static int a5xx_microcode_load(struct adreno_device *adreno_dev, + unsigned int start_type) +{ + void *ptr; + struct kgsl_device *device = &adreno_dev->dev; + uint64_t gpuaddr; + + gpuaddr = adreno_dev->pm4.gpuaddr; + kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_LO, + lower_32_bits(gpuaddr)); + kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_HI, + upper_32_bits(gpuaddr)); + + gpuaddr = adreno_dev->pfp.gpuaddr; + kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_LO, + lower_32_bits(gpuaddr)); + kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_HI, + upper_32_bits(gpuaddr)); + + /* + * Resume call to write the zap shader base address into the + * appropriate register + */ + if (zap_ucode_loaded) { + int ret; + struct scm_desc desc = {0}; + + desc.args[0] = 0; + desc.args[1] = 13; + desc.arginfo = SCM_ARGS(2); + + ret = scm_call2(SCM_SIP_FNID(SCM_SVC_BOOT, 0xA), &desc); + if (ret) { + pr_err("SCM resume call failed with error %d\n", ret); + return ret; + } + + } + + /* Load the zap shader firmware through PIL if its 
available */ + if (adreno_dev->gpucore->zap_name && !zap_ucode_loaded) { + ptr = subsystem_get(adreno_dev->gpucore->zap_name); + + /* Return error if the zap shader cannot be loaded */ + if (IS_ERR_OR_NULL(ptr)) + return (ptr == NULL) ? -ENODEV : PTR_ERR(ptr); + + zap_ucode_loaded = 1; + } + + return 0; +} + +static struct adreno_perfcount_register a5xx_perfcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO, + A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO, + A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO, + A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO, + A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO, + A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO, + A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO, + A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO, + A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 }, +}; + +/* + * Note that PERFCTR_RBBM_0 is missing - it is used to emulate the PWR counters. + * See below. + */ +static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO, + A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO, + A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO, + A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_pc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO, + A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO, + A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO, + A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO, + A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO, + A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO, + A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO, + A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO, + A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO, + A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO, + A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO, + A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO, + 
A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO, + A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO, + A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO, + A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO, + A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO, + A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO, + A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO, + A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO, + A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO, + A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO, + A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO, + A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO, + A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO, + A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO, + A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO, + A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO, + A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO, + A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO, + A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO, + A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO, + A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_tse[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO, + A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO, + A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO, + A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO, + A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 }, +}; + + +static struct adreno_perfcount_register 
a5xx_perfcounters_ras[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO, + A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO, + A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO, + A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO, + A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO, + A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO, + A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO, + A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO, + A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO, + A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO, + A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO, + A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO, + A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO, + A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO, + A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO, + A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO, + A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO, + A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO, + A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO, + A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO, + A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO, + A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO, + A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO, + A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO, + A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO, + A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO, + 
A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO, + A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO, + A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO, + A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO, + A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO, + A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO, + A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO, + A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO, + A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO, + A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO, + A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO, + A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO, + A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO, + A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO, + A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO, + A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO, + A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO, + A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, A5XX_GRAS_PERFCTR_LRZ_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO, + A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO, + A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO, + A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO, + A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO, + A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO, + A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO, + A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0, + 
A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1, + A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2, + A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3, + A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0, + A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1, + A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2, + A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 }, +}; + +static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO, + A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 }, +}; + +/* + * 5XX targets don't really have physical PERFCTR_PWR registers - we emulate + * them using similar performance counters from the RBBM block. The difference + * betweeen using this group and the RBBM group is that the RBBM counters are + * reloaded after a power collapse which is not how the PWR counters behaved on + * legacy hardware. In order to limit the disruption on the rest of the system + * we go out of our way to ensure backwards compatability. Since RBBM counters + * are in short supply, we don't emulate PWR:0 which nobody uses - mark it as + * broken. + */ +static struct adreno_perfcount_register a5xx_perfcounters_pwr[] = { + { KGSL_PERFCOUNTER_BROKEN, 0, 0, 0, 0, -1, 0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_0_LO, + A5XX_RBBM_PERFCTR_RBBM_0_HI, -1, 0}, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO, + A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO, + A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO, + A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO, + A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO, + A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO, + A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO, + A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO, + A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO, + A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO, + A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO, + A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 }, + { 
KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO, + A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO, + A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO, + A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO, + A5XX_UCHE_POWER_COUNTER_0_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO, + A5XX_UCHE_POWER_COUNTER_1_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO, + A5XX_UCHE_POWER_COUNTER_2_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO, + A5XX_UCHE_POWER_COUNTER_3_HI, -1, + A5XX_UCHE_POWERCTR_UCHE_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO, + A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO, + A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO, + A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO, + A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO, + A5XX_GPMU_POWER_COUNTER_0_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO, + A5XX_GPMU_POWER_COUNTER_1_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO, + A5XX_GPMU_POWER_COUNTER_2_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO, + A5XX_GPMU_POWER_COUNTER_3_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_0 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO, + A5XX_GPMU_POWER_COUNTER_4_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_1 }, + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO, + A5XX_GPMU_POWER_COUNTER_5_HI, -1, + A5XX_GPMU_POWER_COUNTER_SELECT_1 }, +}; + +static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = { + { KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO, + A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 }, +}; + +#define A5XX_PERFCOUNTER_GROUP(offset, name) \ + ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name) + +#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags) + +#define A5XX_POWER_COUNTER_GROUP(offset, name) \ + ADRENO_POWER_COUNTER_GROUP(a5xx, offset, name) + +static struct adreno_perfcount_group a5xx_perfcounter_groups + [KGSL_PERFCOUNTER_GROUP_MAX] = { + A5XX_PERFCOUNTER_GROUP(CP, cp), + A5XX_PERFCOUNTER_GROUP(RBBM, rbbm), + A5XX_PERFCOUNTER_GROUP(PC, pc), + A5XX_PERFCOUNTER_GROUP(VFD, vfd), + A5XX_PERFCOUNTER_GROUP(HLSQ, hlsq), + A5XX_PERFCOUNTER_GROUP(VPC, vpc), + A5XX_PERFCOUNTER_GROUP(CCU, ccu), + A5XX_PERFCOUNTER_GROUP(CMP, cmp), + A5XX_PERFCOUNTER_GROUP(TSE, tse), + 
A5XX_PERFCOUNTER_GROUP(RAS, ras), + A5XX_PERFCOUNTER_GROUP(LRZ, lrz), + A5XX_PERFCOUNTER_GROUP(UCHE, uche), + A5XX_PERFCOUNTER_GROUP(TP, tp), + A5XX_PERFCOUNTER_GROUP(SP, sp), + A5XX_PERFCOUNTER_GROUP(RB, rb), + A5XX_PERFCOUNTER_GROUP(VSC, vsc), + A5XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A5XX_PERFCOUNTER_GROUP(VBIF, vbif), + A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson, + ADRENO_PERFCOUNTER_GROUP_FIXED), + A5XX_POWER_COUNTER_GROUP(SP, sp), + A5XX_POWER_COUNTER_GROUP(TP, tp), + A5XX_POWER_COUNTER_GROUP(RB, rb), + A5XX_POWER_COUNTER_GROUP(CCU, ccu), + A5XX_POWER_COUNTER_GROUP(UCHE, uche), + A5XX_POWER_COUNTER_GROUP(CP, cp), + A5XX_POWER_COUNTER_GROUP(GPMU, gpmu), + A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson), +}; + +static struct adreno_perfcounters a5xx_perfcounters = { + a5xx_perfcounter_groups, + ARRAY_SIZE(a5xx_perfcounter_groups), +}; + +static struct adreno_ft_perf_counters a5xx_ft_perf_counters[] = { + {KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP_ALU_ACTIVE_CYCLES}, + {KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP0_ICL1_MISSES}, + {KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP_FS_CFLOW_INSTRUCTIONS}, + {KGSL_PERFCOUNTER_GROUP_TSE, A5XX_TSE_INPUT_PRIM_NUM}, +}; + +/* Register offset defines for A5XX, in order of enum adreno_regs */ +static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = { + ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A5XX_CP_WFI_PEND_CTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A5XX_CP_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A5XX_CP_IB1_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A5XX_CP_IB1_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A5XX_CP_IB2_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A5XX_CP_ROQ_DBG_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A5XX_CP_ROQ_DBG_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A5XX_CP_MERCIU_DBG_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A5XX_CP_MERCIU_DBG_DATA_1), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, + A5XX_CP_MERCIU_DBG_DATA_2), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A5XX_CP_MEQ_DBG_ADDR), + ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A5XX_CP_MEQ_DBG_DATA), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO), + ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI, + A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A5XX_RBBM_PERFCTR_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + 
A5XX_RBBM_PERFCTR_LOAD_CMD0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + A5XX_RBBM_PERFCTR_LOAD_CMD1), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + A5XX_RBBM_PERFCTR_LOAD_CMD2), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3, + A5XX_RBBM_PERFCTR_LOAD_CMD3), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A5XX_RBBM_INT_0_STATUS), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD, + A5XX_RBBM_INT_CLEAR_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A5XX_RBBM_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD, + A5XX_RBBM_BLOCK_SW_RESET_CMD), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2, + A5XX_RBBM_BLOCK_SW_RESET_CMD2), + ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A5XX_UCHE_INVALIDATE0), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + A5XX_RBBM_PERFCTR_LOAD_VALUE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, + A5XX_RBBM_PERFCTR_LOAD_VALUE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONTROL, + A5XX_RBBM_SECVID_TRUST_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, + A5XX_RBBM_SECVID_TRUST_CONFIG), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_CONTROL, + A5XX_RBBM_SECVID_TSB_CNTL), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE, + A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI, + A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE, + A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + A5XX_RBBM_ALWAYSON_COUNTER_LO), + ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + A5XX_RBBM_ALWAYSON_COUNTER_HI), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0, + A5XX_VBIF_XIN_HALT_CTRL0), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1, + A5XX_VBIF_XIN_HALT_CTRL1), + ADRENO_REG_DEFINE(ADRENO_REG_VBIF_VERSION, + A5XX_VBIF_VERSION), +}; + +static const struct adreno_reg_offsets a5xx_reg_offsets = { + .offsets = a5xx_register_offsets, + .offset_0 = ADRENO_REG_REGISTER_MAX, +}; + +static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int status1, status2; + + kgsl_regread(device, A5XX_CP_INTERRUPT_STATUS, &status1); + + if (status1 & BIT(A5XX_CP_OPCODE_ERROR)) { + unsigned int val; + + kgsl_regwrite(device, A5XX_CP_PFP_STAT_ADDR, 0); + + /* + * A5XX_CP_PFP_STAT_DATA is indexed, so read it twice to get the + * value we want + */ + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val); + + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer opcode error | possible opcode=0x%8.8X\n", + val); + } + if (status1 & BIT(A5XX_CP_RESERVED_BIT_ERROR)) + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer reserved bit error interrupt\n"); + if (status1 & BIT(A5XX_CP_HW_FAULT_ERROR)) { + kgsl_regread(device, A5XX_CP_HW_FAULT, &status2); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Ringbuffer HW fault | status=%x\n", + status2); + } + if (status1 & BIT(A5XX_CP_DMA_ERROR)) + KGSL_DRV_CRIT_RATELIMIT(device, "CP | DMA error\n"); + if (status1 & BIT(A5XX_CP_REGISTER_PROTECTION_ERROR)) { + kgsl_regread(device, A5XX_CP_PROTECT_STATUS, &status2); + KGSL_DRV_CRIT_RATELIMIT(device, + "CP | Protected mode error| %s | addr=%x | status=%x\n", + status2 & (1 << 24) ? 
"WRITE" : "READ", + (status2 & 0xFFFFF) >> 2, status2); + } + if (status1 & BIT(A5XX_CP_AHB_ERROR)) { + kgsl_regread(device, A5XX_CP_AHB_FAULT, &status2); + KGSL_DRV_CRIT_RATELIMIT(device, + "ringbuffer AHB error interrupt | status=%x\n", + status2); + } +} + +static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + switch (bit) { + case A5XX_INT_RBBM_AHB_ERROR: { + kgsl_regread(device, A5XX_RBBM_AHB_ERROR_STATUS, ®); + + /* + * Return the word address of the erroring register so that it + * matches the register specification + */ + KGSL_DRV_CRIT(device, + "RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n", + reg & (1 << 28) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3, + (reg >> 24) & 0xF); + + /* Clear the error */ + kgsl_regwrite(device, A5XX_RBBM_AHB_CMD, (1 << 4)); + return; + } + case A5XX_INT_RBBM_TRANSFER_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: AHB transfer timeout\n"); + break; + case A5XX_INT_RBBM_ME_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_ME_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | ME master split timeout | status=%x\n", reg); + break; + case A5XX_INT_RBBM_PFP_MS_TIMEOUT: + kgsl_regread(device, A5XX_RBBM_AHB_PFP_SPLIT_STATUS, ®); + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM | PFP master split timeout | status=%x\n", reg); + break; + case A5XX_INT_RBBM_ETS_MS_TIMEOUT: + KGSL_DRV_CRIT_RATELIMIT(device, + "RBBM: ME master split timeout\n"); + break; + case A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB ASYNC overflow\n"); + break; + case A5XX_INT_RBBM_ATB_BUS_OVERFLOW: + KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n"); + break; + case A5XX_INT_UCHE_OOB_ACCESS: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n"); + break; + case A5XX_INT_UCHE_TRAP_INTR: + KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Trap interrupt\n"); + break; + case A5XX_INT_GPMU_VOLTAGE_DROOP: + KGSL_DRV_CRIT_RATELIMIT(device, "GPMU: Voltage droop\n"); + break; + default: + KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt %d\n", bit); + } +} + +static void a5xx_gpmu_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int reg; + + kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, ®); + + if (reg & BIT(31)) { + if (test_and_clear_bit(ADRENO_DEVICE_GPMU_INITIALIZED, + &adreno_dev->priv)) { + /* Stop GPMU */ + kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 1); + + kgsl_schedule_work(&adreno_dev->gpmu_work); + + KGSL_DRV_CRIT_RATELIMIT(device, + "GPMU: Watchdog bite\n"); + } + } else if (!(reg & BIT(1))) + KGSL_DRV_CRIT_RATELIMIT(device, + "GPMU: Unknown interrupt 0x%08X\n", + reg); +} + +/* +* a5x_gpc_err_int_callback() - Isr for GPC error interrupts +* @adreno_dev: Pointer to device +* @bit: Interrupt bit +*/ +void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + + /* + * GPC error is typically the result of mistake SW programming. + * Force GPU fault for this interrupt so that we can debug it + * with help of register dump. 
+ */ + + KGSL_DRV_CRIT(device, "RBBM: GPC error\n"); + adreno_irqctrl(adreno_dev, 0); + + /* Trigger a fault in the dispatcher - this will effect a restart */ + adreno_set_gpu_fault(ADRENO_DEVICE(device), ADRENO_SOFT_FAULT); + adreno_dispatcher_schedule(device); +} + +#define A5XX_INT_MASK \ + ((1 << A5XX_INT_RBBM_AHB_ERROR) | \ + (1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ME_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT) | \ + (1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW) | \ + (1 << A5XX_INT_RBBM_GPC_ERROR) | \ + (1 << A5XX_INT_CP_HW_ERROR) | \ + (1 << A5XX_INT_CP_IB1) | \ + (1 << A5XX_INT_CP_IB2) | \ + (1 << A5XX_INT_CP_RB) | \ + (1 << A5XX_INT_CP_CACHE_FLUSH_TS) | \ + (1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW) | \ + (1 << A5XX_INT_UCHE_OOB_ACCESS) | \ + (1 << A5XX_INT_UCHE_TRAP_INTR) | \ + (1 << A5XX_INT_CP_SW) | \ + (1 << A5XX_INT_GPMU_FIRMWARE) | \ + (1 << A5XX_INT_GPMU_VOLTAGE_DROOP)) + + +static struct adreno_irq_funcs a5xx_irq_funcs[32] = { + ADRENO_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 1 - RBBM_AHB_ERROR */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 2 - RBBM_TRANSFER_TIMEOUT */ + /* 3 - RBBM_ME_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 4 - RBBM_PFP_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 5 - RBBM_ETS_MASTER_SPLIT_TIMEOUT */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + /* 6 - RBBM_ATB_ASYNC_OVERFLOW */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), + ADRENO_IRQ_CALLBACK(a5x_gpc_err_int_callback), /* 7 - GPC_ERR */ + ADRENO_IRQ_CALLBACK(adreno_dispatcher_preempt_callback),/* 8 - CP_SW */ + ADRENO_IRQ_CALLBACK(a5xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */ + /* 10 - CP_CCU_FLUSH_DEPTH_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 11 - CP_CCU_FLUSH_COLOR_TS */ + ADRENO_IRQ_CALLBACK(NULL), + /* 12 - CP_CCU_RESOLVE_TS */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */ + /* 16 - CCP_UNUSED_1 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */ + ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */ + ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */ + /* 21 - UNUSED_2 */ + ADRENO_IRQ_CALLBACK(NULL), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */ + /* 23 - MISC_HANG_DETECT */ + ADRENO_IRQ_CALLBACK(adreno_hang_int_callback), + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 24 - UCHE_OOB_ACCESS */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 25 - UCHE_TRAP_INTR */ + ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */ + ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */ + ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 28 - GPMU_VOLTAGE_DROOP */ + ADRENO_IRQ_CALLBACK(a5xx_gpmu_int_callback), /* 29 - GPMU_FIRMWARE */ + ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */ + ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */ +}; + +static struct adreno_irq a5xx_irq = { + .funcs = a5xx_irq_funcs, + .mask = A5XX_INT_MASK, +}; + +/* + * Default size for CP queues for A5xx targets. You must + * overwrite these value in platform_setup function for + * A5xx derivatives if size differs. 
+ */ +static struct adreno_snapshot_sizes a5xx_snap_sizes = { + .cp_pfp = 36, + .cp_me = 29, + .cp_meq = 64, + .cp_merciu = 64, + .roq = 512, +}; + +static struct adreno_snapshot_data a5xx_snapshot_data = { + .sect_sizes = &a5xx_snap_sizes, +}; + +static struct adreno_coresight_register a5xx_coresight_registers[] = { + { A5XX_RBBM_CFG_DBGBUS_SEL_A }, + { A5XX_RBBM_CFG_DBGBUS_SEL_B }, + { A5XX_RBBM_CFG_DBGBUS_SEL_C }, + { A5XX_RBBM_CFG_DBGBUS_SEL_D }, + { A5XX_RBBM_CFG_DBGBUS_CNTLT }, + { A5XX_RBBM_CFG_DBGBUS_CNTLM }, + { A5XX_RBBM_CFG_DBGBUS_OPL }, + { A5XX_RBBM_CFG_DBGBUS_OPE }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTL_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKL_3 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_0 }, + { A5XX_RBBM_CFG_DBGBUS_BYTEL_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_0 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_1 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_2 }, + { A5XX_RBBM_CFG_DBGBUS_IVTE_3 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_0 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_1 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_2 }, + { A5XX_RBBM_CFG_DBGBUS_MASKE_3 }, + { A5XX_RBBM_CFG_DBGBUS_NIBBLEE }, + { A5XX_RBBM_CFG_DBGBUS_PTRC0 }, + { A5XX_RBBM_CFG_DBGBUS_PTRC1 }, + { A5XX_RBBM_CFG_DBGBUS_LOADREG }, + { A5XX_RBBM_CFG_DBGBUS_IDX }, + { A5XX_RBBM_CFG_DBGBUS_CLRC }, + { A5XX_RBBM_CFG_DBGBUS_LOADIVT }, + { A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC }, + { A5XX_RBBM_CFG_DBGBUS_OVER }, + { A5XX_RBBM_CFG_DBGBUS_COUNT0 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT1 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT2 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT3 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT4 }, + { A5XX_RBBM_CFG_DBGBUS_COUNT5 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 }, + { A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 }, + { A5XX_RBBM_CFG_DBGBUS_MISR0 }, + { A5XX_RBBM_CFG_DBGBUS_MISR1 }, + { A5XX_RBBM_AHB_DBG_CNTL }, + { A5XX_RBBM_READ_AHB_THROUGH_DBG }, + { A5XX_RBBM_DBG_LO_HI_GPIO }, + { A5XX_RBBM_EXT_TRACE_BUS_CNTL }, + { A5XX_RBBM_EXT_VBIF_DBG_CNTL }, +}; + +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a5xx_coresight_registers[7]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]); +static 
ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic, + &a5xx_coresight_registers[33]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr, + &a5xx_coresight_registers[41]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0, + &a5xx_coresight_registers[42]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, + &a5xx_coresight_registers[43]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, + &a5xx_coresight_registers[44]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3, + &a5xx_coresight_registers[45]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4, + &a5xx_coresight_registers[46]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]); +static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]); +static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]); +static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, + &a5xx_coresight_registers[50]); +static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]); +static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]); +static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]); + +static struct attribute *a5xx_coresight_attrs[] = { + &coresight_attr_cfg_dbgbus_sel_a.attr.attr, + &coresight_attr_cfg_dbgbus_sel_b.attr.attr, + &coresight_attr_cfg_dbgbus_sel_c.attr.attr, + &coresight_attr_cfg_dbgbus_sel_d.attr.attr, + &coresight_attr_cfg_dbgbus_cntlt.attr.attr, + &coresight_attr_cfg_dbgbus_cntlm.attr.attr, + &coresight_attr_cfg_dbgbus_opl.attr.attr, + &coresight_attr_cfg_dbgbus_ope.attr.attr, + 
&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_0.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_1.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_2.attr.attr,
+ &coresight_attr_cfg_dbgbus_maske_3.attr.attr,
+ &coresight_attr_cfg_dbgbus_nibblee.attr.attr,
+ &coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
+ &coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
+ &coresight_attr_cfg_dbgbus_loadreg.attr.attr,
+ &coresight_attr_cfg_dbgbus_idx.attr.attr,
+ &coresight_attr_cfg_dbgbus_clrc.attr.attr,
+ &coresight_attr_cfg_dbgbus_loadivt.attr.attr,
+ &coresight_attr_cfg_dbgbus_event_logic.attr.attr,
+ &coresight_attr_cfg_dbgbus_over.attr.attr,
+ &coresight_attr_cfg_dbgbus_count0.attr.attr,
+ &coresight_attr_cfg_dbgbus_count1.attr.attr,
+ &coresight_attr_cfg_dbgbus_count2.attr.attr,
+ &coresight_attr_cfg_dbgbus_count3.attr.attr,
+ &coresight_attr_cfg_dbgbus_count4.attr.attr,
+ &coresight_attr_cfg_dbgbus_count5.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_addr.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf0.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf3.attr.attr,
+ &coresight_attr_cfg_dbgbus_trace_buf4.attr.attr,
+ &coresight_attr_cfg_dbgbus_misr0.attr.attr,
+ &coresight_attr_cfg_dbgbus_misr1.attr.attr,
+ &coresight_attr_ahb_dbg_cntl.attr.attr,
+ &coresight_attr_read_ahb_through_dbg.attr.attr,
+ &coresight_attr_dbg_lo_hi_gpio.attr.attr,
+ &coresight_attr_ext_trace_bus_cntl.attr.attr,
+ &coresight_attr_ext_vbif_dbg_cntl.attr.attr,
+ NULL,
+};
+
+static const struct attribute_group a5xx_coresight_group = {
+ .attrs = a5xx_coresight_attrs,
+};
+
+static const struct attribute_group *a5xx_coresight_groups[] = {
+ &a5xx_coresight_group,
+ NULL,
+};
+
+static struct adreno_coresight a5xx_coresight = {
+ .registers = a5xx_coresight_registers,
+ .count = ARRAY_SIZE(a5xx_coresight_registers),
+ .groups = a5xx_coresight_groups,
+};
+
+/**
+ * a5xx_preempt_trig_state() - Schedule preemption in TRIGGERED
+ * state
+ * @adreno_dev: Device which is in TRIGGERED state
+ */
+static void a5xx_preempt_trig_state(
+ struct adreno_device *adreno_dev)
+{
+ struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+ struct kgsl_device *device = &(adreno_dev->dev);
+ unsigned int preempt_busy;
+ uint64_t rbbase;
+
+ /*
+ * triggered preemption, check for busy bits, if not set go to complete
+ * bit 0: When high indicates CP is not done with preemption.
+ * bit 4: When high indicates that the CP is actively switching between
+ * application contexts.
+ * Check both the bits to make sure CP is done with preemption.
+ */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &preempt_busy); + if (!(preempt_busy & 0x11)) { + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, &rbbase); + /* Did preemption occur, if so then change states and return */ + if (rbbase != adreno_dev->cur_rb->buffer_desc.gpuaddr) { + if (rbbase == + adreno_dev->next_rb->buffer_desc.gpuaddr) { + KGSL_DRV_INFO(device, + "Preemption completed without interrupt\n"); + trace_adreno_hw_preempt_trig_to_comp( + adreno_dev->cur_rb, + adreno_dev->next_rb); + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_COMPLETE); + } else { + /* + * Something wrong with preemption. + * Set fault and reschedule dispatcher to take + * care of fault. + */ + adreno_set_gpu_fault(adreno_dev, + ADRENO_PREEMPT_FAULT); + } + adreno_dispatcher_schedule(device); + return; + } + } + + /* + * Preemption is still happening. + * Hardware not yet idle means that preemption interrupt + * may still occur, nothing to do here until interrupt signals + * completion of preemption, just return here + */ + if (!adreno_hw_isidle(adreno_dev)) + return; + + /* + * We just changed states, reschedule dispatcher to change + * preemption states + */ + if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED != + atomic_read(&dispatcher->preemption_state)) { + adreno_dispatcher_schedule(device); + return; + } + + + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + + /* reschedule dispatcher to take care of the fault */ + adreno_dispatcher_schedule(device); +} + +/** + * a5xx_preempt_clear_state() - Schedule preemption in CLEAR + * state. Preemption can be issued in this state. + * @adreno_dev: Device which is in CLEAR state + */ +static void a5xx_preempt_clear_state( + struct adreno_device *adreno_dev) + +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_ringbuffer *highest_busy_rb; + int switch_low_to_high; + int ret; + + /* Device not awake means there is nothing to do */ + if (!kgsl_state_is_awake(device)) + return; + + /* keep updating the current rptr when preemption is clear */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &(adreno_dev->cur_rb->rptr)); + + highest_busy_rb = adreno_dispatcher_get_highest_busy_rb(adreno_dev); + if (!highest_busy_rb) + return; + + switch_low_to_high = adreno_compare_prio_level( + highest_busy_rb->id, adreno_dev->cur_rb->id); + + /* already current then return */ + if (!switch_low_to_high) + return; + + if (switch_low_to_high < 0) { + + if (!adreno_hw_isidle(adreno_dev)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * if switching to lower priority make sure that the rptr and + * wptr are equal, when the lower rb is not starved + */ + if (adreno_dev->cur_rb->rptr != adreno_dev->cur_rb->wptr) + return; + /* + * switch to default context because when we switch back + * to higher context then its not known which pt will + * be current, so by making it default here the next + * commands submitted will set the right pt + */ + ret = adreno_drawctxt_switch(adreno_dev, + adreno_dev->cur_rb, + NULL, 0); + /* + * lower priority RB has to wait until space opens up in + * higher RB + */ + if (ret) + return; + } + + /* rptr could be updated in drawctxt switch above, update it here */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &(adreno_dev->cur_rb->rptr)); + + /* turn on IOMMU as the preemption may trigger pt switch */ + kgsl_mmu_enable_clk(&device->mmu); + + /* + * setup memory to do the switch to 
highest priority RB
+ * which is not empty or may be starving away (poor thing)
+ */
+ a5xx_preemption_start(adreno_dev, highest_busy_rb);
+
+ atomic_set(&dispatcher->preemption_state,
+ ADRENO_DISPATCHER_PREEMPT_TRIGGERED);
+
+ adreno_dev->next_rb = highest_busy_rb;
+ mod_timer(&dispatcher->preempt_timer, jiffies +
+ msecs_to_jiffies(ADRENO_DISPATCH_PREEMPT_TIMEOUT));
+
+ trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb,
+ adreno_dev->next_rb);
+ /* issue PREEMPT trigger */
+ adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
+
+ adreno_dispatcher_schedule(device);
+}
+
+/**
+ * a5xx_preempt_complete_state() - Schedule preemption in
+ * COMPLETE state
+ * @adreno_dev: Device which is in COMPLETE state
+ */
+static void a5xx_preempt_complete_state(
+ struct adreno_device *adreno_dev)
+
+{
+ struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+ struct kgsl_device *device = &(adreno_dev->dev);
+ struct adreno_dispatcher_cmdqueue *dispatch_q;
+ uint64_t rbbase;
+ unsigned int wptr;
+ unsigned int val;
+ static unsigned long wait_for_preemption_complete;
+
+ del_timer_sync(&dispatcher->preempt_timer);
+
+ adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &val);
+
+ if (val) {
+ /*
+ * Wait for 50ms for preemption state to be updated by CP
+ * before triggering hang
+ */
+ if (wait_for_preemption_complete == 0)
+ wait_for_preemption_complete = jiffies +
+ msecs_to_jiffies(50);
+ if (time_after(jiffies, wait_for_preemption_complete)) {
+ wait_for_preemption_complete = 0;
+ KGSL_DRV_ERR(device,
+ "Invalid state after preemption CP_PREEMPT:%08x STOP:%1x BUSY:%1x\n",
+ val, (val & 0x1), (val & 0x10)>>4);
+ adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+ }
+ adreno_dispatcher_schedule(device);
+ return;
+ }
+
+ wait_for_preemption_complete = 0;
+ adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
+ ADRENO_REG_CP_RB_BASE_HI, &rbbase);
+ if (rbbase != adreno_dev->next_rb->buffer_desc.gpuaddr) {
+ KGSL_DRV_ERR(device,
+ "RBBASE incorrect after preemption, expected %016llx got %016llx\n",
+ rbbase,
+ adreno_dev->next_rb->buffer_desc.gpuaddr);
+ adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+ adreno_dispatcher_schedule(device);
+ return;
+ }
+
+ a5xx_preemption_save(adreno_dev, adreno_dev->cur_rb);
+
+ dispatch_q = &(adreno_dev->cur_rb->dispatch_q);
+ /* new RB is the current RB */
+ trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb,
+ adreno_dev->cur_rb);
+ adreno_dev->prev_rb = adreno_dev->cur_rb;
+ adreno_dev->cur_rb = adreno_dev->next_rb;
+ adreno_dev->cur_rb->preempted_midway = 0;
+ adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
+ adreno_dev->next_rb = NULL;
+
+ if (adreno_disp_preempt_fair_sched) {
+ /* starved rb is now scheduled so unhalt dispatcher */
+ if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED ==
+ adreno_dev->cur_rb->starve_timer_state)
+ adreno_put_gpu_halt(adreno_dev);
+ adreno_dev->cur_rb->starve_timer_state =
+ ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED;
+ adreno_dev->cur_rb->sched_timer = jiffies;
+ /*
+ * If the outgoing RB has commands then set the
+ * busy time for it
+ */
+ if (adreno_dev->prev_rb->rptr != adreno_dev->prev_rb->wptr) {
+ adreno_dev->prev_rb->starve_timer_state =
+ ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
+ adreno_dev->prev_rb->sched_timer = jiffies;
+ } else {
+ adreno_dev->prev_rb->starve_timer_state =
+ ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
+ }
+ }
+ adreno_ringbuffer_mmu_disable_clk_on_ts(device, adreno_dev->cur_rb,
+ adreno_dev->cur_rb->timestamp);
+
+ atomic_set(&dispatcher->preemption_state,
+
ADRENO_DISPATCHER_PREEMPT_CLEAR); + + /* submit wptr if required for new rb */ + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + if (adreno_dev->cur_rb->wptr != wptr) { + kgsl_pwrscale_busy(device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, + adreno_dev->cur_rb->wptr); + } + + adreno_preempt_process_dispatch_queue(adreno_dev, dispatch_q); +} + +static void a5xx_preemption_schedule( + struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_ringbuffer *rb; + int i = 0; + + if (!adreno_is_preemption_enabled(adreno_dev)) + return; + + mutex_lock(&device->mutex); + + /* + * This barrier is needed for most updated preemption_state + * to be read. + */ + smp_mb(); + + if (KGSL_STATE_ACTIVE == device->state) + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + rb->rptr = adreno_get_rptr(rb); + + switch (atomic_read(&dispatcher->preemption_state)) { + case ADRENO_DISPATCHER_PREEMPT_CLEAR: + a5xx_preempt_clear_state(adreno_dev); + break; + case ADRENO_DISPATCHER_PREEMPT_TRIGGERED: + a5xx_preempt_trig_state(adreno_dev); + /* + * if we transitioned to next state then fall-through + * processing to next state + */ + if (!adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_COMPLETE)) + break; + case ADRENO_DISPATCHER_PREEMPT_COMPLETE: + a5xx_preempt_complete_state(adreno_dev); + break; + default: + BUG(); + } + + mutex_unlock(&device->mutex); +} + +struct adreno_gpudev adreno_a5xx_gpudev = { + .reg_offsets = &a5xx_reg_offsets, + .ft_perf_counters = a5xx_ft_perf_counters, + .ft_perf_counters_count = ARRAY_SIZE(a5xx_ft_perf_counters), + .coresight = &a5xx_coresight, + .start = a5xx_start, + .snapshot = a5xx_snapshot, + .irq = &a5xx_irq, + .snapshot_data = &a5xx_snapshot_data, + .irq_trace = trace_kgsl_a5xx_irq_status, + .num_prio_levels = ADRENO_PRIORITY_MAX_RB_LEVELS, + .platform_setup = a5xx_platform_setup, + .init = a5xx_init, + .rb_init = a5xx_rb_init, + .hw_init = a5xx_hw_init, + .microcode_read = a5xx_microcode_read, + .microcode_load = a5xx_microcode_load, + .perfcounters = &a5xx_perfcounters, + .vbif_xin_halt_ctrl0_mask = A5XX_VBIF_XIN_HALT_CTRL0_MASK, + .is_sptp_idle = a5xx_is_sptp_idle, + .regulator_enable = a5xx_regulator_enable, + .regulator_disable = a5xx_regulator_disable, + .pwrlevel_change_settings = a5xx_pwrlevel_change_settings, + .preemption_pre_ibsubmit = a5xx_preemption_pre_ibsubmit, + .preemption_post_ibsubmit = + a5xx_preemption_post_ibsubmit, + .preemption_token = a5xx_preemption_token, + .preemption_init = a5xx_preemption_init, + .preemption_schedule = a5xx_preemption_schedule, + .enable_64bit = a5xx_enable_64bit, +}; diff --git a/drivers/gpu/msm/adreno_a5xx.h b/drivers/gpu/msm/adreno_a5xx.h new file mode 100644 index 000000000000..e10678216b69 --- /dev/null +++ b/drivers/gpu/msm/adreno_a5xx.h @@ -0,0 +1,77 @@ +/* Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _ADRENO_A5XX_H_ +#define _ADRENO_A5XX_H_ + +#define A5XX_CP_CTXRECORD_MAGIC_REF 0x27C4BAFCUL +/* Size of each CP preemption record */ +#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x100000 +/* Size of the preemption counter block (in bytes) */ +#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4) + +/** + * struct a5xx_cp_preemption_record - CP context record for + * preemption. + * @magic: (00) Value at this offset must be equal to + * A5XX_CP_CTXRECORD_MAGIC_REF. + * @info: (04) Type of record. Written non-zero (usually) by CP. + * we must set to zero for all ringbuffers. + * @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets. + * Written by CP when switching out. Not used on switch-in. + * we must initialize to zero. + * @cntl: (12) RB_CNTL, saved and restored by CP. + * @rptr: (16) RB_RPTR, saved and restored by CP. + * @wptr: (20) RB_WPTR, saved and restored by CP. + * @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored. + * rbase: (32) RB_BASE_LO|HI saved and restored. + * counter: (40) Pointer to preemption counter + */ +struct a5xx_cp_preemption_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +#define A5XX_CP_SMMU_INFO_MAGIC_REF 0x3618CDA3UL + +/** + * struct a5xx_cp_smmu_info - CP preemption SMMU info. + * @magic: (00) The value at this offset must be equal to + * A5XX_CP_SMMU_INFO_MAGIC_REF. + * @_pad4: (04) Reserved/padding + * @ttbr0: (08) Base address of the page table for the + * incoming context. + * @context_idr: (16) Context Identification Register value. + */ +struct a5xx_cp_smmu_info { + uint32_t magic; + uint32_t _pad4; + uint64_t ttbr0; + uint32_t asid; + uint32_t context_idr; +}; + +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot); +unsigned int a5xx_num_registers(void); + +void a5xx_crashdump_init(struct adreno_device *adreno_dev); + +#endif diff --git a/drivers/gpu/msm/adreno_a5xx_snapshot.c b/drivers/gpu/msm/adreno_a5xx_snapshot.c new file mode 100644 index 000000000000..95f9198a330a --- /dev/null +++ b/drivers/gpu/msm/adreno_a5xx_snapshot.c @@ -0,0 +1,926 @@ +/* Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/io.h> +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_snapshot.h" +#include "adreno_snapshot.h" +#include "a5xx_reg.h" +#include "adreno_a5xx.h" + +enum a5xx_rbbm_debbus_id { + A5XX_RBBM_DBGBUS_CP = 0x1, + A5XX_RBBM_DBGBUS_RBBM = 0x2, + A5XX_RBBM_DBGBUS_VBIF = 0x3, + A5XX_RBBM_DBGBUS_HLSQ = 0x4, + A5XX_RBBM_DBGBUS_UCHE = 0x5, + A5XX_RBBM_DBGBUS_DPM = 0x6, + A5XX_RBBM_DBGBUS_TESS = 0x7, + A5XX_RBBM_DBGBUS_PC = 0x8, + A5XX_RBBM_DBGBUS_VFDP = 0x9, + A5XX_RBBM_DBGBUS_VPC = 0xa, + A5XX_RBBM_DBGBUS_TSE = 0xb, + A5XX_RBBM_DBGBUS_RAS = 0xc, + A5XX_RBBM_DBGBUS_VSC = 0xd, + A5XX_RBBM_DBGBUS_COM = 0xe, + A5XX_RBBM_DBGBUS_DCOM = 0xf, + A5XX_RBBM_DBGBUS_LRZ = 0x10, + A5XX_RBBM_DBGBUS_A2D_DSP = 0x11, + A5XX_RBBM_DBGBUS_CCUFCHE = 0x12, + A5XX_RBBM_DBGBUS_GPMU = 0x13, + A5XX_RBBM_DBGBUS_RBP = 0x14, + A5XX_RBBM_DBGBUS_HM = 0x15, + A5XX_RBBM_DBGBUS_RBBM_CFG = 0x16, + A5XX_RBBM_DBGBUS_VBIF_CX = 0x17, + A5XX_RBBM_DBGBUS_GPC = 0x1d, + A5XX_RBBM_DBGBUS_LARC = 0x1e, + A5XX_RBBM_DBGBUS_HLSQ_SPTP = 0x1f, + A5XX_RBBM_DBGBUS_RB_0 = 0x20, + A5XX_RBBM_DBGBUS_RB_1 = 0x21, + A5XX_RBBM_DBGBUS_RB_2 = 0x22, + A5XX_RBBM_DBGBUS_RB_3 = 0x23, + A5XX_RBBM_DBGBUS_CCU_0 = 0x28, + A5XX_RBBM_DBGBUS_CCU_1 = 0x29, + A5XX_RBBM_DBGBUS_CCU_2 = 0x2a, + A5XX_RBBM_DBGBUS_CCU_3 = 0x2b, + A5XX_RBBM_DBGBUS_A2D_RAS_0 = 0x30, + A5XX_RBBM_DBGBUS_A2D_RAS_1 = 0x31, + A5XX_RBBM_DBGBUS_A2D_RAS_2 = 0x32, + A5XX_RBBM_DBGBUS_A2D_RAS_3 = 0x33, + A5XX_RBBM_DBGBUS_VFD_0 = 0x38, + A5XX_RBBM_DBGBUS_VFD_1 = 0x39, + A5XX_RBBM_DBGBUS_VFD_2 = 0x3a, + A5XX_RBBM_DBGBUS_VFD_3 = 0x3b, + A5XX_RBBM_DBGBUS_SP_0 = 0x40, + A5XX_RBBM_DBGBUS_SP_1 = 0x41, + A5XX_RBBM_DBGBUS_SP_2 = 0x42, + A5XX_RBBM_DBGBUS_SP_3 = 0x43, + A5XX_RBBM_DBGBUS_TPL1_0 = 0x48, + A5XX_RBBM_DBGBUS_TPL1_1 = 0x49, + A5XX_RBBM_DBGBUS_TPL1_2 = 0x4a, + A5XX_RBBM_DBGBUS_TPL1_3 = 0x4b +}; + +static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = { + { A5XX_RBBM_DBGBUS_CP, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ, 0x100, }, + { A5XX_RBBM_DBGBUS_UCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_DPM, 0x100, }, + { A5XX_RBBM_DBGBUS_TESS, 0x100, }, + { A5XX_RBBM_DBGBUS_PC, 0x100, }, + { A5XX_RBBM_DBGBUS_VFDP, 0x100, }, + { A5XX_RBBM_DBGBUS_VPC, 0x100, }, + { A5XX_RBBM_DBGBUS_TSE, 0x100, }, + { A5XX_RBBM_DBGBUS_RAS, 0x100, }, + { A5XX_RBBM_DBGBUS_VSC, 0x100, }, + { A5XX_RBBM_DBGBUS_COM, 0x100, }, + { A5XX_RBBM_DBGBUS_DCOM, 0x100, }, + { A5XX_RBBM_DBGBUS_LRZ, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, }, + { A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_GPMU, 0x100, }, + { A5XX_RBBM_DBGBUS_RBP, 0x100, }, + { A5XX_RBBM_DBGBUS_HM, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, }, + { A5XX_RBBM_DBGBUS_GPC, 0x100, }, + { A5XX_RBBM_DBGBUS_LARC, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_0, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_1, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_2, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_3, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_0, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_1, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_2, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_3, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_0, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_1, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_2, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_3, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_0, 0x100, }, + { 
A5XX_RBBM_DBGBUS_SP_1, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_2, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_3, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_0, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_1, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_2, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_3, 0x100, }, +}; + +#define A5XX_NUM_AXI_ARB_BLOCKS 2 +#define A5XX_NUM_XIN_BLOCKS 5 + +/* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */ +static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size = adreno_dev->pm4_fw_size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PM4_RAM; + header->size = size; + + memcpy(data, adreno_dev->pm4.hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_snapshot_cp_pfp() - Dump the PFP data on snapshot */ +static size_t a5xx_snapshot_cp_pfp(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int size = adreno_dev->pfp_fw_size; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_PFP_RAM; + header->size = size; + + memcpy(data, adreno_dev->pfp.hostptr, size * sizeof(uint32_t)); + + return DEBUG_SECTION_SZ(size); +} + +/* a5xx_rbbm_debug_bus_read() - Read data from trace bus */ +static void a5xx_rbbm_debug_bus_read(struct kgsl_device *device, + unsigned int block_id, unsigned int index, unsigned int *val) +{ + unsigned int reg; + + reg = (block_id << A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) | + (index << A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_A, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_B, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_C, reg); + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_D, reg); + + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2, val); + val++; + kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1, val); + +} + +/* a5xx_snapshot_vbif_debugbus() - Dump the VBIF debug data */ +static size_t a5xx_snapshot_vbif_debugbus(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i, j; + /* + * Total number of VBIF data words considering 3 sections: + * 2 arbiter blocks of 16 words + * 5 AXI XIN blocks of 4 dwords each + * 5 core clock side XIN blocks of 5 dwords each + */ + unsigned int dwords = (16 * A5XX_NUM_AXI_ARB_BLOCKS) + + (4 * A5XX_NUM_XIN_BLOCKS) + (5 * A5XX_NUM_XIN_BLOCKS); + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + size_t size; + unsigned int reg_clk; + + size = (dwords * sizeof(unsigned int)) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + header->id = block->block_id; + header->count = dwords; + + kgsl_regread(device, A5XX_VBIF_CLKON, ®_clk); + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk | + (A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK << + A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT)); + 
kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 0); + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS_OUT_CTRL, + (A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK << + A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT)); + for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, + (1 << (i + 16))); + for (j = 0; j < 16; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks AXI side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 4; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1, + ((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + + /* XIN blocks core clock side */ + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 5; j++) { + kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL1, + ((j & A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK) + << A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT)); + kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT, + data); + data++; + } + } + /* restore the clock of VBIF */ + kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk); + return size; +} + +/* a5xx_snapshot_debugbus_block() - Capture debug data for a gpu block */ +static size_t a5xx_snapshot_debugbus_block(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_debugbus *header = + (struct kgsl_snapshot_debugbus *)buf; + struct adreno_debugbus_block *block = priv; + int i; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int dwords; + size_t size; + + dwords = block->dwords; + + /* For a5xx each debug bus data unit is 2 DWRODS */ + size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header); + + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS"); + return 0; + } + + header->id = block->block_id; + header->count = dwords * 2; + + for (i = 0; i < dwords; i++) + a5xx_rbbm_debug_bus_read(device, block->block_id, i, + &data[i*2]); + + return size; +} + +/* a5xx_snapshot_debugbus() - Capture debug bus data */ +static void a5xx_snapshot_debugbus(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + int i; + + kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_CNTLM, + 0xf << A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT); + + for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) { + if (A5XX_RBBM_DBGBUS_VBIF == a5xx_debugbus_blocks[i].block_id) + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_vbif_debugbus, + (void *) &a5xx_debugbus_blocks[i]); + else + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_DEBUGBUS, + snapshot, a5xx_snapshot_debugbus_block, + (void *) &a5xx_debugbus_blocks[i]); + } +} + +static const unsigned int a5xx_vbif_ver_20040000_registers[] = { + /* VBIF version 0x20040000*/ + 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030, + 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, + 0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061, + 0x3064, 0x3068, 0x306C, 0x306D, 0x3080, 0x3088, 0x308C, 0x308C, + 0x3090, 0x3094, 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, + 0x30C8, 0x30C8, 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, + 0x3100, 0x3100, 0x3108, 0x3108, 0x3110, 0x3110, 
0x3118, 0x3118, + 0x3120, 0x3120, 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, + 0x340C, 0x340C, 0x3410, 0x3410, 0x3800, 0x3801, +}; + +static const struct adreno_vbif_snapshot_registers +a5xx_vbif_snapshot_registers[] = { + { 0x20040000, a5xx_vbif_ver_20040000_registers, + ARRAY_SIZE(a5xx_vbif_ver_20040000_registers)/2}, + { 0x20040001, a5xx_vbif_ver_20040000_registers, + ARRAY_SIZE(a5xx_vbif_ver_20040000_registers)/2}, +}; + +/* + * Set of registers to dump for A5XX on snapshot. + * Registers in pairs - first value is the start offset, second + * is the stop offset (inclusive) + */ + +static const unsigned int a5xx_registers[] = { + /* RBBM */ + 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, + 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, + 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, + 0x04E0, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807, + /* CP */ + 0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0, + 0x0B00, 0x0B12, 0x0B15, 0x0B28, 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, + /* VSC */ + 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, + /* GRAS */ + 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, + 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, + /* RB */ + 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, + 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, + 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, + /* PC */ + 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, + 0x24C0, 0x24C0, + /* VFD */ + 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, + /* VPC */ + 0x0E60, 0x0E7C, + /* UCHE */ + 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2, + + /* RB CTX 0 */ + 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, + 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, + 0xE240, 0xE268, + /* GRAS CTX 0 */ + 0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, + 0xE100, 0xE105, + /* PC CTX 0 */ + 0xE380, 0xE38F, 0xE3B0, 0xE3B0, + /* VFD CTX 0 */ + 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, + /* VPC CTX 0 */ + 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, + + /* RB CTX 1 */ + 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, + 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, + 0xEA40, 0xEA68, + /* GRAS CTX 1 */ + 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, + 0xE900, 0xE905, + /* PC CTX 1 */ + 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, + /* VFD CTX 1 */ + 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0, + /* VPC CTX 1 */ + 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, + /* GPMU */ + 0xA800, 0xA8FF, 0xAC60, 0xAC60, + /* DPM */ + 0xB000, 0xB97F, 0xB9A0, 0xB9BF, +}; + +struct a5xx_hlsq_sp_tp_regs { + unsigned int statetype; + unsigned int ahbaddr; + unsigned int size; +}; + +static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { + /* HLSQ CTX 0 2D */ + { 0x31, 0x2080, 0x1 }, + /* HLSQ CTX 1 2D */ + { 0x33, 0x2480, 0x1 }, + /* HLSQ CTX 0 3D */ + { 0x32, 0xE780, 0x7f }, + /* HLSQ CTX 1 3D */ + { 0x34, 0xEF80, 0x7f }, + + /* SP non context */ + { 0x3f, 0x0EC0, 0x40 }, + /* SP CTX 0 2D */ + { 0x3d, 0x2040, 0x1 }, + /* SP CTX 1 2D */ + { 0x3b, 0x2440, 0x1 }, + /* SP CTX 0 3D */ + { 0x3e, 0xE580, 0x180 }, + /* SP CTX 1 3D */ + { 0x3c, 0xED80, 0x180 }, + + /* TP non context */ + { 0x3a, 0x0F00, 0x40 }, + /* TP CTX 0 2D */ + { 0x38, 0x2000, 0x10 }, + /* TP CTX 1 2D */ + { 0x36, 0x2400, 0x10 }, + /* TP CTX 0 3D */ + { 0x39, 0xE700, 0x128 }, + /* TP CTX 1 
3D */ + { 0x37, 0xEF00, 0x128 }, +}; + +/* HLSQ non context registers - can't be read on A530v1 */ +static const struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_non_ctx_registers = { + 0x35, 0xE00, 0x1C +}; + +#define A5XX_NUM_SHADER_BANKS 4 +#define A5XX_SHADER_STATETYPE_SHIFT 8 + +enum a5xx_shader_obj { + A5XX_TP_W_MEMOBJ = 1, + A5XX_TP_W_SAMPLER = 2, + A5XX_TP_W_MIPMAP_BASE = 3, + A5XX_TP_W_MEMOBJ_TAG = 4, + A5XX_TP_W_SAMPLER_TAG = 5, + A5XX_TP_S_3D_MEMOBJ = 6, + A5XX_TP_S_3D_SAMPLER = 0x7, + A5XX_TP_S_3D_MEMOBJ_TAG = 0x8, + A5XX_TP_S_3D_SAMPLER_TAG = 0x9, + A5XX_TP_S_CS_MEMOBJ = 0xA, + A5XX_TP_S_CS_SAMPLER = 0xB, + A5XX_TP_S_CS_MEMOBJ_TAG = 0xC, + A5XX_TP_S_CS_SAMPLER_TAG = 0xD, + A5XX_SP_W_INSTR = 0xE, + A5XX_SP_W_CONST = 0xF, + A5XX_SP_W_UAV_SIZE = 0x10, + A5XX_SP_W_CB_SIZE = 0x11, + A5XX_SP_W_UAV_BASE = 0x12, + A5XX_SP_W_CB_BASE = 0x13, + A5XX_SP_W_INST_TAG = 0x14, + A5XX_SP_W_STATE = 0x15, + A5XX_SP_S_3D_INSTR = 0x16, + A5XX_SP_S_3D_CONST = 0x17, + A5XX_SP_S_3D_CB_BASE = 0x18, + A5XX_SP_S_3D_CB_SIZE = 0x19, + A5XX_SP_S_3D_UAV_BASE = 0x1A, + A5XX_SP_S_3D_UAV_SIZE = 0x1B, + A5XX_SP_S_CS_INSTR = 0x1C, + A5XX_SP_S_CS_CONST = 0x1D, + A5XX_SP_S_CS_CB_BASE = 0x1E, + A5XX_SP_S_CS_CB_SIZE = 0x1F, + A5XX_SP_S_CS_UAV_BASE = 0x20, + A5XX_SP_S_CS_UAV_SIZE = 0x21, + A5XX_SP_S_3D_INSTR_DIRTY = 0x22, + A5XX_SP_S_3D_CONST_DIRTY = 0x23, + A5XX_SP_S_3D_CB_BASE_DIRTY = 0x24, + A5XX_SP_S_3D_CB_SIZE_DIRTY = 0x25, + A5XX_SP_S_3D_UAV_BASE_DIRTY = 0x26, + A5XX_SP_S_3D_UAV_SIZE_DIRTY = 0x27, + A5XX_SP_S_CS_INSTR_DIRTY = 0x28, + A5XX_SP_S_CS_CONST_DIRTY = 0x29, + A5XX_SP_S_CS_CB_BASE_DIRTY = 0x2A, + A5XX_SP_S_CS_CB_SIZE_DIRTY = 0x2B, + A5XX_SP_S_CS_UAV_BASE_DIRTY = 0x2C, + A5XX_SP_S_CS_UAV_SIZE_DIRTY = 0x2D, + A5XX_HLSQ_ICB = 0x2E, + A5XX_HLSQ_ICB_DIRTY = 0x2F, + A5XX_HLSQ_ICB_CB_BASE_DIRTY = 0x30, + A5XX_SP_POWER_RESTORE_RAM = 0x40, + A5XX_SP_POWER_RESTORE_RAM_TAG = 0x41, + A5XX_TP_POWER_RESTORE_RAM = 0x42, + A5XX_TP_POWER_RESTORE_RAM_TAG = 0x43, + +}; + +struct a5xx_shader_block { + unsigned int statetype; + unsigned int sz; +}; + +struct a5xx_shader_block_info { + const struct a5xx_shader_block *shader_block; + unsigned int shader_num; +}; + +static const struct a5xx_shader_block a5xx_shader_blocks[] = { + {A5XX_TP_W_MEMOBJ, 0x200}, + {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, + {A5XX_TP_W_SAMPLER_TAG, 0x40}, + {A5XX_TP_S_3D_SAMPLER, 0x80}, + {A5XX_TP_S_3D_SAMPLER_TAG, 0x20}, + {A5XX_TP_S_CS_SAMPLER, 0x40}, + {A5XX_TP_S_CS_SAMPLER_TAG, 0x10}, + {A5XX_SP_W_CONST, 0x800}, + {A5XX_SP_W_CB_SIZE, 0x30}, + {A5XX_SP_W_CB_BASE, 0xF0}, + {A5XX_SP_W_STATE, 0x1}, + {A5XX_SP_S_3D_CONST, 0x800}, + {A5XX_SP_S_3D_CB_SIZE, 0x28}, + {A5XX_SP_S_3D_UAV_SIZE, 0x80}, + {A5XX_SP_S_CS_CONST, 0x400}, + {A5XX_SP_S_CS_CB_SIZE, 0x8}, + {A5XX_SP_S_CS_UAV_SIZE, 0x80}, + {A5XX_SP_S_3D_CONST_DIRTY, 0x12}, + {A5XX_SP_S_3D_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_3D_UAV_SIZE_DIRTY, 0x2}, + {A5XX_SP_S_CS_CONST_DIRTY, 0xA}, + {A5XX_SP_S_CS_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_SIZE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB_DIRTY, 0xB}, + {A5XX_SP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_W_SAMPLER, 0x80}, + {A5XX_TP_W_MEMOBJ_TAG, 0x40}, + {A5XX_TP_S_3D_MEMOBJ, 0x200}, + {A5XX_TP_S_3D_MEMOBJ_TAG, 0x20}, + {A5XX_TP_S_CS_MEMOBJ, 0x100}, + {A5XX_TP_S_CS_MEMOBJ_TAG, 0x10}, + {A5XX_SP_W_INSTR, 0x800}, + {A5XX_SP_W_UAV_SIZE, 0x80}, + {A5XX_SP_W_UAV_BASE, 0x80}, + {A5XX_SP_W_INST_TAG, 0x40}, + {A5XX_SP_S_3D_INSTR, 0x800}, + {A5XX_SP_S_3D_CB_BASE, 0xC8}, + {A5XX_SP_S_3D_UAV_BASE, 0x80}, + {A5XX_SP_S_CS_INSTR, 0x400}, + {A5XX_SP_S_CS_CB_BASE, 
0x28}, + {A5XX_SP_S_CS_UAV_BASE, 0x80}, + {A5XX_SP_S_3D_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_3D_CB_BASE_DIRTY, 0x5}, + {A5XX_SP_S_3D_UAV_BASE_DIRTY, 0x2}, + {A5XX_SP_S_CS_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_CS_CB_BASE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_BASE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB, 0x200}, + {A5XX_HLSQ_ICB_CB_BASE_DIRTY, 0x4}, + {A5XX_SP_POWER_RESTORE_RAM, 0x140}, + {A5XX_TP_POWER_RESTORE_RAM, 0x40}, +}; + +static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_shader *header = + (struct kgsl_snapshot_shader *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned int i; + struct a5xx_shader_block_info *shader_block_info = + (struct a5xx_shader_block_info *)priv; + unsigned int statetype = shader_block_info->shader_block->statetype; + unsigned int size = shader_block_info->shader_block->sz; + unsigned int shader_num = shader_block_info->shader_num; + + + if (remain < SHADER_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY"); + return 0; + } + + kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, + ((statetype << A5XX_SHADER_STATETYPE_SHIFT) | shader_num)); + + header->type = statetype; + header->index = shader_num; + header->size = size; + + for (i = 0; i < size; i++) + kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + i, + data++); + + return SHADER_SECTION_SZ(size); +} + +static void a5xx_snapshot_shader(struct kgsl_device *device, + struct kgsl_snapshot *snapshot) +{ + unsigned int i, j; + struct a5xx_shader_block_info blk; + + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) { + blk.shader_block = &a5xx_shader_blocks[i]; + blk.shader_num = j; + /* Shader working/shadow memory */ + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_SHADER, + snapshot, a5xx_snapshot_shader_memory, &blk); + } + } +} + +static int get_hlsq_registers(struct kgsl_device *device, + const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) +{ + int j; + unsigned int val; + + kgsl_regwrite(device, A5XX_HLSQ_DBG_READ_SEL, + (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT)); + + for (j = 0; j < regs->size; j++) { + kgsl_regread(device, A5XX_HLSQ_DBG_AHB_READ_APERTURE + j, &val); + *data++ = regs->ahbaddr + j; + *data++ = val; + } + + return (regs->size * 2); +} + +static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, i; + + /* Figure out how many registers we are going to dump */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + count += a5xx_hlsq_sp_tp_registers[i].size; + + /* the HLSQ non context registers cannot be dumped on A530v1 */ + if (!adreno_is_a530v1(adreno_dev)) + count += a5xx_hlsq_non_ctx_registers.size; + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + data += get_hlsq_registers(device, + &a5xx_hlsq_sp_tp_registers[i], data); + + if (!adreno_is_a530v1(adreno_dev)) + data += get_hlsq_registers(device, + &a5xx_hlsq_non_ctx_registers, data); + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + +static size_t a5xx_legacy_snapshot_registers(struct kgsl_device 
*device, + u8 *buf, size_t remain) +{ + struct kgsl_snapshot_registers regs = { + .regs = a5xx_registers, + .count = ARRAY_SIZE(a5xx_registers) / 2, + }; + + return kgsl_snapshot_dump_registers(device, buf, remain, ®s); +} + +static struct kgsl_memdesc capturescript; +static struct kgsl_memdesc registers; + +#define REG_PAIR_COUNT(_a, _i) \ + (((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1) + +static inline unsigned int count_registers(void) +{ + unsigned int i, count = 0; + + for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) + count += REG_PAIR_COUNT(a5xx_registers, i); + + return count; +} + +static unsigned int copy_registers(unsigned int *dst) +{ + unsigned int *src = (unsigned int *) registers.hostptr; + unsigned int i, count = 0; + + for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { + unsigned int j; + unsigned int start = a5xx_registers[2 * i]; + unsigned int end = a5xx_registers[(2 * i) + 1]; + + for (j = start; j <= end; j++, count++) { + *dst++ = j; + *dst++ = *src++; + } + } + + return count; +} + +static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + unsigned long wait_time; + unsigned int reg = 0; + unsigned int val; + + /* Jump to legacy if the crash dump script was not initialized */ + if (capturescript.gpuaddr == 0 || registers.gpuaddr == 0) + return a5xx_legacy_snapshot_registers(device, buf, remain); + + /* + * If we got here because we are stalled on fault the crash dumper has + * won't work + */ + kgsl_regread(device, A5XX_RBBM_STATUS3, &val); + if (val & BIT(24)) + return a5xx_legacy_snapshot_registers(device, buf, remain); + + if (remain < (count_registers() * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO, + lower_32_bits(capturescript.gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_HI, + upper_32_bits(capturescript.gpuaddr)); + kgsl_regwrite(device, A5XX_CP_CRASH_DUMP_CNTL, 1); + + wait_time = jiffies + msecs_to_jiffies(CP_CRASH_DUMPER_TIMEOUT); + while (!time_after(jiffies, wait_time)) { + kgsl_regread(device, A5XX_CP_CRASH_DUMP_CNTL, ®); + if (reg & 0x4) + break; + cpu_relax(); + } + + if (!(reg & 0x4)) { + KGSL_CORE_ERR("Crash dump timed out: 0x%X\n", reg); + return a5xx_legacy_snapshot_registers(device, buf, remain); + } + + header->count = copy_registers(data); + + /* Return the size of the section */ + return (header->count * 8) + sizeof(*header); +} + +/* + * a5xx_snapshot() - A5XX GPU snapshot function + * @adreno_dev: Device being snapshotted + * @snapshot: Pointer to the snapshot instance + * + * This is where all of the A5XX specific bits and pieces are grabbed + * into the snapshot memory + */ +void a5xx_snapshot(struct adreno_device *adreno_dev, + struct kgsl_snapshot *snapshot) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_snapshot_data *snap_data = gpudev->snapshot_data; + unsigned int clock_ctl, reg; + + /* Disable Clock gating temporarily for the debug bus to work */ + kgsl_regread(device, A5XX_RBBM_CLOCK_CNTL, &clock_ctl); + kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, a5xx_snapshot_registers, NULL); + + adreno_snapshot_vbif_registers(device, snapshot, + a5xx_vbif_snapshot_registers, + 
ARRAY_SIZE(a5xx_vbif_snapshot_registers)); + + /* Dump SP TP HLSQ registers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); + + /* CP_PFP indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, + 0, snap_data->sect_sizes->cp_pfp); + + /* CP_ME indexed registers */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_STAT_ADDR, A5XX_CP_ME_STAT_DATA, + 0, snap_data->sect_sizes->cp_me); + + /* CP_DRAW_STATE */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA, + 0, 128); + + /* + * CP needs to be halted on a530v1 before reading CP_PFP_UCODE_DBG_DATA + * and CP_PM4_UCODE_DBG_DATA registers + */ + if (adreno_is_a530v1(adreno_dev)) { + adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); + reg |= (1 << 27) | (1 << 28); + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); + } + + /* ME_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_ME_UCODE_DBG_ADDR, A5XX_CP_ME_UCODE_DBG_DATA, + 0, 0x53F); + + /* PFP_UCODE Cache */ + kgsl_snapshot_indexed_registers(device, snapshot, + A5XX_CP_PFP_UCODE_DBG_ADDR, A5XX_CP_PFP_UCODE_DBG_DATA, + 0, 0x53F); + + /* CP MEQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_meq, + &snap_data->sect_sizes->cp_meq); + + /* CP ROQ */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_roq, + &snap_data->sect_sizes->roq); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, adreno_snapshot_cp_merciu, + &snap_data->sect_sizes->cp_merciu); + + /* CP PFP and PM4 */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pfp, NULL); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, + snapshot, a5xx_snapshot_cp_pm4, NULL); + + /* Shader memory */ + a5xx_snapshot_shader(device, snapshot); + + /* Debug bus */ + a5xx_snapshot_debugbus(device, snapshot); +} + +void a5xx_crashdump_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int i, count; + uint64_t *ptr; + uint64_t gpuaddr; + + if (capturescript.gpuaddr != 0 && registers.gpuaddr != 0) + return; + + /* + * For the capture script two blocks of memory are needed: A block of + * GPU readonly memory for the special capture script and a destination + * block for the register values. The size of the capture script needs + * is 128 bits (4 dwords) per register pair and 4 dwords at the end. + * The destination block needs to be big enough to hold all the + * registers that we will capture. 
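The comment above describes the capture-script layout laid out by a5xx_crashdump_init(): two 64-bit words per register pair (a destination GPU address followed by a control word that packs the register offset above bit 44 and the register count in the low bits), with two zero qwords terminating the script. A minimal decoding sketch of one such entry, purely illustrative; the struct and function names below are not part of the driver:

struct a5xx_capture_script_entry {
        uint64_t dest_gpuaddr;  /* where the CP crash dumper writes the values */
        uint64_t control;       /* (register offset << 44) | register count */
};

static inline void a5xx_decode_script_entry(
                const struct a5xx_capture_script_entry *ent,
                unsigned int *offset, unsigned int *count)
{
        /* Mirrors the packing used when the script is built */
        *offset = (unsigned int)(ent->control >> 44);
        *count = (unsigned int)(ent->control & ((1ULL << 44) - 1));
}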
+ */ + + if (kgsl_allocate_global(device, &capturescript, + ((ARRAY_SIZE(a5xx_registers) / 2) * 16) + 16, + KGSL_MEMFLAGS_GPUREADONLY, 0)) + return; + + /* Count the total number of registers to capture */ + count = count_registers(); + + if (kgsl_allocate_global(device, ®isters, + count * sizeof(unsigned int), 0, 0)) + return; + + /* Build the crash script */ + + ptr = (uint64_t *) capturescript.hostptr; + gpuaddr = registers.gpuaddr; + + for (i = 0; i < ARRAY_SIZE(a5xx_registers) / 2; i++) { + unsigned int regs = REG_PAIR_COUNT(a5xx_registers, i); + *ptr++ = gpuaddr; + *ptr++ = (((uint64_t) a5xx_registers[2 * i]) << 44) | regs; + + gpuaddr += regs * sizeof(unsigned int); + } + + *ptr++ = 0; + *ptr++ = 0; +} diff --git a/drivers/gpu/msm/adreno_compat.c b/drivers/gpu/msm/adreno_compat.c new file mode 100644 index 000000000000..4d78de5b9ec3 --- /dev/null +++ b/drivers/gpu/msm/adreno_compat.c @@ -0,0 +1,206 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/uaccess.h> +#include <linux/ioctl.h> + +#include "kgsl.h" +#include "kgsl_compat.h" + +#include "adreno.h" +#include "adreno_compat.h" + +int adreno_getproperty_compat(struct kgsl_device *device, + unsigned int type, + void __user *value, + size_t sizebytes) +{ + int status = -EINVAL; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + switch (type) { + case KGSL_PROP_DEVICE_INFO: + { + struct kgsl_devinfo_compat devinfo; + + if (sizebytes != sizeof(devinfo)) { + status = -EINVAL; + break; + } + + memset(&devinfo, 0, sizeof(devinfo)); + devinfo.device_id = device->id + 1; + devinfo.chip_id = adreno_dev->chipid; + devinfo.mmu_enabled = kgsl_mmu_enabled(); + devinfo.gmem_gpubaseaddr = adreno_dev->gmem_base; + devinfo.gmem_sizebytes = adreno_dev->gmem_size; + + if (copy_to_user(value, &devinfo, sizeof(devinfo)) != + 0) { + status = -EFAULT; + break; + } + status = 0; + } + break; + case KGSL_PROP_DEVICE_SHADOW: + { + struct kgsl_shadowprop_compat shadowprop; + + if (sizebytes != sizeof(shadowprop)) { + status = -EINVAL; + break; + } + memset(&shadowprop, 0, sizeof(shadowprop)); + if (device->memstore.hostptr) { + /* + * NOTE: with mmu enabled, gpuaddr doesn't mean + * anything to mmap(). + * NOTE: shadowprop.gpuaddr is uint32 + * (because legacy) and the memstore gpuaddr is + * 64 bit. Cast the memstore gpuaddr to uint32. 
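The note above amounts to keeping only the low 32 bits of the 64-bit memstore address for the legacy structure. An equivalent spelling with the standard kernel helper, shown only for illustration (the driver code below uses a plain cast):

static inline unsigned int kgsl_memstore_legacy_gpuaddr(
                struct kgsl_device *device)
{
        /* Same effect as the (unsigned int) cast applied below */
        return lower_32_bits(device->memstore.gpuaddr);
}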
+ */ + shadowprop.gpuaddr = + (unsigned int) device->memstore.gpuaddr; + shadowprop.size = + (unsigned int) device->memstore.size; + /* + * GSL needs this to be set, even if it + * appears to be meaningless + */ + shadowprop.flags = KGSL_FLAGS_INITIALIZED | + KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS; + } + if (copy_to_user(value, &shadowprop, + sizeof(shadowprop))) { + status = -EFAULT; + break; + } + status = 0; + } + break; + default: + /* + * Call the adreno_getproperty to check if the property type + * was KGSL_PROP_MMU_ENABLE or KGSL_PROP_INTERRUPT_WAITS + */ + status = device->ftbl->getproperty(device, type, value, + sizebytes); + } + + return status; +} + +int adreno_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes) +{ + int status = -EINVAL; + struct kgsl_device *device = dev_priv->device; + + switch (type) { + case KGSL_PROP_PWR_CONSTRAINT: { + struct kgsl_device_constraint_compat constraint32; + struct kgsl_device_constraint constraint; + struct kgsl_context *context; + + if (sizebytes != sizeof(constraint32)) + break; + + if (copy_from_user(&constraint32, value, + sizeof(constraint32))) { + status = -EFAULT; + break; + } + + /* Populate the real constraint type from the compat */ + constraint.type = constraint32.type; + constraint.context_id = constraint32.context_id; + constraint.data = compat_ptr(constraint32.data); + constraint.size = (size_t)constraint32.size; + + context = kgsl_context_get_owner(dev_priv, + constraint.context_id); + if (context == NULL) + break; + status = adreno_set_constraint(device, context, + &constraint); + kgsl_context_put(context); + } + break; + default: + /* + * Call adreno_setproperty in case the property type was + * KGSL_PROP_PWRCTRL + */ + status = device->ftbl->setproperty(dev_priv, type, value, + sizebytes); + } + + return status; +} + +static long adreno_ioctl_perfcounter_query_compat( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_query_compat *query32 = data; + struct kgsl_perfcounter_query query; + long result; + + query.groupid = query32->groupid; + query.countables = to_user_ptr(query32->countables); + query.count = query32->count; + query.max_counters = query32->max_counters; + + result = adreno_perfcounter_query_group(adreno_dev, + query.groupid, query.countables, + query.count, &query.max_counters); + query32->max_counters = query.max_counters; + + return result; +} + +static long adreno_ioctl_perfcounter_read_compat( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_read_compat *read32 = data; + struct kgsl_perfcounter_read read; + + read.reads = (struct kgsl_perfcounter_read_group __user *) + (uintptr_t)read32->reads; + read.count = read32->count; + + return adreno_perfcounter_read_group(adreno_dev, read.reads, + read.count); +} + +static struct kgsl_ioctl adreno_compat_ioctl_funcs[] = { + { IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get }, + { IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put }, + { IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT, + adreno_ioctl_perfcounter_query_compat }, + { IOCTL_KGSL_PERFCOUNTER_READ_COMPAT, + adreno_ioctl_perfcounter_read_compat }, +}; + +long adreno_compat_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + return adreno_ioctl_helper(dev_priv, 
cmd, arg, + adreno_compat_ioctl_funcs, + ARRAY_SIZE(adreno_compat_ioctl_funcs)); +} diff --git a/drivers/gpu/msm/adreno_compat.h b/drivers/gpu/msm/adreno_compat.h new file mode 100644 index 000000000000..4fba17bc8b13 --- /dev/null +++ b/drivers/gpu/msm/adreno_compat.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __ADRENO_COMPAT_H +#define __ADRENO_COMPAT_H + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> +#include "kgsl.h" +#include "kgsl_device.h" + +int adreno_getproperty_compat(struct kgsl_device *device, + unsigned int type, + void __user *value, + size_t sizebytes); + +int adreno_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int type, + void __user *value, + unsigned int sizebytes); + +long adreno_compat_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + +#else + +static inline int adreno_getproperty_compat(struct kgsl_device *device, + unsigned int type, + void __user *value, size_t sizebytes) +{ + BUG(); +} + +static inline int adreno_setproperty_compat(struct kgsl_device_private + *dev_priv, unsigned int type, + void __user *value, unsigned int sizebytes) +{ + BUG(); +} + +static inline long adreno_compat_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + BUG(); +} + +#endif /* CONFIG_COMPAT */ +#endif /* __ADRENO_COMPAT_H */ diff --git a/drivers/gpu/msm/adreno_coresight.c b/drivers/gpu/msm/adreno_coresight.c new file mode 100644 index 000000000000..326f3ed0ed4d --- /dev/null +++ b/drivers/gpu/msm/adreno_coresight.c @@ -0,0 +1,331 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
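The compat ioctl table above pairs each command code with its handler and is handed to adreno_ioctl_helper() together with its length. That helper lives elsewhere in the driver; the sketch below only illustrates the table-walk dispatch pattern implied by the call. The member names .cmd and .func, the -ENOIOCTLCMD fallback, and the fact that the real helper also copies the ioctl argument from and to user space (omitted here) are assumptions, not taken from this commit:

static long example_ioctl_table_dispatch(struct kgsl_device_private *dev_priv,
                unsigned int cmd, void *data,
                const struct kgsl_ioctl *funcs, unsigned int count)
{
        unsigned int i;

        /* Walk the table and call the first handler matching the command */
        for (i = 0; i < count; i++) {
                if (funcs[i].cmd == cmd)
                        return funcs[i].func(dev_priv, cmd, data);
        }

        return -ENOIOCTLCMD;
}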
+ */ + +#include <linux/platform_device.h> +#include <linux/of_coresight.h> +#include <linux/coresight.h> + +#include "adreno.h" + +#define TO_ADRENO_CORESIGHT_ATTR(_attr) \ + container_of(_attr, struct adreno_coresight_attr, attr) + +ssize_t adreno_coresight_show_register(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int val = 0; + struct kgsl_device *device = dev_get_drvdata(dev->parent); + struct adreno_device *adreno_dev; + struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr); + + if (device == NULL) + return -EINVAL; + + adreno_dev = ADRENO_DEVICE(device); + + if (cattr->reg == NULL) + return -EINVAL; + + /* + * Return the current value of the register if coresight is enabled, + * otherwise report 0 + */ + + mutex_lock(&device->mutex); + if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) { + + /* + * If the device isn't power collapsed read the actual value + * from the hardware - otherwise return the cached value + */ + + if (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_NAP) { + if (!kgsl_active_count_get(device)) { + kgsl_regread(device, cattr->reg->offset, + &cattr->reg->value); + kgsl_active_count_put(device); + } + } + + val = cattr->reg->value; + } + mutex_unlock(&device->mutex); + + return snprintf(buf, PAGE_SIZE, "0x%X", val); +} + +ssize_t adreno_coresight_store_register(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct kgsl_device *device = dev_get_drvdata(dev->parent); + struct adreno_device *adreno_dev; + struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr); + unsigned long val; + int ret; + + if (device == NULL) + return -EINVAL; + + adreno_dev = ADRENO_DEVICE(device); + + if (cattr->reg == NULL) + return -EINVAL; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + /* Ignore writes while coresight is off */ + if (!test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) + goto out; + + cattr->reg->value = val; + + /* Program the hardware if it is not power collapsed */ + if (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_NAP) { + if (!kgsl_active_count_get(device)) { + kgsl_regwrite(device, cattr->reg->offset, + cattr->reg->value); + kgsl_active_count_put(device); + } + } + +out: + mutex_unlock(&device->mutex); + return size; +} + +/** + * adreno_coresight_disable() - Generic function to disable coresight debugging + * @csdev: Pointer to coresight's device struct + * + * This is a generic function to disable coresight debug bus on adreno + * devices. This should be used in all cases of disabling + * coresight debug bus for adreno devices. This function in turn calls + * the adreno device specific function through the gpudev hook. + * This function is registered as the coresight disable function + * with coresight driver. It should only be called through coresight driver + * as that would ensure that the necessary setup required to be done on + * coresight driver's part is also done. 
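The show and store handlers above reach their register through container_of() on the device_attribute and then use reg->offset and reg->value (and, in the enable path further down, reg->initial). For reference, a sketch of the shapes those accesses imply; the real definitions live in the driver headers and may differ in layout:

struct adreno_coresight_register {
        unsigned int offset;    /* KGSL register offset for the debug bus */
        unsigned int initial;   /* default programmed when tracing is enabled */
        unsigned int value;     /* cached value, rewritten after power collapse */
};

struct adreno_coresight_attr {
        struct device_attribute attr;   /* sysfs hook served by show/store above */
        struct adreno_coresight_register *reg;
};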
+ */ +static void adreno_coresight_disable(struct coresight_device *csdev) +{ + struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent); + struct adreno_device *adreno_dev; + struct adreno_gpudev *gpudev; + struct adreno_coresight *coresight; + int i; + + if (device == NULL) + return; + + adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + coresight = gpudev->coresight; + + if (coresight == NULL) + return; + + mutex_lock(&device->mutex); + + if (!kgsl_active_count_get(device)) { + for (i = 0; i < coresight->count; i++) + kgsl_regwrite(device, coresight->registers[i].offset, + 0); + + kgsl_active_count_put(device); + } + + clear_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv); + + mutex_unlock(&device->mutex); +} + +/** + * _adreno_coresight_get_and_clear(): Save the current value of coresight + * registers and clear the registers subsequently. Clearing registers + * has the effect of disabling coresight. + * @adreno_dev: Pointer to adreno device struct + */ +static int _adreno_coresight_get_and_clear(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_coresight *coresight = gpudev->coresight; + int i; + + if (coresight == NULL) + return -ENODEV; + + kgsl_pre_hwaccess(device); + /* + * Save the current value of each coresight register + * and then clear each register + */ + for (i = 0; i < coresight->count; i++) { + kgsl_regread(device, coresight->registers[i].offset, + &coresight->registers[i].value); + kgsl_regwrite(device, coresight->registers[i].offset, + 0); + } + + return 0; +} + +static int _adreno_coresight_set(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_coresight *coresight = gpudev->coresight; + int i; + + if (coresight == NULL) + return -ENODEV; + + BUG_ON(!kgsl_state_is_awake(device)); + for (i = 0; i < coresight->count; i++) + kgsl_regwrite(device, coresight->registers[i].offset, + coresight->registers[i].value); + + return 0; +} +/** + * adreno_coresight_enable() - Generic function to enable coresight debugging + * @csdev: Pointer to coresight's device struct + * + * This is a generic function to enable coresight debug bus on adreno + * devices. This should be used in all cases of enabling + * coresight debug bus for adreno devices. This function is registered as the + * coresight enable function with coresight driver. It should only be called + * through coresight driver as that would ensure that the necessary setup + * required to be done on coresight driver's part is also done. 
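Alongside the enable and disable callbacks, the file defines adreno_coresight_stop() and adreno_coresight_start() further down to bracket GPU power collapse: stop caches the live register values and clears them, start writes the cached values back. A minimal usage sketch; the call sites are assumed here, in the driver they sit in the power management path, which is not part of this file:

static void example_coresight_power_transition(
                struct adreno_device *adreno_dev, bool powering_down)
{
        if (powering_down)
                adreno_coresight_stop(adreno_dev);      /* save and clear */
        else
                adreno_coresight_start(adreno_dev);     /* restore cached values */
}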
+ */ +static int adreno_coresight_enable(struct coresight_device *csdev) +{ + struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent); + struct adreno_device *adreno_dev; + struct adreno_gpudev *gpudev; + struct adreno_coresight *coresight; + int ret = 0; + + if (device == NULL) + return -ENODEV; + + adreno_dev = ADRENO_DEVICE(device); + gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + coresight = gpudev->coresight; + + if (coresight == NULL) + return -ENODEV; + + mutex_lock(&device->mutex); + if (!test_and_set_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) { + int i; + + /* Reset all the debug registers to their default values */ + + for (i = 0; i < coresight->count; i++) + coresight->registers[i].value = + coresight->registers[i].initial; + + ret = kgsl_active_count_get(device); + if (!ret) { + ret = _adreno_coresight_set(adreno_dev); + kgsl_active_count_put(device); + } + } + + mutex_unlock(&device->mutex); + + return ret; +} + +/** + * adreno_coresight_start() - Reprogram coresight registers after power collapse + * @adreno_dev: Pointer to the adreno device structure + * + * Cache the current coresight register values so they can be restored after + * power collapse + */ +void adreno_coresight_stop(struct adreno_device *adreno_dev) +{ + if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) + _adreno_coresight_get_and_clear(adreno_dev); +} + +/** + * adreno_coresight_start() - Reprogram coresight registers after power collapse + * @adreno_dev: Pointer to the adreno device structure + * + * Reprogram the cached values to the coresight registers on power up + */ +void adreno_coresight_start(struct adreno_device *adreno_dev) +{ + if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) + _adreno_coresight_set(adreno_dev); +} + +static const struct coresight_ops_source adreno_coresight_source_ops = { + .enable = adreno_coresight_enable, + .disable = adreno_coresight_disable, +}; + +static const struct coresight_ops adreno_coresight_ops = { + .source_ops = &adreno_coresight_source_ops, +}; + +void adreno_coresight_remove(struct adreno_device *adreno_dev) +{ + coresight_unregister(adreno_dev->csdev); + adreno_dev->csdev = NULL; +} + +int adreno_coresight_init(struct adreno_device *adreno_dev) +{ + int ret = 0; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = &adreno_dev->dev; + struct coresight_desc desc; + + if (gpudev->coresight == NULL) + return -ENODEV; + + if (adreno_dev->csdev != NULL) + return 0; + + memset(&desc, 0, sizeof(desc)); + + desc.pdata = of_get_coresight_platform_data(&device->pdev->dev, + device->pdev->dev.of_node); + if (desc.pdata == NULL) + return -ENODEV; + + desc.type = CORESIGHT_DEV_TYPE_SOURCE; + desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_BUS; + desc.ops = &adreno_coresight_ops; + desc.dev = &device->pdev->dev; + desc.owner = THIS_MODULE; + desc.groups = gpudev->coresight->groups; + + adreno_dev->csdev = coresight_register(&desc); + + if (IS_ERR(adreno_dev->csdev)) + ret = PTR_ERR(adreno_dev->csdev); + + return ret; +} diff --git a/drivers/gpu/msm/adreno_cp_parser.c b/drivers/gpu/msm/adreno_cp_parser.c new file mode 100644 index 000000000000..4faf3a8319cb --- /dev/null +++ b/drivers/gpu/msm/adreno_cp_parser.c @@ -0,0 +1,1048 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" + +#include "adreno.h" +#include "adreno_pm4types.h" +#include "a3xx_reg.h" +#include "adreno_cp_parser.h" + +#define MAX_IB_OBJS 1000 +#define NUM_SET_DRAW_GROUPS 32 + +struct set_draw_state { + uint64_t cmd_stream_addr; + uint64_t cmd_stream_dwords; +}; + +/* List of variables used when parsing an IB */ +struct ib_parser_variables { + /* List of registers containing addresses and their sizes */ + unsigned int cp_addr_regs[ADRENO_CP_ADDR_MAX]; + /* 32 groups of command streams in set draw state packets */ + struct set_draw_state set_draw_groups[NUM_SET_DRAW_GROUPS]; +}; + +/* + * Used for locating shader objects. This array holds the unit size of shader + * objects based on type and block of shader. The type can be 0 or 1 hence there + * are 2 columns and block can be 0-7 hence 7 rows. + */ +static int load_state_unit_sizes[7][2] = { + { 2, 4 }, + { 0, 1 }, + { 2, 4 }, + { 0, 1 }, + { 8, 2 }, + { 8, 2 }, + { 8, 2 }, +}; + +static int adreno_ib_find_objs(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + int obj_type, + struct adreno_ib_object_list *ib_obj_list, + int ib_level); + +static int ib_parse_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars); + +static int ib_parse_type7_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list); + +/* + * adreno_ib_merge_range() - Increases the address range tracked by an ib + * object + * @ib_obj: The ib object + * @gpuaddr: The start address which is to be merged + * @size: Size of the merging address + */ +static void adreno_ib_merge_range(struct adreno_ib_object *ib_obj, + uint64_t gpuaddr, uint64_t size) +{ + uint64_t addr_end1 = ib_obj->gpuaddr + ib_obj->size; + uint64_t addr_end2 = gpuaddr + size; + if (gpuaddr < ib_obj->gpuaddr) + ib_obj->gpuaddr = gpuaddr; + if (addr_end2 > addr_end1) + ib_obj->size = addr_end2 - ib_obj->gpuaddr; + else + ib_obj->size = addr_end1 - ib_obj->gpuaddr; +} + +/* + * adreno_ib_check_overlap() - Checks if an address range overlap + * @gpuaddr: The start address range to check for overlap + * @size: Size of the address range + * @type: The type of address range + * @ib_obj_list: The list of address ranges to check for overlap + * + * Checks if an address range overlaps with a list of address ranges + * Returns the entry from list which overlaps else NULL + */ +static struct adreno_ib_object *adreno_ib_check_overlap(uint64_t gpuaddr, + uint64_t size, int type, + struct adreno_ib_object_list *ib_obj_list) +{ + struct adreno_ib_object *ib_obj; + int i; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + ib_obj = &(ib_obj_list->obj_list[i]); + if ((type == ib_obj->snapshot_obj_type) && + kgsl_addr_range_overlap(ib_obj->gpuaddr, ib_obj->size, + gpuaddr, size)) + /* regions overlap */ + return 
ib_obj; + } + return NULL; +} + +/* + * adreno_ib_add() - Add a gpuaddress range to list + * @process: Process in which the gpuaddress is mapped + * @type: The type of address range + * @ib_obj_list: List of the address ranges in which the given range is to be + * added + * + * Add a gpuaddress range as an ib object to a given list after checking if it + * overlaps with another entry on the list. If it conflicts then change the + * existing entry to incorporate this range + * + * Returns 0 on success else error code + */ +static int adreno_ib_add(struct kgsl_process_private *process, + uint64_t gpuaddr, int type, + struct adreno_ib_object_list *ib_obj_list) +{ + uint64_t size; + struct adreno_ib_object *ib_obj; + struct kgsl_mem_entry *entry; + + if (MAX_IB_OBJS <= ib_obj_list->num_objs) + return -E2BIG; + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (!entry) + /* + * Do not fail if gpuaddr not found, we can continue + * to search for other objects even if few objects are + * not found + */ + return 0; + + size = entry->memdesc.size; + gpuaddr = entry->memdesc.gpuaddr; + + ib_obj = adreno_ib_check_overlap(gpuaddr, size, type, ib_obj_list); + if (ib_obj) { + adreno_ib_merge_range(ib_obj, gpuaddr, size); + kgsl_mem_entry_put(entry); + } else { + adreno_ib_init_ib_obj(gpuaddr, size, type, entry, + &(ib_obj_list->obj_list[ib_obj_list->num_objs])); + ib_obj_list->num_objs++; + } + return 0; +} + +/* + * ib_save_mip_addresses() - Find mip addresses + * @pkt: Pointer to the packet in IB + * @process: The process in which IB is mapped + * @ib_obj_list: List in which any objects found are added + * + * Returns 0 on success else error code + */ +static int ib_save_mip_addresses(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list) +{ + int ret = 0; + int num_levels = (pkt[1] >> 22) & 0x03FF; + int i; + unsigned int *hostptr; + struct kgsl_mem_entry *ent; + unsigned int block, type; + int unitsize = 0; + + block = (pkt[1] >> 19) & 0x07; + type = pkt[2] & 0x03; + + if (type == 0) + unitsize = load_state_unit_sizes[block][0]; + else + unitsize = load_state_unit_sizes[block][1]; + + if (3 == block && 1 == type) { + uint64_t gpuaddr = pkt[2] & 0xFFFFFFFC; + uint64_t size = (num_levels * unitsize) << 2; + + ent = kgsl_sharedmem_find(process, gpuaddr); + if (ent == NULL) + return 0; + + if (!kgsl_gpuaddr_in_memdesc(&ent->memdesc, + gpuaddr, size)) { + kgsl_mem_entry_put(ent); + return 0; + } + + hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc, gpuaddr); + if (hostptr != NULL) { + for (i = 0; i < num_levels; i++) { + ret = adreno_ib_add(process, hostptr[i], + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + } + } + + kgsl_memdesc_unmap(&ent->memdesc); + kgsl_mem_entry_put(ent); + } + return ret; +} + +/* + * ib_parse_load_state() - Parse load state packet + * @pkt: Pointer to the packet in IB + * @process: The pagetable in which the IB is mapped + * @ib_obj_list: List in which any objects found are added + * @ib_parse_vars: VAriable list that store temporary addressses + * + * Parse load state packet found in an IB and add any memory object found to + * a list + * Returns 0 on success else error code + */ +static int ib_parse_load_state(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int ret = 0; + int i; + + /* + * The object here is to find indirect shaders i.e - shaders loaded from + * GPU memory instead of directly in the 
command. These should be added + * to the list of memory objects to dump. So look at the load state + * if the block is indirect (source = 4). If so then add the memory + * address to the list. The size of the object differs depending on the + * type per the load_state_unit_sizes array above. + */ + + if (type3_pkt_size(pkt[0]) < 2) + return 0; + + /* + * Anything from 3rd ordinal onwards of packet can be a memory object, + * no need to be fancy about parsing it, just save it if it looks + * like memory + */ + for (i = 0; i <= (type3_pkt_size(pkt[0]) - 2); i++) { + ret |= adreno_ib_add(process, pkt[2 + i] & 0xFFFFFFFC, + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + } + /* get the mip addresses */ + if (!ret) + ret = ib_save_mip_addresses(pkt, process, ib_obj_list); + return ret; +} + +/* + * This opcode sets the base addresses for the visibilty stream buffer and the + * visiblity stream size buffer. + */ + +static int ib_parse_set_bin_data(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int ret = 0; + + if (type3_pkt_size(pkt[0]) < 2) + return 0; + + /* Visiblity stream buffer */ + ret = adreno_ib_add(process, pkt[1], + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + if (ret) + return ret; + + /* visiblity stream size buffer (fixed size 8 dwords) */ + ret = adreno_ib_add(process, pkt[2], + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + + return ret; +} + +/* + * This opcode writes to GPU memory - if the buffer is written to, there is a + * good chance that it would be valuable to capture in the snapshot, so mark all + * buffers that are written to as frozen + */ + +static int ib_parse_mem_write(unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + if (type3_pkt_size(pkt[0]) < 1) + return 0; + + /* + * The address is where the data in the rest of this packet is written + * to, but since that might be an offset into the larger buffer we need + * to get the whole thing. Pass a size of 0 tocapture the entire buffer. + */ + + return adreno_ib_add(process, pkt[1] & 0xFFFFFFFC, + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); +} + +/* + * ib_add_type0_entries() - Add memory objects to list + * @device: The device on which the IB will execute + * @process: The process in which IB is mapped + * @ib_obj_list: The list of gpu objects + * @ib_parse_vars: addresses ranges found in type0 packets + * + * Add memory objects to given list that are found in type0 packets + * Returns 0 on success else 0 + */ +static int ib_add_type0_entries(struct kgsl_device *device, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int ret = 0; + int i; + int vfd_end; + unsigned int mask; + /* First up the visiblity stream buffer */ + if (adreno_is_a4xx(adreno_dev)) + mask = 0xFFFFFFFC; + else + mask = 0xFFFFFFFF; + for (i = ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0; + i < ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7; i++) { + if (ib_parse_vars->cp_addr_regs[i]) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[i] & mask, + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[i] = 0; + ib_parse_vars->cp_addr_regs[i + 1] = 0; + i++; + } + } + + vfd_end = adreno_is_a4xx(adreno_dev) ? 
+ ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31 : + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15; + for (i = ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0; + i <= vfd_end; i++) { + if (ib_parse_vars->cp_addr_regs[i]) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[i], + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[i] = 0; + } + } + + if (ib_parse_vars->cp_addr_regs[ADRENO_CP_ADDR_VSC_SIZE_ADDRESS]) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] & mask, + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = 0; + } + mask = 0xFFFFFFE0; + for (i = ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR; + i <= ADRENO_CP_ADDR_SP_FS_OBJ_START_REG; i++) { + ret = adreno_ib_add(process, + ib_parse_vars->cp_addr_regs[i] & mask, + SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list); + if (ret) + return ret; + ib_parse_vars->cp_addr_regs[i] = 0; + } + return ret; +} +/* + * The DRAW_INDX opcode sends a draw initator which starts a draw operation in + * the GPU, so this is the point where all the registers and buffers become + * "valid". The DRAW_INDX may also have an index buffer pointer that should be + * frozen with the others + */ + +static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int ret = 0; + int i; + int opcode = cp_type3_opcode(pkt[0]); + + switch (opcode) { + case CP_DRAW_INDX: + if (type3_pkt_size(pkt[0]) > 3) { + ret = adreno_ib_add(process, + pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_INDX_OFFSET: + if (type3_pkt_size(pkt[0]) == 6) { + ret = adreno_ib_add(process, + pkt[5], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_INDIRECT: + if (type3_pkt_size(pkt[0]) == 2) { + ret = adreno_ib_add(process, + pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_INDX_INDIRECT: + if (type3_pkt_size(pkt[0]) == 4) { + ret = adreno_ib_add(process, + pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + ret = adreno_ib_add(process, + pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + case CP_DRAW_AUTO: + if (type3_pkt_size(pkt[0]) == 6) { + ret = adreno_ib_add(process, + pkt[3], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + ret = adreno_ib_add(process, + pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + } + break; + } + + if (ret) + return ret; + /* + * All of the type0 writes are valid at a draw initiator, so freeze + * the various buffers that we are tracking + */ + ret = ib_add_type0_entries(device, process, ib_obj_list, + ib_parse_vars); + if (ret) + return ret; + /* Process set draw state command streams if any */ + for (i = 0; i < NUM_SET_DRAW_GROUPS; i++) { + if (!ib_parse_vars->set_draw_groups[i].cmd_stream_dwords) + continue; + ret = adreno_ib_find_objs(device, process, + ib_parse_vars->set_draw_groups[i].cmd_stream_addr, + ib_parse_vars->set_draw_groups[i].cmd_stream_dwords, + SNAPSHOT_GPU_OBJECT_DRAW, + ib_obj_list, 2); + if (ret) + break; + } + return ret; +} + +/* + * Parse all the type7 opcode packets that may contain important information, + * such as additional GPU buffers to grab or a draw initator + */ + +static int ib_parse_type7(struct kgsl_device *device, unsigned int *ptr, + struct kgsl_process_private *process, + struct 
adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int opcode = cp_type7_opcode(*ptr); + + switch (opcode) { + case CP_SET_DRAW_STATE: + return ib_parse_type7_set_draw_state(device, ptr, process, + ib_obj_list); + } + + return 0; +} + +/* + * Parse all the type3 opcode packets that may contain important information, + * such as additional GPU buffers to grab or a draw initator + */ + +static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int opcode = cp_type3_opcode(*ptr); + + switch (opcode) { + case CP_LOAD_STATE: + return ib_parse_load_state(ptr, process, ib_obj_list, + ib_parse_vars); + case CP_SET_BIN_DATA: + return ib_parse_set_bin_data(ptr, process, ib_obj_list, + ib_parse_vars); + case CP_MEM_WRITE: + return ib_parse_mem_write(ptr, process, ib_obj_list, + ib_parse_vars); + case CP_DRAW_INDX: + case CP_DRAW_INDX_OFFSET: + case CP_DRAW_INDIRECT: + case CP_DRAW_INDX_INDIRECT: + return ib_parse_draw_indx(device, ptr, process, ib_obj_list, + ib_parse_vars); + case CP_SET_DRAW_STATE: + return ib_parse_set_draw_state(device, ptr, process, + ib_obj_list, ib_parse_vars); + } + + return 0; +} + +/* + * Parse type0 packets found in the stream. Some of the registers that are + * written are clues for GPU buffers that we need to freeze. Register writes + * are considred valid when a draw initator is called, so just cache the values + * here and freeze them when a CP_DRAW_INDX is seen. This protects against + * needlessly caching buffers that won't be used during a draw call + */ + +static int ib_parse_type0(struct kgsl_device *device, unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int size = type0_pkt_size(*ptr); + int offset = type0_pkt_offset(*ptr); + int i; + int reg_index; + int ret = 0; + + for (i = 0; i < size; i++, offset++) { + /* Visiblity stream buffer */ + if (offset >= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0) && + offset <= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7)) { + reg_index = adreno_cp_parser_regindex( + adreno_dev, offset, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7); + if (reg_index >= 0) + ib_parse_vars->cp_addr_regs[reg_index] = + ptr[i + 1]; + continue; + } else if ((offset >= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0)) && + (offset <= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15))) { + reg_index = adreno_cp_parser_regindex(adreno_dev, + offset, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15); + if (reg_index >= 0) + ib_parse_vars->cp_addr_regs[reg_index] = + ptr[i + 1]; + continue; + } else if ((offset >= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16)) && + (offset <= adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31))) { + reg_index = adreno_cp_parser_regindex(adreno_dev, + offset, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31); + if (reg_index >= 0) + ib_parse_vars->cp_addr_regs[reg_index] = + ptr[i + 1]; + continue; + } else { + if (offset == + adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS)) + 
ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_VS_OBJ_START_REG)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_VS_OBJ_START_REG] = + ptr[i + 1]; + else if (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_ADDR_SP_FS_OBJ_START_REG)) + ib_parse_vars->cp_addr_regs[ + ADRENO_CP_ADDR_SP_FS_OBJ_START_REG] = + ptr[i + 1]; + else if ((offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_UCHE_INVALIDATE0)) || + (offset == adreno_cp_parser_getreg(adreno_dev, + ADRENO_CP_UCHE_INVALIDATE1))) { + ret = adreno_ib_add(process, + ptr[i + 1] & 0xFFFFFFC0, + SNAPSHOT_GPU_OBJECT_GENERIC, + ib_obj_list); + if (ret) + break; + } + } + } + return ret; +} + +static int ib_parse_type7_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list) +{ + int size = type7_pkt_size(*ptr); + int i; + int grp_id; + int ret = 0; + int flags; + uint64_t cmd_stream_dwords; + uint64_t cmd_stream_addr; + + /* + * size is the size of the packet that does not include the DWORD + * for the packet header, we only want to loop here through the + * packet parameters from ptr[1] till ptr[size] where ptr[0] is the + * packet header. In each loop we look at 3 DWORDS hence increment + * loop counter by 3 always + */ + for (i = 1; i <= size; i += 3) { + grp_id = (ptr[i] & 0x1F000000) >> 24; + /* take action based on flags */ + flags = (ptr[i] & 0x000F0000) >> 16; + + /* + * dirty flag or no flags both mean we need to load it for + * next draw. No flags is used when the group is activated + * or initialized for the first time in the IB + */ + if (flags & 0x1 || !flags) { + cmd_stream_dwords = ptr[i] & 0x0000FFFF; + cmd_stream_addr = ptr[i + 2]; + cmd_stream_addr = cmd_stream_addr << 32 | ptr[i + 1]; + if (cmd_stream_dwords) + ret = adreno_ib_find_objs(device, process, + cmd_stream_addr, cmd_stream_dwords, + SNAPSHOT_GPU_OBJECT_DRAW, ib_obj_list, + 2); + if (ret) + break; + continue; + } + /* load immediate */ + if (flags & 0x8) { + uint64_t gpuaddr = ptr[i + 2]; + gpuaddr = gpuaddr << 32 | ptr[i + 1]; + ret = adreno_ib_find_objs(device, process, + gpuaddr, (ptr[i] & 0x0000FFFF), + SNAPSHOT_GPU_OBJECT_IB, + ib_obj_list, 2); + if (ret) + break; + } + } + return ret; +} + +static int ib_parse_set_draw_state(struct kgsl_device *device, + unsigned int *ptr, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + struct ib_parser_variables *ib_parse_vars) +{ + int size = type0_pkt_size(*ptr); + int i; + int grp_id; + int ret = 0; + int flags; + + /* + * size is the size of the packet that does not include the DWORD + * for the packet header, we only want to loop here through the + * packet parameters from ptr[1] till ptr[size] where ptr[0] is the + * packet header. 
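For reference, the bit layout extracted by the type7 loop above, restated as a small decoder. This is only a restatement of the masks and shifts already used in ib_parse_type7_set_draw_state(); the type and function names are illustrative:

struct set_draw_state_group_desc {
        unsigned int count;     /* dw0 & 0x0000FFFF: dwords in the stream */
        unsigned int flags;     /* (dw0 >> 16) & 0xF: dirty/disable/load-immediate */
        unsigned int grp_id;    /* (dw0 >> 24) & 0x1F: draw state group id */
        uint64_t gpuaddr;       /* dw2:dw1 form the 64-bit stream address */
};

static void decode_set_draw_state_group(const unsigned int *dw,
                struct set_draw_state_group_desc *desc)
{
        desc->count = dw[0] & 0x0000FFFF;
        desc->flags = (dw[0] >> 16) & 0xF;
        desc->grp_id = (dw[0] >> 24) & 0x1F;
        desc->gpuaddr = ((uint64_t)dw[2] << 32) | dw[1];
}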
In each loop we look at 2 DWORDS hence increment + * loop counter by 2 always + */ + for (i = 1; i <= size; i += 2) { + grp_id = (ptr[i] & 0x1F000000) >> 24; + /* take action based on flags */ + flags = (ptr[i] & 0x000F0000) >> 16; + /* Disable all groups */ + if (flags & 0x4) { + int j; + for (j = 0; j < NUM_SET_DRAW_GROUPS; j++) + ib_parse_vars->set_draw_groups[j]. + cmd_stream_dwords = 0; + continue; + } + /* disable flag */ + if (flags & 0x2) { + ib_parse_vars->set_draw_groups[grp_id]. + cmd_stream_dwords = 0; + continue; + } + /* + * dirty flag or no flags both mean we need to load it for + * next draw. No flags is used when the group is activated + * or initialized for the first time in the IB + */ + if (flags & 0x1 || !flags) { + ib_parse_vars->set_draw_groups[grp_id]. + cmd_stream_dwords = ptr[i] & 0x0000FFFF; + ib_parse_vars->set_draw_groups[grp_id]. + cmd_stream_addr = ptr[i + 1]; + continue; + } + /* load immediate */ + if (flags & 0x8) { + ret = adreno_ib_find_objs(device, process, + ptr[i + 1], (ptr[i] & 0x0000FFFF), + SNAPSHOT_GPU_OBJECT_IB, + ib_obj_list, 2); + if (ret) + break; + } + } + return ret; +} + +/* + * adreno_cp_parse_ib2() - Wrapper function around IB2 parsing + * @device: Device pointer + * @process: Process in which the IB is allocated + * @gpuaddr: IB2 gpuaddr + * @dwords: IB2 size in dwords + * @ib_obj_list: List of objects found in IB + * @ib_level: The level from which function is called, either from IB1 or IB2 + * + * Function does some checks to ensure that IB2 parsing is called from IB1 + * and then calls the function to find objects in IB2. + */ +static int adreno_cp_parse_ib2(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + struct adreno_ib_object_list *ib_obj_list, + int ib_level) +{ + struct adreno_ib_object *ib_obj; + int i; + /* + * We can only expect an IB2 in IB1, if we are + * already processing an IB2 then return error + */ + if (2 == ib_level) + return -EINVAL; + /* + * only try to find sub objects iff this IB has + * not been processed already + */ + for (i = 0; i < ib_obj_list->num_objs; i++) + ib_obj = &(ib_obj_list->obj_list[i]); + if ((SNAPSHOT_GPU_OBJECT_IB == ib_obj->snapshot_obj_type) && + (gpuaddr >= ib_obj->gpuaddr) && + (gpuaddr + dwords * sizeof(unsigned int) <= + ib_obj->gpuaddr + ib_obj->size)) + return 0; + + return adreno_ib_find_objs(device, process, gpuaddr, dwords, + SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 2); +} + +/* + * adreno_ib_find_objs() - Find all IB objects in a given IB + * @device: The device pointer on which the IB executes + * @process: The process in which the IB and all contained objects are mapped. + * @gpuaddr: The gpu address of the IB + * @dwords: Size of ib in dwords + * @obj_type: The object type can be either an IB or a draw state sequence + * @ib_obj_list: The list in which the IB and the objects in it are added. + * @ib_level: Indicates if IB1 or IB2 is being processed + * + * Finds all IB objects in a given IB and puts then in a list. 
Can be called + * recursively for the IB2's in the IB1's + * Returns 0 on success else error code + */ +static int adreno_ib_find_objs(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + int obj_type, + struct adreno_ib_object_list *ib_obj_list, + int ib_level) +{ + int ret = 0; + uint64_t rem = dwords; + int i; + struct ib_parser_variables ib_parse_vars; + unsigned int *src; + struct adreno_ib_object *ib_obj; + struct kgsl_mem_entry *entry; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* check that this IB is not already on list */ + for (i = 0; i < ib_obj_list->num_objs; i++) { + ib_obj = &(ib_obj_list->obj_list[i]); + if ((obj_type == ib_obj->snapshot_obj_type) && + (ib_obj->gpuaddr <= gpuaddr) && + ((ib_obj->gpuaddr + ib_obj->size) >= + (gpuaddr + (dwords << 2)))) + return 0; + } + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (!entry) + return -EINVAL; + + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, (dwords << 2))) { + kgsl_mem_entry_put(entry); + return -EINVAL; + } + + src = kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr); + if (!src) { + kgsl_mem_entry_put(entry); + return -EINVAL; + } + + memset(&ib_parse_vars, 0, sizeof(struct ib_parser_variables)); + + ret = adreno_ib_add(process, gpuaddr, obj_type, ib_obj_list); + if (ret) + goto done; + + for (i = 0; rem > 0; rem--, i++) { + int pktsize; + + if (pkt_is_type0(src[i])) + pktsize = type0_pkt_size(src[i]); + + else if (pkt_is_type3(src[i])) + pktsize = type3_pkt_size(src[i]); + + else if (pkt_is_type4(src[i])) + pktsize = type4_pkt_size(src[i]); + + else if (pkt_is_type7(src[i])) + pktsize = type7_pkt_size(src[i]); + + /* + * If the packet isn't a type 1, type 3, type 4 or type 7 then + * don't bother parsing it - it is likely corrupted + */ + else + break; + + if (((pkt_is_type0(src[i]) || pkt_is_type3(src[i])) && !pktsize) + || ((pktsize + 1) > rem)) + break; + + if (pkt_is_type3(src[i])) { + if (adreno_cmd_is_ib(adreno_dev, src[i])) { + uint64_t gpuaddrib2 = src[i + 1]; + uint64_t size = src[i + 2]; + + ret = adreno_cp_parse_ib2(device, process, + gpuaddrib2, size, + ib_obj_list, ib_level); + if (ret) + goto done; + } else { + ret = ib_parse_type3(device, &src[i], process, + ib_obj_list, + &ib_parse_vars); + /* + * If the parse function failed (probably + * because of a bad decode) then bail out and + * just capture the binary IB data + */ + + if (ret) + goto done; + } + } + + else if (pkt_is_type7(src[i])) { + if (adreno_cmd_is_ib(adreno_dev, src[i])) { + uint64_t size = src[i + 3]; + uint64_t gpuaddrib2 = src[i + 2]; + gpuaddrib2 = gpuaddrib2 << 32 | src[i + 1]; + + ret = adreno_cp_parse_ib2(device, process, + gpuaddrib2, size, + ib_obj_list, ib_level); + if (ret) + goto done; + } else { + ret = ib_parse_type7(device, &src[i], process, + ib_obj_list, + &ib_parse_vars); + /* + * If the parse function failed (probably + * because of a bad decode) then bail out and + * just capture the binary IB data + */ + + if (ret) + goto done; + } + } + + else if (pkt_is_type0(src[i])) { + ret = ib_parse_type0(device, &src[i], process, + ib_obj_list, &ib_parse_vars); + if (ret) + goto done; + } + + i += pktsize; + rem -= pktsize; + } + +done: + /* + * For set draw objects there may not be a draw_indx packet at its end + * to signal that we need to save the found objects in it, so just save + * it here. 
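The two public entry points defined just below, adreno_ib_create_object_list() and adreno_ib_destroy_obj_list(), wrap the parser above. A minimal usage sketch with error handling trimmed; the pr_debug() consumer is only an example, and note that a partially filled list can be returned even when parsing fails:

static void example_dump_ib_objects(struct kgsl_device *device,
                struct kgsl_process_private *process,
                uint64_t ib_gpuaddr, uint64_t ib_dwords)
{
        struct adreno_ib_object_list *list = NULL;
        int i, ret;

        ret = adreno_ib_create_object_list(device, process, ib_gpuaddr,
                        ib_dwords, &list);

        /* Even on error, any objects found before the failure are returned */
        if (list == NULL)
                return;

        for (i = 0; i < list->num_objs; i++)
                pr_debug("ib obj %d: 0x%llx size %llu type %d (ret %d)\n", i,
                        (unsigned long long)list->obj_list[i].gpuaddr,
                        (unsigned long long)list->obj_list[i].size,
                        list->obj_list[i].snapshot_obj_type, ret);

        adreno_ib_destroy_obj_list(list);
}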
+ */ + if (!ret && SNAPSHOT_GPU_OBJECT_DRAW == obj_type) + ret = ib_add_type0_entries(device, process, ib_obj_list, + &ib_parse_vars); + + kgsl_memdesc_unmap(&entry->memdesc); + kgsl_mem_entry_put(entry); + return ret; +} + + +/* + * adreno_ib_create_object_list() - Find all the memory objects in IB + * @device: The device pointer on which the IB executes + * @process: The process in which the IB and all contained objects are mapped + * @gpuaddr: The gpu address of the IB + * @dwords: Size of ib in dwords + * @ib_obj_list: The list in which the IB and the objects in it are added. + * + * Find all the memory objects that an IB needs for execution and place + * them in a list including the IB. + * Returns the ib object list. On success 0 is returned, on failure error + * code is returned along with number of objects that was saved before + * error occurred. If no objects found then the list pointer is set to + * NULL. + */ +int adreno_ib_create_object_list(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + struct adreno_ib_object_list **out_ib_obj_list) +{ + int ret = 0; + struct adreno_ib_object_list *ib_obj_list; + + if (!out_ib_obj_list) + return -EINVAL; + + *out_ib_obj_list = NULL; + + ib_obj_list = kzalloc(sizeof(*ib_obj_list), GFP_KERNEL); + if (!ib_obj_list) + return -ENOMEM; + + ib_obj_list->obj_list = vmalloc(MAX_IB_OBJS * + sizeof(struct adreno_ib_object)); + + if (!ib_obj_list->obj_list) { + kfree(ib_obj_list); + return -ENOMEM; + } + + ret = adreno_ib_find_objs(device, process, gpuaddr, dwords, + SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 1); + + /* Even if there was an error return the remaining objects found */ + if (ib_obj_list->num_objs) + *out_ib_obj_list = ib_obj_list; + + return ret; +} + +/* + * adreno_ib_destroy_obj_list() - Destroy an ib object list + * @ib_obj_list: List to destroy + * + * Free up all resources used by an ib_obj_list + */ +void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list) +{ + int i; + + if (!ib_obj_list) + return; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + if (ib_obj_list->obj_list[i].entry) + kgsl_mem_entry_put(ib_obj_list->obj_list[i].entry); + } + vfree(ib_obj_list->obj_list); + kfree(ib_obj_list); +} diff --git a/drivers/gpu/msm/adreno_cp_parser.h b/drivers/gpu/msm/adreno_cp_parser.h new file mode 100644 index 000000000000..0248de2d600a --- /dev/null +++ b/drivers/gpu/msm/adreno_cp_parser.h @@ -0,0 +1,186 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef __ADRENO_IB_PARSER__ +#define __ADRENO_IB_PARSER__ + +#include "adreno.h" + +extern const unsigned int a3xx_cp_addr_regs[]; +extern const unsigned int a4xx_cp_addr_regs[]; + +/* + * struct adreno_ib_object - Structure containing information about an + * address range found in an IB + * @gpuaddr: The starting gpuaddress of the range + * @size: Size of the range + * @snapshot_obj_type - Type of range used in snapshot + * @entry: The memory entry in which this range is found + */ +struct adreno_ib_object { + uint64_t gpuaddr; + uint64_t size; + int snapshot_obj_type; + struct kgsl_mem_entry *entry; +}; + +/* + * struct adreno_ib_object_list - List of address ranges found in IB + * @obj_list: The address range list + * @num_objs: Number of objects in list + */ +struct adreno_ib_object_list { + struct adreno_ib_object *obj_list; + int num_objs; +}; + +/* + * adreno registers used during IB parsing, there contain addresses + * and sizes of the addresses that present in an IB + */ +enum adreno_cp_addr_regs { + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0 = 0, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6, + ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7, + ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30, + ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31, + ADRENO_CP_ADDR_VSC_SIZE_ADDRESS, + ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR, + ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR, + ADRENO_CP_ADDR_SP_VS_OBJ_START_REG, + ADRENO_CP_ADDR_SP_FS_OBJ_START_REG, + ADRENO_CP_UCHE_INVALIDATE0, + ADRENO_CP_UCHE_INVALIDATE1, + ADRENO_CP_ADDR_MAX, +}; + +/* + * adreno_ib_init_ib_obj() - Create an ib object structure and initialize it + * with gpuaddress and size + * @gpuaddr: gpuaddr with which to initialize the object with + * @size: Size in bytes with which the object is initialized + * @ib_type: The IB type used by snapshot + * + * Returns the object pointer on success else error code 
in the pointer + */ +static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr, + uint64_t size, int obj_type, + struct kgsl_mem_entry *entry, + struct adreno_ib_object *ib_obj) +{ + ib_obj->gpuaddr = gpuaddr; + ib_obj->size = size; + ib_obj->snapshot_obj_type = obj_type; + ib_obj->entry = entry; +} + +/* + * adreno_cp_parser_getreg() - Returns the value of register offset + * @adreno_dev: The adreno device being operated upon + * @reg_enum: Enum index of the register whose offset is returned + */ +static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev, + enum adreno_cp_addr_regs reg_enum) +{ + if (reg_enum == ADRENO_CP_ADDR_MAX) + return -EEXIST; + + if (adreno_is_a3xx(adreno_dev)) + return a3xx_cp_addr_regs[reg_enum]; + else if (adreno_is_a4xx(adreno_dev)) + return a4xx_cp_addr_regs[reg_enum]; + else + return -EEXIST; +} + +/* + * adreno_cp_parser_regindex() - Returns enum index for a given register offset + * @adreno_dev: The adreno device being operated upon + * @offset: Register offset + * @start: The start index to search from + * @end: The last index to search + * + * Checks the list of registers defined for the device and returns the index + * whose offset value matches offset parameter. + */ +static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev, + unsigned int offset, + enum adreno_cp_addr_regs start, + enum adreno_cp_addr_regs end) +{ + int i; + const unsigned int *regs; + if (adreno_is_a4xx(adreno_dev)) + regs = a4xx_cp_addr_regs; + else if (adreno_is_a3xx(adreno_dev)) + regs = a3xx_cp_addr_regs; + else + return -EEXIST; + + for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++) + if (regs[i] == offset) + return i; + return -EEXIST; +} + +int adreno_ib_create_object_list( + struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords, + struct adreno_ib_object_list **out_ib_obj_list); + +void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list); + +#endif diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c new file mode 100644 index 000000000000..2290d29fd28c --- /dev/null +++ b/drivers/gpu/msm/adreno_debugfs.c @@ -0,0 +1,380 @@ +/* Copyright (c) 2002,2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/io.h> + +#include "kgsl.h" +#include "adreno.h" +#include "kgsl_cffdump.h" +#include "kgsl_sync.h" + +static int _isdb_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* Once ISDB goes enabled it stays enabled */ + if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) + return 0; + + mutex_lock(&device->mutex); + + /* + * Bring down the GPU so we can bring it back up with the correct power + * and clock settings + */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + set_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + + return 0; +} + +static int _isdb_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + *val = (u64) test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n"); + +static int _lm_limit_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + return 0; + + /* assure value is between 3A and 10A */ + if (val > 10000) + val = 10000; + else if (val < 3000) + val = 3000; + + adreno_dev->lm_limit = val; + + if (test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) { + mutex_lock(&device->mutex); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + mutex_unlock(&device->mutex); + } + + return 0; +} + +static int _lm_limit_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + *val = 0; + + *val = (u64) adreno_dev->lm_limit; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_lm_limit_fops, _lm_limit_get, _lm_limit_set, "%llu\n"); + +static int _lm_threshold_count_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) + *val = 0; + else + *val = (u64) adreno_dev->lm_threshold_cross; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_lm_threshold_fops, _lm_threshold_count_get, + NULL, "%llu\n"); + +static int _active_count_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + unsigned int i = atomic_read(&device->active_cnt); + + *val = (u64) i; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_active_count_fops, _active_count_get, NULL, "%llu\n"); + +typedef void (*reg_read_init_t)(struct kgsl_device *device); +typedef void (*reg_read_fill_t)(struct kgsl_device *device, int i, + unsigned int *vals, int linec); + + +static void sync_event_print(struct seq_file *s, + struct kgsl_cmdbatch_sync_event *sync_event) +{ + switch (sync_event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + seq_printf(s, "sync: ctx: %d ts: %d", + sync_event->context->id, sync_event->timestamp); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + seq_printf(s, "sync: [%p] %s", sync_event->handle, + (sync_event->handle && sync_event->handle->fence) + ? 
sync_event->handle->fence->name : "NULL"); + break; + default: + seq_printf(s, "sync: type: %d", sync_event->type); + break; + } +} + +struct flag_entry { + unsigned long mask; + const char *str; +}; + +static const struct flag_entry cmdbatch_flags[] = {KGSL_CMDBATCH_FLAGS}; + +static const struct flag_entry cmdbatch_priv[] = { + { CMDBATCH_FLAG_SKIP, "skip"}, + { CMDBATCH_FLAG_FORCE_PREAMBLE, "force_preamble"}, + { CMDBATCH_FLAG_WFI, "wait_for_idle" }, +}; + +static const struct flag_entry context_flags[] = {KGSL_CONTEXT_FLAGS}; + +/* + * Note that the ADRENO_CONTEXT_* flags start at + * KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC so it is ok to cross the streams here. + */ +static const struct flag_entry context_priv[] = { + { KGSL_CONTEXT_PRIV_DETACHED, "detached"}, + { KGSL_CONTEXT_PRIV_INVALID, "invalid"}, + { KGSL_CONTEXT_PRIV_PAGEFAULT, "pagefault"}, + { ADRENO_CONTEXT_FAULT, "fault"}, + { ADRENO_CONTEXT_GPU_HANG, "gpu_hang"}, + { ADRENO_CONTEXT_GPU_HANG_FT, "gpu_hang_ft"}, + { ADRENO_CONTEXT_SKIP_EOF, "skip_end_of_frame" }, + { ADRENO_CONTEXT_FORCE_PREAMBLE, "force_preamble"}, +}; + +static void print_flags(struct seq_file *s, const struct flag_entry *table, + size_t table_size, unsigned long flags) +{ + int i; + int first = 1; + + for (i = 0; i < table_size; i++) { + if (flags & table[i].mask) { + seq_printf(s, "%c%s", first ? '\0' : '|', table[i].str); + flags &= ~(table[i].mask); + first = 0; + } + } + if (flags) { + seq_printf(s, "%c0x%lx", first ? '\0' : '|', flags); + first = 0; + } + if (first) + seq_puts(s, "None"); +} + +static void cmdbatch_print(struct seq_file *s, struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_cmdbatch_sync_event *event; + unsigned int i; + + /* print fences first, since they block this cmdbatch */ + + for (i = 0; i < cmdbatch->numsyncs; i++) { + event = &cmdbatch->synclist[i]; + + if (!kgsl_cmdbatch_event_pending(cmdbatch, i)) + continue; + + /* + * Timestamp is 0 for KGSL_CONTEXT_SYNC, but print it anyways + * so that it is clear if the fence was a separate submit + * or part of an IB submit. 
+ */ + seq_printf(s, "\t%d ", cmdbatch->timestamp); + sync_event_print(s, event); + seq_puts(s, "\n"); + } + + /* if this flag is set, there won't be an IB */ + if (cmdbatch->flags & KGSL_CONTEXT_SYNC) + return; + + seq_printf(s, "\t%d: ib: expires: %lu", + cmdbatch->timestamp, cmdbatch->expires); + + seq_puts(s, " flags: "); + print_flags(s, cmdbatch_flags, ARRAY_SIZE(cmdbatch_flags), + cmdbatch->flags); + + seq_puts(s, " priv: "); + print_flags(s, cmdbatch_priv, ARRAY_SIZE(cmdbatch_priv), + cmdbatch->priv); + + seq_puts(s, "\n"); +} + +static const char *ctx_type_str(unsigned int type) +{ + int i; + struct flag_entry table[] = {KGSL_CONTEXT_TYPES}; + + for (i = 0; i < ARRAY_SIZE(table); i++) + if (type == table[i].mask) + return table[i].str; + return "UNKNOWN"; +} + +static int ctx_print(struct seq_file *s, void *unused) +{ + struct adreno_context *drawctxt = s->private; + unsigned int i; + struct kgsl_event *event; + unsigned int queued = 0, consumed = 0, retired = 0; + + seq_printf(s, "id: %d type: %s priority: %d process: %s (%d) tid: %d\n", + drawctxt->base.id, + ctx_type_str(drawctxt->type), + drawctxt->base.priority, + drawctxt->base.proc_priv->comm, + drawctxt->base.proc_priv->pid, + drawctxt->base.tid); + + seq_puts(s, "flags: "); + print_flags(s, context_flags, ARRAY_SIZE(context_flags), + drawctxt->base.flags & ~(KGSL_CONTEXT_PRIORITY_MASK + | KGSL_CONTEXT_TYPE_MASK)); + seq_puts(s, " priv: "); + print_flags(s, context_priv, ARRAY_SIZE(context_priv), + drawctxt->base.priv); + seq_puts(s, "\n"); + + seq_puts(s, "timestamps: "); + kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base, + KGSL_TIMESTAMP_QUEUED, &queued); + kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base, + KGSL_TIMESTAMP_CONSUMED, &consumed); + kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base, + KGSL_TIMESTAMP_RETIRED, &retired); + seq_printf(s, "queued: %u consumed: %u retired: %u global:%u\n", + queued, consumed, retired, + drawctxt->internal_timestamp); + + seq_puts(s, "cmdqueue:\n"); + + spin_lock(&drawctxt->lock); + for (i = drawctxt->cmdqueue_head; + i != drawctxt->cmdqueue_tail; + i = CMDQUEUE_NEXT(i, ADRENO_CONTEXT_CMDQUEUE_SIZE)) + cmdbatch_print(s, drawctxt->cmdqueue[i]); + spin_unlock(&drawctxt->lock); + + seq_puts(s, "events:\n"); + spin_lock(&drawctxt->base.events.lock); + list_for_each_entry(event, &drawctxt->base.events.events, node) + seq_printf(s, "\t%d: %pF created: %u\n", event->timestamp, + event->func, event->created); + spin_unlock(&drawctxt->base.events.lock); + + return 0; +} + +static int ctx_open(struct inode *inode, struct file *file) +{ + int ret; + unsigned int id = (unsigned int)(unsigned long)inode->i_private; + struct kgsl_context *context; + + context = kgsl_context_get(kgsl_get_device(KGSL_DEVICE_3D0), id); + if (context == NULL) + return -ENODEV; + + ret = single_open(file, ctx_print, context); + if (ret) + kgsl_context_put(context); + return ret; +} + +static int ctx_release(struct inode *inode, struct file *file) +{ + struct kgsl_context *context; + + context = ((struct seq_file *)file->private_data)->private; + + kgsl_context_put(context); + + return single_release(inode, file); +} + +static const struct file_operations ctx_fops = { + .open = ctx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ctx_release, +}; + + +void +adreno_context_debugfs_init(struct adreno_device *adreno_dev, + struct adreno_context *ctx) +{ + unsigned char name[16]; + + snprintf(name, sizeof(name), "%d", ctx->base.id); + + ctx->debug_root = debugfs_create_file(name, 
0444, + adreno_dev->ctx_d_debugfs, + (void *)(unsigned long)ctx->base.id, &ctx_fops); +} + +void adreno_debugfs_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (!device->d_debugfs || IS_ERR(device->d_debugfs)) + return; + + kgsl_cffdump_debugfs_create(device); + + debugfs_create_file("active_cnt", 0444, device->d_debugfs, device, + &_active_count_fops); + adreno_dev->ctx_d_debugfs = debugfs_create_dir("ctx", + device->d_debugfs); + + if (ADRENO_FEATURE(adreno_dev, ADRENO_LM)) { + debugfs_create_file("lm_limit", 0644, device->d_debugfs, device, + &_lm_limit_fops); + debugfs_create_file("lm_threshold_count", 0444, + device->d_debugfs, device, &_lm_threshold_fops); + } + + if (adreno_is_a5xx(adreno_dev)) + debugfs_create_file("isdb", 0644, device->d_debugfs, + device, &_isdb_fops); +} diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c new file mode 100644 index 000000000000..6daf6977829e --- /dev/null +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -0,0 +1,2595 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/wait.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/jiffies.h> +#include <linux/err.h> + +#include "kgsl.h" +#include "kgsl_cffdump.h" +#include "kgsl_sharedmem.h" +#include "adreno.h" +#include "adreno_ringbuffer.h" +#include "adreno_trace.h" +#include "kgsl_sharedmem.h" + +#define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s)) + +/* Time in ms after which the dispatcher tries to schedule an unscheduled RB */ +static unsigned int _dispatch_starvation_time = 2000; + +/* Amount of time in ms that a starved RB is permitted to execute for */ +static unsigned int _dispatch_time_slice = 25; + +/* + * If set then dispatcher tries to schedule lower priority RB's after if they + * have commands in their pipe and have been inactive for + * _dispatch_starvation_time. Also, once an RB is schduled it will be allowed + * to run for _dispatch_time_slice unless it's commands complete before + * _dispatch_time_slice + */ +unsigned int adreno_disp_preempt_fair_sched; + +/* Number of commands that can be queued in a context before it sleeps */ +static unsigned int _context_cmdqueue_size = 50; + +/* Number of milliseconds to wait for the context queue to clear */ +static unsigned int _context_queue_wait = 10000; + +/* Number of command batches sent at a time from a single context */ +static unsigned int _context_cmdbatch_burst = 5; + +/* + * GFT throttle parameters. If GFT recovered more than + * X times in Y ms invalidate the context and do not attempt recovery. 
+ * X -> _fault_throttle_burst + * Y -> _fault_throttle_time + */ +static unsigned int _fault_throttle_time = 3000; +static unsigned int _fault_throttle_burst = 3; + +/* + * Maximum ringbuffer inflight for the single submitting context case - this + * should be sufficiently high to keep the GPU loaded + */ +static unsigned int _dispatcher_q_inflight_hi = 15; + +/* + * Minimum inflight for the multiple context case - this should sufficiently low + * to allow for lower latency context switching + */ +static unsigned int _dispatcher_q_inflight_lo = 4; + +/* Command batch timeout (in milliseconds) */ +unsigned int adreno_cmdbatch_timeout = 2000; + +/* Interval for reading and comparing fault detection registers */ +static unsigned int _fault_timer_interval = 200; + +static int dispatcher_do_fault(struct kgsl_device *device); + +/** + * _track_context - Add a context ID to the list of recently seen contexts + * for the command queue + * @cmdqueue: cmdqueue to add the context to + * @id: ID of the context to add + * + * This function is called when a new item is added to a context - this tracks + * the number of active contexts seen in the last 100ms for the command queue + */ +static void _track_context(struct adreno_dispatcher_cmdqueue *cmdqueue, + unsigned int id) +{ + struct adreno_context_list *list = cmdqueue->active_contexts; + int oldest = -1, empty = -1; + unsigned long age = 0; + int i, count = 0; + bool updated = false; + + for (i = 0; i < ACTIVE_CONTEXT_LIST_MAX; i++) { + + /* If the new ID matches the slot update the expire time */ + if (list[i].id == id) { + list[i].jiffies = jiffies + msecs_to_jiffies(100); + updated = true; + count++; + continue; + } + + /* Remember and skip empty slots */ + if ((list[i].id == 0) || + time_after(jiffies, list[i].jiffies)) { + empty = i; + continue; + } + + count++; + + /* Remember the oldest active entry */ + if (oldest == -1 || time_before(list[i].jiffies, age)) { + age = list[i].jiffies; + oldest = i; + } + } + + if (updated == false) { + int pos = (empty != -1) ? empty : oldest; + + list[pos].jiffies = jiffies + msecs_to_jiffies(100); + list[pos].id = id; + count++; + } + + cmdqueue->active_context_count = count; +} + +/* + * If only one context has queued in the last 100 milliseconds increase + * inflight to a high number to load up the GPU. If multiple contexts + * have queued drop the inflight for better context switch latency. + * If no contexts have queued what are you even doing here? + */ + +static inline int +_cmdqueue_inflight(struct adreno_dispatcher_cmdqueue *cmdqueue) +{ + return (cmdqueue->active_context_count > 1) + ? _dispatcher_q_inflight_lo : _dispatcher_q_inflight_hi; +} + +/** + * fault_detect_read() - Read the set of fault detect registers + * @device: Pointer to the KGSL device struct + * + * Read the set of fault detect registers and store them in the local array. + * This is for the initial values that are compared later with + * fault_detect_read_compare. Also store the initial timestamp of each rb + * to compare the timestamps with. 
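+ * fault_detect_read_compare() later re-reads the same registers and the
+ * current ringbuffer's retired timestamp and reports whether anything has
+ * changed since this baseline was taken.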
+ */ +static void fault_detect_read(struct kgsl_device *device) +{ + int i; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return; + + for (i = 0; i < adreno_dev->num_ringbuffers; i++) { + struct adreno_ringbuffer *rb = &(adreno_dev->ringbuffers[i]); + adreno_rb_readtimestamp(device, rb, + KGSL_TIMESTAMP_RETIRED, &(rb->fault_detect_ts)); + } + + for (i = 0; i < adreno_ft_regs_num; i++) { + if (adreno_ft_regs[i] != 0) + kgsl_regread(device, adreno_ft_regs[i], + &adreno_ft_regs_val[i]); + } +} + +/* + * Check to see if the device is idle + */ +static inline bool _isidle(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + const struct adreno_gpu_core *gpucore = adreno_dev->gpucore; + unsigned int reg_rbbm_status; + + if (!kgsl_state_is_awake(device)) + goto ret; + + /* only check rbbm status to determine if GPU is idle */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, ®_rbbm_status); + + if (reg_rbbm_status & gpucore->busy_mask) + return false; + +ret: + /* Clear the existing register values */ + memset(adreno_ft_regs_val, 0, + adreno_ft_regs_num * sizeof(unsigned int)); + + return true; +} + +/** + * fault_detect_read_compare() - Read the fault detect registers and compare + * them to the current value + * @device: Pointer to the KGSL device struct + * + * Read the set of fault detect registers and compare them to the current set + * of registers. Return 1 if any of the register values changed. Also, compare + * if the current RB's timstamp has changed or not. + */ +static int fault_detect_read_compare(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + int i, ret = 0; + unsigned int ts; + + /* Check to see if the device is idle - if so report no hang */ + if (_isidle(device) == true) + ret = 1; + + for (i = 0; i < adreno_ft_regs_num; i++) { + unsigned int val; + + if (adreno_ft_regs[i] == 0) + continue; + kgsl_regread(device, adreno_ft_regs[i], &val); + if (val != adreno_ft_regs_val[i]) + ret = 1; + adreno_ft_regs_val[i] = val; + } + + if (!adreno_rb_readtimestamp(device, adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &ts)) { + if (ts != rb->fault_detect_ts) + ret = 1; + + rb->fault_detect_ts = ts; + } + + return ret; +} + +static void start_fault_timer(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + if (adreno_soft_fault_detect(adreno_dev)) + mod_timer(&dispatcher->fault_timer, + jiffies + msecs_to_jiffies(_fault_timer_interval)); +} + +/** + * _retire_marker() - Retire a marker command batch without sending it to the + * hardware + * @cmdbatch: Pointer to the cmdbatch to retire + * + * In some cases marker commands can be retired by the software without going to + * the GPU. In those cases, update the memstore from the CPU, kick off the + * event engine to handle expired events and destroy the command batch. 
+ */ +static void _retire_marker(struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_context *context = cmdbatch->context; + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + struct kgsl_device *device = context->device; + + /* + * Write the start and end timestamp to the memstore to keep the + * accounting sane + */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + cmdbatch->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + cmdbatch->timestamp); + + + /* Retire pending GPU events for the object */ + kgsl_process_event_group(device, &context->events); + + trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb); + kgsl_cmdbatch_destroy(cmdbatch); +} + +static int _check_context_queue(struct adreno_context *drawctxt) +{ + int ret; + + spin_lock(&drawctxt->lock); + + /* + * Wake up if there is room in the context or if the whole thing got + * invalidated while we were asleep + */ + + if (kgsl_context_invalid(&drawctxt->base)) + ret = 1; + else + ret = drawctxt->queued < _context_cmdqueue_size ? 1 : 0; + + spin_unlock(&drawctxt->lock); + + return ret; +} + +/* + * return true if this is a marker command and the dependent timestamp has + * retired + */ +static bool _marker_expired(struct kgsl_cmdbatch *cmdbatch) +{ + return (cmdbatch->flags & KGSL_CMDBATCH_MARKER) && + kgsl_check_timestamp(cmdbatch->device, cmdbatch->context, + cmdbatch->marker_timestamp); +} + +static inline void _pop_cmdbatch(struct adreno_context *drawctxt) +{ + drawctxt->cmdqueue_head = CMDQUEUE_NEXT(drawctxt->cmdqueue_head, + ADRENO_CONTEXT_CMDQUEUE_SIZE); + drawctxt->queued--; +} +/** + * Removes all expired marker and sync cmdbatches from + * the context queue when marker command and dependent + * timestamp are retired. This function is recursive. + * returns cmdbatch if context has command, NULL otherwise. + */ +static struct kgsl_cmdbatch *_expire_markers(struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch; + + if (drawctxt->cmdqueue_head == drawctxt->cmdqueue_tail) + return NULL; + + cmdbatch = drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + if (cmdbatch == NULL) + return NULL; + + /* Check to see if this is a marker we can skip over */ + if ((cmdbatch->flags & KGSL_CMDBATCH_MARKER) && + _marker_expired(cmdbatch)) { + _pop_cmdbatch(drawctxt); + _retire_marker(cmdbatch); + return _expire_markers(drawctxt); + } + + if (cmdbatch->flags & KGSL_CMDBATCH_SYNC) { + if (!kgsl_cmdbatch_events_pending(cmdbatch)) { + _pop_cmdbatch(drawctxt); + kgsl_cmdbatch_destroy(cmdbatch); + return _expire_markers(drawctxt); + } + } + + return cmdbatch; +} + +static void expire_markers(struct adreno_context *drawctxt) +{ + spin_lock(&drawctxt->lock); + _expire_markers(drawctxt); + spin_unlock(&drawctxt->lock); +} + +static struct kgsl_cmdbatch *_get_cmdbatch(struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch; + bool pending = false; + + cmdbatch = _expire_markers(drawctxt); + + if (cmdbatch == NULL) + return NULL; + + /* + * If the marker isn't expired but the SKIP bit is set + * then there are real commands following this one in + * the queue. This means that we need to dispatch the + * command so that we can keep the timestamp accounting + * correct. 
If skip isn't set then we block this queue + * until the dependent timestamp expires + */ + if ((cmdbatch->flags & KGSL_CMDBATCH_MARKER) && + (!test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv))) + pending = true; + + if (kgsl_cmdbatch_events_pending(cmdbatch)) + pending = true; + + /* + * If changes are pending and the canary timer hasn't been + * started yet, start it + */ + if (pending) { + /* + * If syncpoints are pending start the canary timer if + * it hasn't already been started + */ + if (!cmdbatch->timeout_jiffies) { + cmdbatch->timeout_jiffies = jiffies + 5 * HZ; + mod_timer(&cmdbatch->timer, cmdbatch->timeout_jiffies); + } + + return ERR_PTR(-EAGAIN); + } + + _pop_cmdbatch(drawctxt); + return cmdbatch; +} + +/** + * adreno_dispatcher_get_cmdbatch() - Get a new command from a context queue + * @drawctxt: Pointer to the adreno draw context + * + * Dequeue a new command batch from the context list + */ +static struct kgsl_cmdbatch *adreno_dispatcher_get_cmdbatch( + struct adreno_context *drawctxt) +{ + struct kgsl_cmdbatch *cmdbatch; + + spin_lock(&drawctxt->lock); + cmdbatch = _get_cmdbatch(drawctxt); + spin_unlock(&drawctxt->lock); + + /* + * Delete the timer and wait for timer handler to finish executing + * on another core before queueing the buffer. We must do this + * without holding any spin lock that the timer handler might be using + */ + if (!IS_ERR_OR_NULL(cmdbatch)) + del_timer_sync(&cmdbatch->timer); + + return cmdbatch; +} + +/** + * adreno_dispatcher_requeue_cmdbatch() - Put a command back on the context + * queue + * @drawctxt: Pointer to the adreno draw context + * @cmdbatch: Pointer to the KGSL cmdbatch to requeue + * + * Failure to submit a command to the ringbuffer isn't the fault of the command + * being submitted so if a failure happens, push it back on the head of the the + * context queue to be reconsidered again unless the context got detached. + */ +static inline int adreno_dispatcher_requeue_cmdbatch( + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch) +{ + unsigned int prev; + spin_lock(&drawctxt->lock); + + if (kgsl_context_detached(&drawctxt->base) || + kgsl_context_invalid(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + /* get rid of this cmdbatch since the context is bad */ + kgsl_cmdbatch_destroy(cmdbatch); + return -ENOENT; + } + + prev = drawctxt->cmdqueue_head == 0 ? + (ADRENO_CONTEXT_CMDQUEUE_SIZE - 1) : + (drawctxt->cmdqueue_head - 1); + + /* + * The maximum queue size always needs to be one less then the size of + * the ringbuffer queue so there is "room" to put the cmdbatch back in + */ + + BUG_ON(prev == drawctxt->cmdqueue_tail); + + drawctxt->cmdqueue[prev] = cmdbatch; + drawctxt->queued++; + + /* Reset the command queue head to reflect the newly requeued change */ + drawctxt->cmdqueue_head = prev; + spin_unlock(&drawctxt->lock); + return 0; +} + +/** + * dispatcher_queue_context() - Queue a context in the dispatcher pending list + * @dispatcher: Pointer to the adreno dispatcher struct + * @drawctxt: Pointer to the adreno draw context + * + * Add a context to the dispatcher pending list. 
+ */ +static void dispatcher_queue_context(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* Refuse to queue a detached context */ + if (kgsl_context_detached(&drawctxt->base)) + return; + + spin_lock(&dispatcher->plist_lock); + + if (plist_node_empty(&drawctxt->pending)) { + /* Get a reference to the context while it sits on the list */ + if (_kgsl_context_get(&drawctxt->base)) { + trace_dispatch_queue_context(drawctxt); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + } + + spin_unlock(&dispatcher->plist_lock); +} + +/** + * sendcmd() - Send a command batch to the GPU hardware + * @dispatcher: Pointer to the adreno dispatcher struct + * @cmdbatch: Pointer to the KGSL cmdbatch being sent + * + * Send a KGSL command batch to the GPU hardware + */ +static int sendcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + struct adreno_dispatcher_cmdqueue *dispatch_q = + ADRENO_CMDBATCH_DISPATCH_CMDQUEUE(cmdbatch); + struct adreno_submit_time time; + uint64_t secs = 0; + unsigned long nsecs = 0; + int ret; + + mutex_lock(&device->mutex); + if (adreno_gpu_halt(adreno_dev) != 0) { + mutex_unlock(&device->mutex); + return -EBUSY; + } + + dispatcher->inflight++; + dispatch_q->inflight++; + + if (dispatcher->inflight == 1 && + !test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) { + /* Time to make the donuts. Turn on the GPU */ + ret = kgsl_active_count_get(device); + if (ret) { + dispatcher->inflight--; + dispatch_q->inflight--; + mutex_unlock(&device->mutex); + return ret; + } + + set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); + } + + if (test_bit(ADRENO_DEVICE_CMDBATCH_PROFILE, &adreno_dev->priv)) { + set_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv); + cmdbatch->profile_index = adreno_dev->cmdbatch_profile_index; + adreno_dev->cmdbatch_profile_index = + (adreno_dev->cmdbatch_profile_index + 1) % + ADRENO_CMDBATCH_PROFILE_COUNT; + } + + ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdbatch, &time); + + /* + * On the first command, if the submission was successful, then read the + * fault registers. If it failed then turn off the GPU. Sad face. 
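+ * The values read here are the baseline that fault_detect_read_compare()
+ * checks against later.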
+ */ + + if (dispatcher->inflight == 1) { + if (ret == 0) { + fault_detect_read(device); + + if (!test_and_set_bit(ADRENO_DISPATCHER_ACTIVE, + &dispatcher->priv)) + reinit_completion(&dispatcher->idle_gate); + } else { + kgsl_active_count_put(device); + clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); + } + } + + mutex_unlock(&device->mutex); + + if (ret) { + dispatcher->inflight--; + dispatch_q->inflight--; + + /* + * -ENOENT means that the context was detached before the + * command was submitted - don't log a message in that case + */ + + if (ret != -ENOENT) + KGSL_DRV_ERR(device, + "Unable to submit command to the ringbuffer %d\n", + ret); + return ret; + } + + secs = time.ktime; + nsecs = do_div(secs, 1000000000); + + trace_adreno_cmdbatch_submitted(cmdbatch, (int) dispatcher->inflight, + time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb); + + cmdbatch->submit_ticks = time.ticks; + + dispatch_q->cmd_q[dispatch_q->tail] = cmdbatch; + dispatch_q->tail = (dispatch_q->tail + 1) % + ADRENO_DISPATCH_CMDQUEUE_SIZE; + + /* + * If this is the first command in the pipe then the GPU will + * immediately start executing it so we can start the expiry timeout on + * the command batch here. Subsequent command batches will have their + * timer started when the previous command batch is retired. + * Set the timer if the cmdbatch was submitted to current + * active RB else this timer will need to be set when the + * RB becomes active, also if dispatcher is not is CLEAR + * state then the cmdbatch it is currently executing is + * unclear so do not set timer in that case either. + */ + if (1 == dispatch_q->inflight && + (&(adreno_dev->cur_rb->dispatch_q)) == dispatch_q && + adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) { + cmdbatch->expires = jiffies + + msecs_to_jiffies(adreno_cmdbatch_timeout); + mod_timer(&dispatcher->timer, cmdbatch->expires); + } + + /* Start the fault detection timer on the first submission */ + if (dispatcher->inflight == 1) + start_fault_timer(adreno_dev); + + /* + * we just submitted something, readjust ringbuffer + * execution level + */ + gpudev->preemption_schedule(adreno_dev); + return 0; +} + +/** + * dispatcher_context_sendcmds() - Send commands from a context to the GPU + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno context to dispatch commands from + * + * Dequeue and send a burst of commands from the specified context to the GPU + * Returns postive if the context needs to be put back on the pending queue + * 0 if the context is empty or detached and negative on error + */ +static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt) +{ + struct adreno_dispatcher_cmdqueue *dispatch_q = + &(drawctxt->rb->dispatch_q); + int count = 0; + int ret = 0; + int inflight = _cmdqueue_inflight(dispatch_q); + unsigned int timestamp; + + if (dispatch_q->inflight >= inflight) { + expire_markers(drawctxt); + return -EBUSY; + } + + /* + * Each context can send a specific number of command batches per cycle + */ + while ((count < _context_cmdbatch_burst) && + (dispatch_q->inflight < inflight)) { + struct kgsl_cmdbatch *cmdbatch; + + if (adreno_gpu_fault(adreno_dev) != 0) + break; + + cmdbatch = adreno_dispatcher_get_cmdbatch(drawctxt); + + /* + * adreno_context_get_cmdbatch returns -EAGAIN if the current + * cmdbatch has pending sync points so no more to do here. 
+ * When the sync points are satisfied then the context will get + * reqeueued + */ + + if (IS_ERR_OR_NULL(cmdbatch)) { + if (IS_ERR(cmdbatch)) + ret = PTR_ERR(cmdbatch); + break; + } + + /* + * If this is a synchronization submission then there are no + * commands to submit. Discard it and get the next item from + * the queue. Decrement count so this packet doesn't count + * against the burst for the context + */ + + if (cmdbatch->flags & KGSL_CMDBATCH_SYNC) { + kgsl_cmdbatch_destroy(cmdbatch); + continue; + } + + timestamp = cmdbatch->timestamp; + + ret = sendcmd(adreno_dev, cmdbatch); + + /* + * On error from sendcmd() try to requeue the command batch + * unless we got back -ENOENT which means that the context has + * been detached and there will be no more deliveries from here + */ + if (ret != 0) { + /* Destroy the cmdbatch on -ENOENT */ + if (ret == -ENOENT) + kgsl_cmdbatch_destroy(cmdbatch); + else { + /* + * If the requeue returns an error, return that + * instead of whatever sendcmd() sent us + */ + int r = adreno_dispatcher_requeue_cmdbatch( + drawctxt, cmdbatch); + if (r) + ret = r; + } + + break; + } + + drawctxt->submitted_timestamp = timestamp; + + count++; + } + + /* + * Wake up any snoozing threads if we have consumed any real commands + * or marker commands and we have room in the context queue. + */ + + if (_check_context_queue(drawctxt)) + wake_up_all(&drawctxt->wq); + + if (!ret) + ret = count; + + /* Return error or the number of commands queued */ + return ret; +} + +/** + * _adreno_dispatcher_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Issue as many commands as possible (up to inflight) from the pending contexts + * This function assumes the dispatcher mutex has been locked. + */ +static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct adreno_context *drawctxt, *next; + struct plist_head requeue, busy_list; + int ret; + + /* Leave early if the dispatcher isn't in a happy state */ + if (adreno_gpu_fault(adreno_dev) != 0) + return; + + plist_head_init(&requeue); + plist_head_init(&busy_list); + + /* Try to fill the ringbuffers as much as possible */ + while (1) { + + /* Stop doing things if the dispatcher is paused or faulted */ + if (adreno_gpu_fault(adreno_dev) != 0) + break; + + if (0 != adreno_gpu_halt(adreno_dev)) + break; + + spin_lock(&dispatcher->plist_lock); + + if (plist_head_empty(&dispatcher->pending)) { + spin_unlock(&dispatcher->plist_lock); + break; + } + + /* Get the next entry on the list */ + drawctxt = plist_first_entry(&dispatcher->pending, + struct adreno_context, pending); + + plist_del(&drawctxt->pending, &dispatcher->pending); + + spin_unlock(&dispatcher->plist_lock); + + if (kgsl_context_detached(&drawctxt->base) || + kgsl_context_invalid(&drawctxt->base)) { + kgsl_context_put(&drawctxt->base); + continue; + } + + ret = dispatcher_context_sendcmds(adreno_dev, drawctxt); + + /* Don't bother requeuing on -ENOENT - context is detached */ + if (ret != 0 && ret != -ENOENT) { + spin_lock(&dispatcher->plist_lock); + + /* + * Check to seen if the context had been requeued while + * we were processing it (probably by another thread + * pushing commands). If it has then shift it to the + * requeue list if it was not able to submit commands + * due to the dispatch_q being full. Also, do a put to + * make sure the reference counting stays accurate. 
+ * If the node is empty then we will put it on the + * requeue list and not touch the refcount since we + * already hold it from the first time it went on the + * list. + */ + + if (!plist_node_empty(&drawctxt->pending)) { + plist_del(&drawctxt->pending, + &dispatcher->pending); + kgsl_context_put(&drawctxt->base); + } + + if (ret == -EBUSY) + /* Inflight queue is full */ + plist_add(&drawctxt->pending, &busy_list); + else + plist_add(&drawctxt->pending, &requeue); + + spin_unlock(&dispatcher->plist_lock); + } else { + /* + * If the context doesn't need be requeued put back the + * refcount + */ + + kgsl_context_put(&drawctxt->base); + } + } + + spin_lock(&dispatcher->plist_lock); + + /* Put the contexts that couldn't submit back on the pending list */ + plist_for_each_entry_safe(drawctxt, next, &busy_list, pending) { + plist_del(&drawctxt->pending, &busy_list); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + + /* Now put the contexts that need to be requeued back on the list */ + plist_for_each_entry_safe(drawctxt, next, &requeue, pending) { + plist_del(&drawctxt->pending, &requeue); + plist_add(&drawctxt->pending, &dispatcher->pending); + } + + spin_unlock(&dispatcher->plist_lock); +} + +/** + * adreno_dispatcher_issuecmds() - Issue commmands from pending contexts + * @adreno_dev: Pointer to the adreno device struct + * + * Lock the dispatcher and call _adreno_dispatcher_issueibcmds + */ +static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* If the dispatcher is busy then schedule the work for later */ + if (!mutex_trylock(&dispatcher->mutex)) { + adreno_dispatcher_schedule(&adreno_dev->dev); + return; + } + + _adreno_dispatcher_issuecmds(adreno_dev); + mutex_unlock(&dispatcher->mutex); +} + +/** + * get_timestamp() - Return the next timestamp for the context + * @drawctxt - Pointer to an adreno draw context struct + * @cmdbatch - Pointer to a command batch + * @timestamp - Pointer to a timestamp value possibly passed from the user + * + * Assign a timestamp based on the settings of the draw context and the command + * batch. + */ +static int get_timestamp(struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch, unsigned int *timestamp) +{ + /* Synchronization commands don't get a timestamp */ + if (cmdbatch->flags & KGSL_CMDBATCH_SYNC) { + *timestamp = 0; + return 0; + } + + if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) { + /* + * User specified timestamps need to be greater than the last + * issued timestamp in the context + */ + if (timestamp_cmp(drawctxt->timestamp, *timestamp) >= 0) + return -ERANGE; + + drawctxt->timestamp = *timestamp; + } else + drawctxt->timestamp++; + + *timestamp = drawctxt->timestamp; + return 0; +} + +/** + * adreno_dispatcher_preempt_timer() - Timer that triggers when preemption has + * not completed + * @data: Pointer to adreno device that did not preempt in timely manner + */ +static void adreno_dispatcher_preempt_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + KGSL_DRV_ERR(device, + "Preemption timed out. 
cur_rb rptr/wptr %x/%x id %d, next_rb rptr/wptr %x/%x id %d, disp_state: %d\n", + adreno_dev->cur_rb->rptr, adreno_dev->cur_rb->wptr, + adreno_dev->cur_rb->id, adreno_dev->next_rb->rptr, + adreno_dev->next_rb->wptr, adreno_dev->next_rb->id, + atomic_read(&dispatcher->preemption_state)); + adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT); + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_get_highest_busy_rb() - Returns the highest priority RB + * which is busy + * @adreno_dev: Device whose RB is returned + */ +struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb( + struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb, *highest_busy_rb = NULL; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + if (rb->rptr != rb->wptr && !highest_busy_rb) { + highest_busy_rb = rb; + goto done; + } + + if (!adreno_disp_preempt_fair_sched) + continue; + + switch (rb->starve_timer_state) { + case ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT: + if (rb->rptr != rb->wptr && + adreno_dev->cur_rb != rb) { + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT; + rb->sched_timer = jiffies; + } + break; + case ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT: + if (time_after(jiffies, rb->sched_timer + + msecs_to_jiffies(_dispatch_starvation_time))) { + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED; + /* halt dispatcher to remove starvation */ + adreno_get_gpu_halt(adreno_dev); + } + break; + case ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED: + BUG_ON(adreno_dev->cur_rb != rb); + /* + * If the RB has not been running for the minimum + * time slice then allow it to run + */ + if ((rb->rptr != rb->wptr) && time_before(jiffies, + adreno_dev->cur_rb->sched_timer + + msecs_to_jiffies(_dispatch_time_slice))) + highest_busy_rb = rb; + else + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + break; + case ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED: + default: + break; + } + } +done: + return highest_busy_rb; +} + +/** + * adreno_dispactcher_queue_cmd() - Queue a new command in the context + * @adreno_dev: Pointer to the adreno device struct + * @drawctxt: Pointer to the adreno draw context + * @cmdbatch: Pointer to the command batch being submitted + * @timestamp: Pointer to the requested timestamp + * + * Queue a command in the context - if there isn't any room in the queue, then + * block until there is + */ +int adreno_dispatcher_queue_cmd(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) +{ + struct adreno_dispatcher_cmdqueue *dispatch_q = + ADRENO_CMDBATCH_DISPATCH_CMDQUEUE(cmdbatch); + int ret; + + spin_lock(&drawctxt->lock); + + if (kgsl_context_detached(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + return -ENOENT; + } + + /* + * Force the preamble for this submission only - this is usually + * requested by the dispatcher as part of fault recovery + */ + + if (test_and_clear_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, + &drawctxt->base.priv)) + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv); + + /* + * Force the premable if set from userspace in the context or cmdbatch + * flags + */ + + if ((drawctxt->base.flags & KGSL_CONTEXT_CTX_SWITCH) || + (cmdbatch->flags & KGSL_CMDBATCH_CTX_SWITCH)) + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv); + + /* Skip this cmdbatch commands if IFH_NOP is enabled */ + if (drawctxt->base.flags & KGSL_CONTEXT_IFH_NOP) + set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv); + + /* + * If we are waiting for the end 
of frame and it hasn't appeared yet, + * then mark the command batch as skipped. It will still progress + * through the pipeline but it won't actually send any commands + */ + + if (test_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv)) { + set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv); + + /* + * If this command batch represents the EOF then clear the way + * for the dispatcher to continue submitting + */ + + if (cmdbatch->flags & KGSL_CMDBATCH_END_OF_FRAME) { + clear_bit(ADRENO_CONTEXT_SKIP_EOF, + &drawctxt->base.priv); + + /* + * Force the preamble on the next command to ensure that + * the state is correct + */ + set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, + &drawctxt->base.priv); + } + } + + /* Wait for room in the context queue */ + + while (drawctxt->queued >= _context_cmdqueue_size) { + trace_adreno_drawctxt_sleep(drawctxt); + spin_unlock(&drawctxt->lock); + + ret = wait_event_interruptible_timeout(drawctxt->wq, + _check_context_queue(drawctxt), + msecs_to_jiffies(_context_queue_wait)); + + spin_lock(&drawctxt->lock); + trace_adreno_drawctxt_wake(drawctxt); + + if (ret <= 0) { + spin_unlock(&drawctxt->lock); + return (ret == 0) ? -ETIMEDOUT : (int) ret; + } + } + /* + * Account for the possiblity that the context got invalidated + * while we were sleeping + */ + + if (kgsl_context_invalid(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + return -EDEADLK; + } + if (kgsl_context_detached(&drawctxt->base)) { + spin_unlock(&drawctxt->lock); + return -ENOENT; + } + + ret = get_timestamp(drawctxt, cmdbatch, timestamp); + if (ret) { + spin_unlock(&drawctxt->lock); + return ret; + } + + cmdbatch->timestamp = *timestamp; + + if (cmdbatch->flags & KGSL_CMDBATCH_MARKER) { + + /* + * See if we can fastpath this thing - if nothing is queued + * and nothing is inflight retire without bothering the GPU + */ + + if (!drawctxt->queued && kgsl_check_timestamp(cmdbatch->device, + cmdbatch->context, drawctxt->queued_timestamp)) { + trace_adreno_cmdbatch_queued(cmdbatch, + drawctxt->queued); + + _retire_marker(cmdbatch); + spin_unlock(&drawctxt->lock); + return 0; + } + + /* + * Remember the last queued timestamp - the marker will block + * until that timestamp is expired (unless another command + * comes along and forces the marker to execute) + */ + + cmdbatch->marker_timestamp = drawctxt->queued_timestamp; + } + + /* SYNC commands have timestamp 0 and will get optimized out anyway */ + if (!(cmdbatch->flags & KGSL_CONTEXT_SYNC)) + drawctxt->queued_timestamp = *timestamp; + + /* + * Set the fault tolerance policy for the command batch - assuming the + * context hasn't disabled FT use the current device policy + */ + + if (drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE) + set_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy); + else + cmdbatch->fault_policy = adreno_dev->ft_policy; + + /* Put the command into the queue */ + drawctxt->cmdqueue[drawctxt->cmdqueue_tail] = cmdbatch; + drawctxt->cmdqueue_tail = (drawctxt->cmdqueue_tail + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + /* + * If this is a real command then we need to force any markers queued + * before it to dispatch to keep time linear - set the skip bit so + * the commands get NOPed. 
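+ * The loop below walks the queue from cmdqueue_head to cmdqueue_tail and
+ * sets CMDBATCH_FLAG_SKIP on each queued MARKER command batch.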
+ */ + + if (!(cmdbatch->flags & KGSL_CMDBATCH_MARKER)) { + unsigned int i = drawctxt->cmdqueue_head; + + while (i != drawctxt->cmdqueue_tail) { + if (drawctxt->cmdqueue[i]->flags & KGSL_CMDBATCH_MARKER) + set_bit(CMDBATCH_FLAG_SKIP, + &drawctxt->cmdqueue[i]->priv); + + i = CMDQUEUE_NEXT(i, ADRENO_CONTEXT_CMDQUEUE_SIZE); + } + } + + drawctxt->queued++; + trace_adreno_cmdbatch_queued(cmdbatch, drawctxt->queued); + + _track_context(dispatch_q, drawctxt->base.id); + + spin_unlock(&drawctxt->lock); + + /* Add the context to the dispatcher pending list */ + dispatcher_queue_context(adreno_dev, drawctxt); + + /* + * Only issue commands if inflight is less than burst -this prevents us + * from sitting around waiting for the mutex on a busy system - the work + * loop will schedule it for us. Inflight is mutex protected but the + * worse that can happen is that it will go to 0 after we check and if + * it goes to 0 it is because the work loop decremented it and the work + * queue will try to schedule new commands anyway. + */ + + if (dispatch_q->inflight < _context_cmdbatch_burst) + adreno_dispatcher_issuecmds(adreno_dev); + + return 0; +} + +static int _mark_context(int id, void *ptr, void *data) +{ + unsigned int guilty = *((unsigned int *) data); + struct kgsl_context *context = ptr; + + /* + * If the context is guilty mark it as such. Otherwise mark it as + * innocent if it had not already been marked as guilty. If id is + * passed as 0 then mark EVERYBODY guilty (recovery failed) + */ + + if (guilty == 0 || guilty == context->id) + context->reset_status = + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT; + else if (context->reset_status != + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT) + context->reset_status = + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT; + + return 0; +} + +/** + * mark_guilty_context() - Mark the given context as guilty (failed recovery) + * @device: Pointer to a KGSL device structure + * @id: Context ID of the guilty context (or 0 to mark all as guilty) + * + * Mark the given (or all) context(s) as guilty (failed recovery) + */ +static void mark_guilty_context(struct kgsl_device *device, unsigned int id) +{ + /* Mark the status for all the contexts in the device */ + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, _mark_context, &id); + read_unlock(&device->context_lock); +} + +/* + * If an IB inside of the command batch has a gpuaddr that matches the base + * passed in then zero the size which effectively skips it when it is submitted + * in the ringbuffer. + */ +static void cmdbatch_skip_ib(struct kgsl_cmdbatch *cmdbatch, uint64_t base) +{ + struct kgsl_memobj_node *ib; + + list_for_each_entry(ib, &cmdbatch->cmdlist, node) { + if (ib->gpuaddr == base) { + ib->priv |= MEMOBJ_SKIP; + if (base) + return; + } + } +} + +static void cmdbatch_skip_cmd(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmdbatch **replay, int count) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + int i; + + /* + * SKIPCMD policy: next IB issued for this context is tentative + * if it fails we assume that GFT failed and if it succeeds + * we mark GFT as a success. 
+ * + * Find next commandbatch for the faulting context + * If commandbatch is found + * a) store the current commandbatch fault_policy in context's next + * commandbatch fault_policy + * b) force preamble for next commandbatch + */ + for (i = 1; i < count; i++) { + if (replay[i]->context->id == cmdbatch->context->id) { + replay[i]->fault_policy = replay[0]->fault_policy; + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv); + set_bit(KGSL_FT_SKIPCMD, &replay[i]->fault_recovery); + break; + } + } + + /* + * If we did not find the next cmd then + * a) set a flag for next command issued in this context + * b) store the fault_policy, this fault_policy becomes the policy of + * next command issued in this context + */ + if ((i == count) && drawctxt) { + set_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv); + drawctxt->fault_policy = replay[0]->fault_policy; + } + + /* set the flags to skip this cmdbatch */ + set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv); + cmdbatch->fault_recovery = 0; +} + +static void cmdbatch_skip_frame(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmdbatch **replay, int count) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); + int skip = 1; + int i; + + for (i = 0; i < count; i++) { + + /* + * Only operate on command batches that belong to the + * faulting context + */ + + if (replay[i]->context->id != cmdbatch->context->id) + continue; + + /* + * Skip all the command batches in this context until + * the EOF flag is seen. If the EOF flag is seen then + * force the preamble for the next command. + */ + + if (skip) { + set_bit(CMDBATCH_FLAG_SKIP, &replay[i]->priv); + + if (replay[i]->flags & KGSL_CMDBATCH_END_OF_FRAME) + skip = 0; + } else { + set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv); + return; + } + } + + /* + * If the EOF flag hasn't been seen yet then set the flag in the + * drawctxt to keep looking for it + */ + + if (skip && drawctxt) + set_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv); + + /* + * If we did see the EOF flag then force the preamble on for the + * next command issued on this context + */ + + if (!skip && drawctxt) + set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->base.priv); +} + +static void remove_invalidated_cmdbatches(struct kgsl_device *device, + struct kgsl_cmdbatch **replay, int count) +{ + int i; + + for (i = 0; i < count; i++) { + struct kgsl_cmdbatch *cmd = replay[i]; + if (cmd == NULL) + continue; + + if (kgsl_context_detached(cmd->context) || + kgsl_context_invalid(cmd->context)) { + replay[i] = NULL; + + mutex_lock(&device->mutex); + kgsl_cancel_events_timestamp(device, + &cmd->context->events, cmd->timestamp); + mutex_unlock(&device->mutex); + + kgsl_cmdbatch_destroy(cmd); + } + } +} + +static char _pidname[TASK_COMM_LEN]; + +static inline const char *_kgsl_context_comm(struct kgsl_context *context) +{ + if (context && context->proc_priv) + strlcpy(_pidname, context->proc_priv->comm, sizeof(_pidname)); + else + snprintf(_pidname, TASK_COMM_LEN, "unknown"); + + return _pidname; +} + +#define pr_fault(_d, _c, fmt, args...) 
\ + dev_err((_d)->dev, "%s[%d]: " fmt, \ + _kgsl_context_comm((_c)->context), \ + (_c)->context->proc_priv->pid, ##args) + + +static void adreno_fault_header(struct kgsl_device *device, + struct adreno_ringbuffer *rb, struct kgsl_cmdbatch *cmdbatch) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int status, rptr, wptr, ib1sz, ib2sz; + uint64_t ib1base, ib2base; + + adreno_readreg(adreno_dev , ADRENO_REG_RBBM_STATUS, &status); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr); + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz); + + if (cmdbatch != NULL) { + struct adreno_context *drawctxt = + ADRENO_CONTEXT(cmdbatch->context); + + trace_adreno_gpu_fault(cmdbatch->context->id, + cmdbatch->timestamp, + status, rptr, wptr, ib1base, ib1sz, + ib2base, ib2sz, drawctxt->rb->id); + + pr_fault(device, cmdbatch, + "gpu fault ctx %d ts %d status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + cmdbatch->context->id, cmdbatch->timestamp, status, + rptr, wptr, ib1base, ib1sz, ib2base, ib2sz); + + if (rb != NULL) + pr_fault(device, cmdbatch, + "gpu fault rb %d rb sw r/w %4.4x/%4.4x\n", + rb->id, rb->rptr, rb->wptr); + } else { + int id = (rb != NULL) ? rb->id : -1; + + dev_err(device->dev, + "RB[%d]: gpu fault status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + id, status, rptr, wptr, ib1base, ib1sz, ib2base, + ib2sz); + if (rb != NULL) + dev_err(device->dev, + "RB[%d] gpu fault rb sw r/w %4.4x/%4.4x\n", + rb->id, rb->rptr, rb->wptr); + } +} + +void adreno_fault_skipcmd_detached(struct kgsl_device *device, + struct adreno_context *drawctxt, + struct kgsl_cmdbatch *cmdbatch) +{ + if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) && + kgsl_context_detached(&drawctxt->base)) { + pr_context(device, cmdbatch->context, "gpu %s ctx %d\n", + "detached", cmdbatch->context->id); + clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv); + } +} + +/** + * process_cmdbatch_fault() - Process a cmdbatch for fault policies + * @device: Device on which the cmdbatch caused a fault + * @replay: List of cmdbatches that are to be replayed on the device. The + * faulting cmdbatch is the first command in the replay list and the remaining + * cmdbatches in the list are commands that were submitted to the same queue + * as the faulting one. + * @count: Number of cmdbatches in replay + * @base: The IB1 base at the time of fault + * @fault: The fault type + */ +static void process_cmdbatch_fault(struct kgsl_device *device, + struct kgsl_cmdbatch **replay, int count, + unsigned int base, + int fault) +{ + struct kgsl_cmdbatch *cmdbatch = replay[0]; + int i; + char *state = "failed"; + + /* + * If GFT recovered more than X times in Y ms invalidate the context + * and do not attempt recovery. + * Example: X==3 and Y==3000 ms, GPU hung at 500ms, 1700ms, 25000ms and + * 3000ms for the same context, we will not try FT and invalidate the + * context @3000ms because context triggered GFT more than 3 times in + * last 3 seconds. If a context caused recoverable GPU hangs + * where 1st and 4th gpu hang are more than 3 seconds apart we + * won't disable GFT and invalidate the context. 
+ */ + if (test_bit(KGSL_FT_THROTTLE, &cmdbatch->fault_policy)) { + if (time_after(jiffies, (cmdbatch->context->fault_time + + msecs_to_jiffies(_fault_throttle_time)))) { + cmdbatch->context->fault_time = jiffies; + cmdbatch->context->fault_count = 1; + } else { + cmdbatch->context->fault_count++; + if (cmdbatch->context->fault_count > + _fault_throttle_burst) { + set_bit(KGSL_FT_DISABLE, + &cmdbatch->fault_policy); + pr_context(device, cmdbatch->context, + "gpu fault threshold exceeded %d faults in %d msecs\n", + _fault_throttle_burst, + _fault_throttle_time); + } + } + } + + /* + * If FT is disabled for this cmdbatch invalidate immediately + */ + + if (test_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy) || + test_bit(KGSL_FT_TEMP_DISABLE, &cmdbatch->fault_policy)) { + state = "skipped"; + bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG); + } + + /* If the context is detached do not run FT on context */ + if (kgsl_context_detached(cmdbatch->context)) { + state = "detached"; + bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG); + } + + /* + * Set a flag so we don't print another PM dump if the cmdbatch fails + * again on replay + */ + + set_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy); + + /* + * A hardware fault generally means something was deterministically + * wrong with the command batch - no point in trying to replay it + * Clear the replay bit and move on to the next policy level + */ + + if (fault & ADRENO_HARD_FAULT) + clear_bit(KGSL_FT_REPLAY, &(cmdbatch->fault_policy)); + + /* + * A timeout fault means the IB timed out - clear the policy and + * invalidate - this will clear the FT_SKIP_PMDUMP bit but that is okay + * because we won't see this cmdbatch again + */ + + if (fault & ADRENO_TIMEOUT_FAULT) + bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG); + + /* + * If the context had a GPU page fault then it is likely it would fault + * again if replayed + */ + + if (test_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, + &cmdbatch->context->priv)) { + /* we'll need to resume the mmu later... */ + clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy); + clear_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, + &cmdbatch->context->priv); + } + + /* + * Execute the fault tolerance policy. Each command batch stores the + * current fault policy that was set when it was queued. + * As the options are tried in descending priority + * (REPLAY -> SKIPIBS -> SKIPFRAME -> NOTHING) the bits are cleared + * from the cmdbatch policy so the next thing can be tried if the + * change comes around again + */ + + /* Replay the hanging command batch again */ + if (test_and_clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_REPLAY)); + set_bit(KGSL_FT_REPLAY, &cmdbatch->fault_recovery); + return; + } + + /* + * Skip the last IB1 that was played but replay everything else. + * Note that the last IB1 might not be in the "hung" command batch + * because the CP may have caused a page-fault while it was prefetching + * the next IB1/IB2. walk all outstanding commands and zap the + * supposedly bad IB1 where ever it lurks. 
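+ * Each batch in the replay list that belongs to the faulting context is
+ * therefore handed to cmdbatch_skip_ib() together with the faulting IB1 base;
+ * the intent is that only the offending IB is dropped while the rest of those
+ * batches is still replayed.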
+ */ + + if (test_and_clear_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_SKIPIB)); + set_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_recovery); + + for (i = 0; i < count; i++) { + if (replay[i] != NULL && + replay[i]->context->id == cmdbatch->context->id) + cmdbatch_skip_ib(replay[i], base); + } + + return; + } + + /* Skip the faulted command batch submission */ + if (test_and_clear_bit(KGSL_FT_SKIPCMD, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_SKIPCMD)); + + /* Skip faulting command batch */ + cmdbatch_skip_cmd(cmdbatch, replay, count); + + return; + } + + if (test_and_clear_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_policy)) { + trace_adreno_cmdbatch_recovery(cmdbatch, + BIT(KGSL_FT_SKIPFRAME)); + set_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_recovery); + + /* + * Skip all the pending command batches for this context until + * the EOF frame is seen + */ + cmdbatch_skip_frame(cmdbatch, replay, count); + return; + } + + /* If we get here then all the policies failed */ + + pr_context(device, cmdbatch->context, "gpu %s ctx %d ts %d\n", + state, cmdbatch->context->id, cmdbatch->timestamp); + + /* Mark the context as failed */ + mark_guilty_context(device, cmdbatch->context->id); + + /* Invalidate the context */ + adreno_drawctxt_invalidate(device, cmdbatch->context); +} + +/** + * recover_dispatch_q() - Recover all commands in a dispatch queue by + * resubmitting the commands + * @device: Device on which recovery is performed + * @dispatch_q: The command queue to recover + * @fault: Faults caused by the command in the dispatch q + * @base: The IB1 base during the fault + */ +static void recover_dispatch_q(struct kgsl_device *device, + struct adreno_dispatcher_cmdqueue *dispatch_q, + int fault, + unsigned int base) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_cmdbatch **replay = NULL; + unsigned int ptr; + int first = 0; + int count = 0; + int i; + + /* Allocate memory to store the inflight commands */ + replay = kzalloc(sizeof(*replay) * dispatch_q->inflight, GFP_KERNEL); + + if (replay == NULL) { + unsigned int ptr = dispatch_q->head; + + /* Recovery failed - mark everybody on this q guilty */ + while (ptr != dispatch_q->tail) { + struct kgsl_context *context = + dispatch_q->cmd_q[ptr]->context; + + mark_guilty_context(device, context->id); + adreno_drawctxt_invalidate(device, context); + kgsl_cmdbatch_destroy(dispatch_q->cmd_q[ptr]); + + ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE); + } + + /* + * Set the replay count to zero - this will ensure that the + * hardware gets reset but nothing else gets played + */ + + count = 0; + goto replay; + } + + /* Copy the inflight command batches into the temporary storage */ + ptr = dispatch_q->head; + + while (ptr != dispatch_q->tail) { + replay[count++] = dispatch_q->cmd_q[ptr]; + ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE); + } + + if (fault && count) + process_cmdbatch_fault(device, replay, + count, base, fault); +replay: + dispatch_q->inflight = 0; + dispatch_q->head = dispatch_q->tail = 0; + /* Remove any pending command batches that have been invalidated */ + remove_invalidated_cmdbatches(device, replay, count); + + /* Replay the pending command buffers */ + for (i = 0; i < count; i++) { + + int ret; + + if (replay[i] == NULL) + continue; + + /* + * Force the preamble on the first command (if applicable) to + * avoid any strange stage issues + */ + + if (first == 0) { + 
set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv); + first = 1; + } + + /* + * Force each command batch to wait for idle - this avoids weird + * CP parse issues + */ + + set_bit(CMDBATCH_FLAG_WFI, &replay[i]->priv); + + ret = sendcmd(adreno_dev, replay[i]); + + /* + * If sending the command fails, then try to recover by + * invalidating the context + */ + + if (ret) { + pr_context(device, replay[i]->context, + "gpu reset failed ctx %d ts %d\n", + replay[i]->context->id, replay[i]->timestamp); + + /* Mark this context as guilty (failed recovery) */ + mark_guilty_context(device, replay[i]->context->id); + + adreno_drawctxt_invalidate(device, replay[i]->context); + remove_invalidated_cmdbatches(device, &replay[i], + count - i); + } + } + + /* Clear the fault bit */ + clear_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv); + + kfree(replay); +} + +static int dispatcher_do_fault(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + struct adreno_dispatcher_cmdqueue *dispatch_q = NULL, *dispatch_q_temp; + struct adreno_ringbuffer *rb; + struct adreno_ringbuffer *hung_rb = NULL; + unsigned int reg; + uint64_t base; + struct kgsl_cmdbatch *cmdbatch = NULL; + int ret, i; + int fault; + int halt; + + fault = atomic_xchg(&dispatcher->fault, 0); + if (fault == 0) + return 0; + + /* + * On A5xx, read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to + * tell if this function was entered after a pagefault. If so, only + * proceed if the fault handler has already run in the IRQ thread, + * else return early to give the fault handler a chance to run. + */ + if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && adreno_is_a5xx(adreno_dev)) { + unsigned int val; + mutex_lock(&device->mutex); + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val); + mutex_unlock(&device->mutex); + if (val & BIT(24)) + return 0; + } + + /* Turn off all the timers */ + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + del_timer_sync(&dispatcher->preempt_timer); + + mutex_lock(&device->mutex); + + /* hang opcode */ + kgsl_cffdump_hang(device); + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, &base); + + /* + * Force the CP off for anything but a hard fault to make sure it is + * good and stopped + */ + if (!(fault & ADRENO_HARD_FAULT)) { + adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, ®); + if (adreno_is_a5xx(adreno_dev)) + reg |= 1 | (1 << 1); + else + reg |= (1 << 27) | (1 << 28); + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg); + } + /* + * retire cmdbatches from all the dispatch_q's before starting recovery + */ + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + adreno_dispatch_process_cmdqueue(adreno_dev, + &(rb->dispatch_q), 0); + /* Select the active dispatch_q */ + if (base == rb->buffer_desc.gpuaddr) { + dispatch_q = &(rb->dispatch_q); + hung_rb = rb; + adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, + &hung_rb->rptr); + if (adreno_dev->cur_rb != hung_rb) { + adreno_dev->prev_rb = adreno_dev->cur_rb; + adreno_dev->cur_rb = hung_rb; + } + } + if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED == + rb->starve_timer_state) { + adreno_put_gpu_halt(adreno_dev); + rb->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + } + } + + if (dispatch_q && (dispatch_q->tail != dispatch_q->head)) { + cmdbatch = dispatch_q->cmd_q[dispatch_q->head]; + trace_adreno_cmdbatch_fault(cmdbatch, fault); + } + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + 
ADRENO_REG_CP_IB1_BASE_HI, &base); + + /* + * Dump the snapshot information if this is the first + * detected fault for the oldest active command batch + */ + + if (cmdbatch == NULL || + !test_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy)) { + adreno_fault_header(device, hung_rb, cmdbatch); + kgsl_device_snapshot(device, + cmdbatch ? cmdbatch->context : NULL); + } + + /* Terminate the stalled transaction and resume the IOMMU */ + if (fault & ADRENO_IOMMU_PAGE_FAULT) + kgsl_mmu_pagefault_resume(&device->mmu); + + /* Reset the dispatcher queue */ + dispatcher->inflight = 0; + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_CLEAR); + + /* Reset the GPU and make sure halt is not set during recovery */ + halt = adreno_gpu_halt(adreno_dev); + adreno_clear_gpu_halt(adreno_dev); + + /* + * If there is a stall in the ringbuffer after all commands have been + * retired then we could hit problems if contexts are waiting for + * internal timestamps that will never retire + */ + + if (hung_rb != NULL) { + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_MAX + hung_rb->id, + soptimestamp), hung_rb->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_MAX + hung_rb->id, + eoptimestamp), hung_rb->timestamp); + + /* Schedule any pending events to be run */ + kgsl_process_event_group(device, &hung_rb->events); + } + + ret = adreno_reset(device, fault); + mutex_unlock(&device->mutex); + /* if any other fault got in until reset then ignore */ + atomic_set(&dispatcher->fault, 0); + + /* If adreno_reset() fails then what hope do we have for the future? */ + BUG_ON(ret); + + /* recover all the dispatch_q's starting with the one that hung */ + if (dispatch_q) + recover_dispatch_q(device, dispatch_q, fault, base); + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + dispatch_q_temp = &(rb->dispatch_q); + if (dispatch_q_temp != dispatch_q) + recover_dispatch_q(device, dispatch_q_temp, 0, base); + } + + atomic_add(halt, &adreno_dev->halt); + + return 1; +} + +static inline int cmdbatch_consumed(struct kgsl_cmdbatch *cmdbatch, + unsigned int consumed, unsigned int retired) +{ + return ((timestamp_cmp(cmdbatch->timestamp, consumed) >= 0) && + (timestamp_cmp(retired, cmdbatch->timestamp) < 0)); +} + +static void _print_recovery(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + static struct { + unsigned int mask; + const char *str; + } flags[] = { ADRENO_FT_TYPES }; + + int i, nr = find_first_bit(&cmdbatch->fault_recovery, BITS_PER_LONG); + char *result = "unknown"; + + for (i = 0; i < ARRAY_SIZE(flags); i++) { + if (flags[i].mask == BIT(nr)) { + result = (char *) flags[i].str; + break; + } + } + + pr_context(device, cmdbatch->context, + "gpu %s ctx %d ts %d policy %lX\n", + result, cmdbatch->context->id, cmdbatch->timestamp, + cmdbatch->fault_recovery); +} + +static void cmdbatch_profile_ticks(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch, uint64_t *start, uint64_t *retire) +{ + void *ptr = adreno_dev->cmdbatch_profile_buffer.hostptr; + struct adreno_cmdbatch_profile_entry *entry; + + entry = (struct adreno_cmdbatch_profile_entry *) + (ptr + (cmdbatch->profile_index * sizeof(*entry))); + + rmb(); + *start = entry->started; + *retire = entry->retired; +} + +int adreno_dispatch_process_cmdqueue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q, + int long_ib_detect) +{ + struct kgsl_device *device = &(adreno_dev->dev); + struct adreno_dispatcher 
*dispatcher = &(adreno_dev->dispatcher); + uint64_t start_ticks = 0, retire_ticks = 0; + + struct adreno_dispatcher_cmdqueue *active_q = + &(adreno_dev->cur_rb->dispatch_q); + int count = 0; + + while (dispatch_q->head != dispatch_q->tail) { + struct kgsl_cmdbatch *cmdbatch = + dispatch_q->cmd_q[dispatch_q->head]; + struct adreno_context *drawctxt; + BUG_ON(cmdbatch == NULL); + + drawctxt = ADRENO_CONTEXT(cmdbatch->context); + + /* + * First try to expire the timestamp. This happens if the + * context is valid and the timestamp expired normally or if the + * context was destroyed before the command batch was finished + * in the GPU. Either way retire the command batch advance the + * pointers and continue processing the queue + */ + + if (kgsl_check_timestamp(device, cmdbatch->context, + cmdbatch->timestamp)) { + + /* + * If the cmdbatch in question had faulted announce its + * successful completion to the world + */ + + if (cmdbatch->fault_recovery != 0) { + /* Mark the context as faulted and recovered */ + set_bit(ADRENO_CONTEXT_FAULT, + &cmdbatch->context->priv); + + _print_recovery(device, cmdbatch); + } + + /* Reduce the number of inflight command batches */ + dispatcher->inflight--; + dispatch_q->inflight--; + + /* + * If kernel profiling is enabled get the submit and + * retired ticks from the buffer + */ + + if (test_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv)) + cmdbatch_profile_ticks(adreno_dev, cmdbatch, + &start_ticks, &retire_ticks); + + trace_adreno_cmdbatch_retired(cmdbatch, + (int) dispatcher->inflight, start_ticks, + retire_ticks, ADRENO_CMDBATCH_RB(cmdbatch)); + + /* Record the delta between submit and retire ticks */ + drawctxt->submit_retire_ticks[drawctxt->ticks_index] = + retire_ticks - cmdbatch->submit_ticks; + + drawctxt->ticks_index = (drawctxt->ticks_index + 1) + % SUBMIT_RETIRE_TICKS_SIZE; + + /* Zero the old entry*/ + dispatch_q->cmd_q[dispatch_q->head] = NULL; + + /* Advance the buffer head */ + dispatch_q->head = CMDQUEUE_NEXT(dispatch_q->head, + ADRENO_DISPATCH_CMDQUEUE_SIZE); + + /* Destroy the retired command batch */ + kgsl_cmdbatch_destroy(cmdbatch); + + /* Update the expire time for the next command batch */ + + if (dispatch_q->inflight > 0 && + dispatch_q == active_q) { + cmdbatch = + dispatch_q->cmd_q[dispatch_q->head]; + cmdbatch->expires = jiffies + + msecs_to_jiffies( + adreno_cmdbatch_timeout); + } + + count++; + continue; + } + /* + * Break here if fault detection is disabled for the context or + * if the long running IB detection is disaled device wide or + * if the dispatch q is not active + * Long running command buffers will be allowed to run to + * completion - but badly behaving command buffers (infinite + * shaders etc) can end up running forever. + */ + + if (!long_ib_detect || + drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE + || dispatch_q != active_q) + break; + + /* + * The last line of defense is to check if the command batch has + * timed out. 
If we get this far but the timeout hasn't expired + * yet then the GPU is still ticking away + */ + + if (time_is_after_jiffies(cmdbatch->expires)) + break; + + /* Boom goes the dynamite */ + + pr_context(device, cmdbatch->context, + "gpu timeout ctx %d ts %d\n", + cmdbatch->context->id, cmdbatch->timestamp); + + adreno_set_gpu_fault(adreno_dev, ADRENO_TIMEOUT_FAULT); + break; + } + return count; +} + +/** + * adreno_dispatcher_work() - Master work handler for the dispatcher + * @work: Pointer to the work struct for the current work queue + * + * Process expired commands and send new ones. + */ +static void adreno_dispatcher_work(struct work_struct *work) +{ + struct adreno_dispatcher *dispatcher = + container_of(work, struct adreno_dispatcher, work); + struct adreno_device *adreno_dev = + container_of(dispatcher, struct adreno_device, dispatcher); + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int count = 0; + int cur_rb_id = adreno_dev->cur_rb->id; + + mutex_lock(&dispatcher->mutex); + + if (ADRENO_DISPATCHER_PREEMPT_CLEAR == + atomic_read(&dispatcher->preemption_state)) + /* process the active q*/ + count = adreno_dispatch_process_cmdqueue(adreno_dev, + &(adreno_dev->cur_rb->dispatch_q), + adreno_long_ib_detect(adreno_dev)); + + else if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED == + atomic_read(&dispatcher->preemption_state)) + count = adreno_dispatch_process_cmdqueue(adreno_dev, + &(adreno_dev->cur_rb->dispatch_q), 0); + + /* Check if gpu fault occurred */ + if (dispatcher_do_fault(device)) + goto done; + + gpudev->preemption_schedule(adreno_dev); + + if (cur_rb_id != adreno_dev->cur_rb->id) { + struct adreno_dispatcher_cmdqueue *dispatch_q = + &(adreno_dev->cur_rb->dispatch_q); + /* active level switched, clear new level cmdbatches */ + count = adreno_dispatch_process_cmdqueue(adreno_dev, + dispatch_q, + adreno_long_ib_detect(adreno_dev)); + /* + * If GPU has already completed all the commands in new incoming + * RB then we may not get another interrupt due to which + * dispatcher may not run again. Schedule dispatcher here so + * we can come back and process the other RB's if required + */ + if (dispatch_q->head == dispatch_q->tail) + adreno_dispatcher_schedule(device); + } + /* + * If inflight went to 0, queue back up the event processor to catch + * stragglers + */ + if (dispatcher->inflight == 0 && count) + kgsl_schedule_work(&device->event_work); + + /* Try to dispatch new commands */ + _adreno_dispatcher_issuecmds(adreno_dev); + +done: + /* Either update the timer for the next command batch or disable it */ + if (dispatcher->inflight) { + struct kgsl_cmdbatch *cmdbatch = + adreno_dev->cur_rb->dispatch_q.cmd_q[ + adreno_dev->cur_rb->dispatch_q.head]; + if (cmdbatch && adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) + /* Update the timeout timer for the next cmdbatch */ + mod_timer(&dispatcher->timer, cmdbatch->expires); + + /* There are still things in flight - update the idle counts */ + mutex_lock(&device->mutex); + kgsl_pwrscale_update(device); + mod_timer(&device->idle_timer, jiffies + + device->pwrctrl.interval_timeout); + mutex_unlock(&device->mutex); + } else { + /* There is nothing left in the pipeline. 
Shut 'er down boys */ + mutex_lock(&device->mutex); + + if (test_and_clear_bit(ADRENO_DISPATCHER_ACTIVE, + &dispatcher->priv)) + complete_all(&dispatcher->idle_gate); + + /* + * Stop the fault timer before decrementing the active count to + * avoid reading the hardware registers while we are trying to + * turn clocks off + */ + del_timer_sync(&dispatcher->fault_timer); + + if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) { + kgsl_active_count_put(device); + clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv); + } + + mutex_unlock(&device->mutex); + } + + mutex_unlock(&dispatcher->mutex); +} + +void adreno_dispatcher_schedule(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + kgsl_schedule_work(&dispatcher->work); +} + +/** + * adreno_dispatcher_queue_context() - schedule a drawctxt in the dispatcher + * device: pointer to the KGSL device + * drawctxt: pointer to the drawctxt to schedule + * + * Put a draw context on the dispatcher pending queue and schedule the + * dispatcher. This is used to reschedule changes that might have been blocked + * for sync points or other concerns + */ +void adreno_dispatcher_queue_context(struct kgsl_device *device, + struct adreno_context *drawctxt) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + dispatcher_queue_context(adreno_dev, drawctxt); + adreno_dispatcher_schedule(device); +} + +/* + * This is called on a regular basis while command batches are inflight. Fault + * detection registers are read and compared to the existing values - if they + * changed then the GPU is still running. If they are the same between + * subsequent calls then the GPU may have faulted + */ + +static void adreno_dispatcher_fault_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + /* Leave if the user decided to turn off fast hang detection */ + if (!adreno_soft_fault_detect(adreno_dev)) + return; + + if (adreno_gpu_fault(adreno_dev)) { + adreno_dispatcher_schedule(device); + return; + } + + /* + * Read the fault registers - if it returns 0 then they haven't changed + * so mark the dispatcher as faulted and schedule the work loop. 
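+ * If the registers did change, re-arm the timer for another
+ * _fault_timer_interval ms so the comparison repeats while work is still
+ * inflight (the interval is also tunable through the fault_detect_interval
+ * sysfs attribute defined below).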
+ */ + + if (!fault_detect_read_compare(device)) { + adreno_set_gpu_fault(adreno_dev, ADRENO_SOFT_FAULT); + adreno_dispatcher_schedule(device); + } else { + mod_timer(&dispatcher->fault_timer, + jiffies + msecs_to_jiffies(_fault_timer_interval)); + } +} + +/* + * This is called when the timer expires - it either means the GPU is hung or + * the IB is taking too long to execute + */ +static void adreno_dispatcher_timer(unsigned long data) +{ + struct adreno_device *adreno_dev = (struct adreno_device *) data; + struct kgsl_device *device = &adreno_dev->dev; + + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_start() - activate the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + */ +void adreno_dispatcher_start(struct kgsl_device *device) +{ + complete_all(&device->cmdbatch_gate); + + /* Schedule the work loop to get things going */ + adreno_dispatcher_schedule(device); +} + +/** + * adreno_dispatcher_stop() - stop the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Stop the dispatcher and close all the timers + */ +void adreno_dispatcher_stop(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); +} + +/** + * adreno_dispatcher_close() - close the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Close the dispatcher and free all the oustanding commands and memory + */ +void adreno_dispatcher_close(struct adreno_device *adreno_dev) +{ + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int i; + struct adreno_ringbuffer *rb; + + mutex_lock(&dispatcher->mutex); + del_timer_sync(&dispatcher->timer); + del_timer_sync(&dispatcher->fault_timer); + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + struct adreno_dispatcher_cmdqueue *dispatch_q = + &(rb->dispatch_q); + while (dispatch_q->head != dispatch_q->tail) { + kgsl_cmdbatch_destroy( + dispatch_q->cmd_q[dispatch_q->head]); + dispatch_q->head = (dispatch_q->head + 1) + % ADRENO_DISPATCH_CMDQUEUE_SIZE; + } + } + + mutex_unlock(&dispatcher->mutex); + + kobject_put(&dispatcher->kobj); +} + +struct dispatcher_attribute { + struct attribute attr; + ssize_t (*show)(struct adreno_dispatcher *, + struct dispatcher_attribute *, char *); + ssize_t (*store)(struct adreno_dispatcher *, + struct dispatcher_attribute *, const char *buf, + size_t count); + unsigned int max; + unsigned int *value; +}; + +#define DISPATCHER_UINT_ATTR(_name, _mode, _max, _value) \ + struct dispatcher_attribute dispatcher_attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show_uint, \ + .store = _store_uint, \ + .max = _max, \ + .value = &(_value), \ + } + +#define to_dispatcher_attr(_a) \ + container_of((_a), struct dispatcher_attribute, attr) +#define to_dispatcher(k) container_of(k, struct adreno_dispatcher, kobj) + +static ssize_t _store_uint(struct adreno_dispatcher *dispatcher, + struct dispatcher_attribute *attr, + const char *buf, size_t size) +{ + unsigned int val = 0; + int ret; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + if (!val || (attr->max && (val > attr->max))) + return -EINVAL; + + *((unsigned int *) attr->value) = val; + return size; +} + +static ssize_t _show_uint(struct adreno_dispatcher *dispatcher, + struct dispatcher_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", + *((unsigned int *) attr->value)); +} + +static 
DISPATCHER_UINT_ATTR(inflight, 0644, ADRENO_DISPATCH_CMDQUEUE_SIZE, + _dispatcher_q_inflight_hi); + +static DISPATCHER_UINT_ATTR(inflight_low_latency, 0644, + ADRENO_DISPATCH_CMDQUEUE_SIZE, _dispatcher_q_inflight_lo); +/* + * Our code that "puts back" a command from the context is much cleaner + * if we are sure that there will always be enough room in the + * ringbuffer so restrict the maximum size of the context queue to + * ADRENO_CONTEXT_CMDQUEUE_SIZE - 1 + */ +static DISPATCHER_UINT_ATTR(context_cmdqueue_size, 0644, + ADRENO_CONTEXT_CMDQUEUE_SIZE - 1, _context_cmdqueue_size); +static DISPATCHER_UINT_ATTR(context_burst_count, 0644, 0, + _context_cmdbatch_burst); +static DISPATCHER_UINT_ATTR(cmdbatch_timeout, 0644, 0, + adreno_cmdbatch_timeout); +static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait); +static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0, + _fault_timer_interval); +static DISPATCHER_UINT_ATTR(fault_throttle_time, 0644, 0, + _fault_throttle_time); +static DISPATCHER_UINT_ATTR(fault_throttle_burst, 0644, 0, + _fault_throttle_burst); +static DISPATCHER_UINT_ATTR(disp_preempt_fair_sched, 0644, 0, + adreno_disp_preempt_fair_sched); +static DISPATCHER_UINT_ATTR(dispatch_time_slice, 0644, 0, + _dispatch_time_slice); +static DISPATCHER_UINT_ATTR(dispatch_starvation_time, 0644, 0, + _dispatch_starvation_time); + +static struct attribute *dispatcher_attrs[] = { + &dispatcher_attr_inflight.attr, + &dispatcher_attr_inflight_low_latency.attr, + &dispatcher_attr_context_cmdqueue_size.attr, + &dispatcher_attr_context_burst_count.attr, + &dispatcher_attr_cmdbatch_timeout.attr, + &dispatcher_attr_context_queue_wait.attr, + &dispatcher_attr_fault_detect_interval.attr, + &dispatcher_attr_fault_throttle_time.attr, + &dispatcher_attr_fault_throttle_burst.attr, + &dispatcher_attr_disp_preempt_fair_sched.attr, + &dispatcher_attr_dispatch_time_slice.attr, + &dispatcher_attr_dispatch_starvation_time.attr, + NULL, +}; + +static ssize_t dispatcher_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct adreno_dispatcher *dispatcher = to_dispatcher(kobj); + struct dispatcher_attribute *pattr = to_dispatcher_attr(attr); + ssize_t ret = -EIO; + + if (pattr->show) + ret = pattr->show(dispatcher, pattr, buf); + + return ret; +} + +static ssize_t dispatcher_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) +{ + struct adreno_dispatcher *dispatcher = to_dispatcher(kobj); + struct dispatcher_attribute *pattr = to_dispatcher_attr(attr); + ssize_t ret = -EIO; + + if (pattr->store) + ret = pattr->store(dispatcher, pattr, buf, count); + + return ret; +} + +static const struct sysfs_ops dispatcher_sysfs_ops = { + .show = dispatcher_sysfs_show, + .store = dispatcher_sysfs_store +}; + +static struct kobj_type ktype_dispatcher = { + .sysfs_ops = &dispatcher_sysfs_ops, + .default_attrs = dispatcher_attrs, +}; + +/** + * adreno_dispatcher_init() - Initialize the dispatcher + * @adreno_dev: pointer to the adreno device structure + * + * Initialize the dispatcher + */ +int adreno_dispatcher_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + memset(dispatcher, 0, sizeof(*dispatcher)); + + mutex_init(&dispatcher->mutex); + + setup_timer(&dispatcher->timer, adreno_dispatcher_timer, + (unsigned long) adreno_dev); + + setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer, + (unsigned long) 
adreno_dev); + + setup_timer(&dispatcher->preempt_timer, adreno_dispatcher_preempt_timer, + (unsigned long) adreno_dev); + + INIT_WORK(&dispatcher->work, adreno_dispatcher_work); + + init_completion(&dispatcher->idle_gate); + complete_all(&dispatcher->idle_gate); + + plist_head_init(&dispatcher->pending); + spin_lock_init(&dispatcher->plist_lock); + + atomic_set(&dispatcher->preemption_state, + ADRENO_DISPATCHER_PREEMPT_CLEAR); + + ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher, + &device->dev->kobj, "dispatch"); + + return ret; +} + +/* + * adreno_dispatcher_idle() - Wait for dispatcher to idle + * @adreno_dev: Adreno device whose dispatcher needs to idle + * + * Signal dispatcher to stop sending more commands and complete + * the commands that have already been submitted. This function + * should not be called when dispatcher mutex is held. + */ +int adreno_dispatcher_idle(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher; + int ret; + + BUG_ON(!mutex_is_locked(&device->mutex)); + if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + return 0; + + /* + * Ensure that this function is not called when dispatcher + * mutex is held and device is started + */ + if (mutex_is_locked(&dispatcher->mutex) && + dispatcher->mutex.owner == current) + BUG_ON(1); + + adreno_get_gpu_halt(adreno_dev); + + mutex_unlock(&device->mutex); + + ret = wait_for_completion_timeout(&dispatcher->idle_gate, + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT)); + if (ret == 0) { + ret = -ETIMEDOUT; + WARN(1, "Dispatcher halt timeout "); + } else if (ret < 0) { + KGSL_DRV_ERR(device, "Dispatcher halt failed %d\n", ret); + } else { + ret = 0; + } + + mutex_lock(&device->mutex); + adreno_put_gpu_halt(adreno_dev); + /* + * requeue dispatcher work to resubmit pending commands + * that may have been blocked due to this idling request + */ + adreno_dispatcher_schedule(device); + return ret; +} + +void adreno_preempt_process_dispatch_queue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q) +{ + struct kgsl_device *device = &(adreno_dev->dev); + struct kgsl_cmdbatch *cmdbatch; + + if (dispatch_q->head != dispatch_q->tail) { + /* + * retire cmdbacthes from previous q, and don't check for + * timeout since the cmdbatch may have been preempted + */ + adreno_dispatch_process_cmdqueue(adreno_dev, + dispatch_q, 0); + } + + /* set the timer for the first cmdbatch of active dispatch_q */ + dispatch_q = &(adreno_dev->cur_rb->dispatch_q); + if (dispatch_q->head != dispatch_q->tail) { + cmdbatch = dispatch_q->cmd_q[dispatch_q->head]; + cmdbatch->expires = jiffies + + msecs_to_jiffies(adreno_cmdbatch_timeout); + } + kgsl_schedule_work(&device->event_work); +} + +/** + * adreno_dispatcher_preempt_callback() - Callback funcion for CP_SW interrupt + * @adreno_dev: The device on which the interrupt occurred + * @bit: Interrupt bit in the interrupt status register + */ +void adreno_dispatcher_preempt_callback(struct adreno_device *adreno_dev, + int bit) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_dispatcher *dispatcher = &(adreno_dev->dispatcher); + if (ADRENO_DISPATCHER_PREEMPT_TRIGGERED != + atomic_read(&dispatcher->preemption_state)) { + KGSL_DRV_INFO(device, + "Preemption interrupt generated w/o trigger!\n"); + return; + } + trace_adreno_hw_preempt_trig_to_comp_int(adreno_dev->cur_rb, + adreno_dev->next_rb); + atomic_set(&dispatcher->preemption_state, + 
ADRENO_DISPATCHER_PREEMPT_COMPLETE); + adreno_dispatcher_schedule(device); +} diff --git a/drivers/gpu/msm/adreno_dispatch.h b/drivers/gpu/msm/adreno_dispatch.h new file mode 100644 index 000000000000..1e89e604e0ae --- /dev/null +++ b/drivers/gpu/msm/adreno_dispatch.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + + +#ifndef ____ADRENO_DISPATCHER_H +#define ____ADRENO_DISPATCHER_H + +/* Time to allow preemption to complete (in ms) */ +#define ADRENO_DISPATCH_PREEMPT_TIMEOUT 10000 + +extern unsigned int adreno_disp_preempt_fair_sched; +extern unsigned int adreno_cmdbatch_timeout; + +/** + * enum adreno_dispatcher_preempt_states - States of dispatcher for ringbuffer + * preemption + * @ADRENO_DISPATCHER_PREEMPT_CLEAR: No preemption is underway, + * only 1 preemption can be underway at any point + * @ADRENO_DISPATCHER_PREEMPT_TRIGGERED: A preemption is underway + * @ADRENO_DISPATCHER_PREEMPT_COMPLETE: A preemption has just completed + */ +enum adreno_dispatcher_preempt_states { + ADRENO_DISPATCHER_PREEMPT_CLEAR = 0, + ADRENO_DISPATCHER_PREEMPT_TRIGGERED, + ADRENO_DISPATCHER_PREEMPT_COMPLETE, +}; + +/** + * enum adreno_dispatcher_starve_timer_states - Starvation control states of + * a RB + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT: Uninitialized, starvation control + * is not operating + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT: Starvation timer is initialized + * and counting + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED: The starvation timer has elapsed + * this state indicates that the RB is starved + * @ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED: RB is scheduled on the device + * and will remain scheduled for a minimum time slice when in this state. 
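+ *
+ * The names suggest the expected progression: a waiting RB moves from UNINIT
+ * to INIT when its starvation timer is armed, to ELAPSED if the timer expires
+ * before the RB gets scheduled, and to SCHEDULED once the dispatcher switches
+ * to it for its time slice. The fault path in adreno_dispatch.c drops the GPU
+ * halt of an ELAPSED RB and resets it back to UNINIT.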
+ */
+enum adreno_dispatcher_starve_timer_states {
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT = 0,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT = 1,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED = 2,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED = 3,
+};
+
+/*
+ * Maximum size of the dispatcher ringbuffer - the actual inflight size will be
+ * smaller than this, but this size allows for a larger range of inflight
+ * sizes that can be chosen at runtime
+ */
+
+#define ADRENO_DISPATCH_CMDQUEUE_SIZE 128
+
+#define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
+
+#define ACTIVE_CONTEXT_LIST_MAX 2
+
+struct adreno_context_list {
+	unsigned int id;
+	unsigned long jiffies;
+};
+
+/**
+ * struct adreno_dispatcher_cmdqueue - List of commands for a RB level
+ * @cmd_q: List of command batches submitted to dispatcher
+ * @inflight: Number of commands inflight in this q
+ * @head: Head pointer to the q
+ * @tail: Queue's tail pointer
+ * @active_contexts: List of most recently seen contexts
+ * @active_context_count: Number of active contexts in the active_contexts list
+ */
+struct adreno_dispatcher_cmdqueue {
+	struct kgsl_cmdbatch *cmd_q[ADRENO_DISPATCH_CMDQUEUE_SIZE];
+	unsigned int inflight;
+	unsigned int head;
+	unsigned int tail;
+	struct adreno_context_list active_contexts[ACTIVE_CONTEXT_LIST_MAX];
+	int active_context_count;
+};
+
+/**
+ * struct adreno_dispatcher - container for the adreno GPU dispatcher
+ * @mutex: Mutex to protect the structure
+ * @priv: Private flags for the dispatcher (ADRENO_DISPATCHER_POWER/ACTIVE)
+ * @timer: Timer to monitor the progress of the command batches
+ * @fault_timer: Timer used for fast (soft) hang detection
+ * @inflight: Number of command batch operations pending in the ringbuffer
+ * @fault: Non-zero if a fault was detected.
+ * @pending: Priority list of contexts waiting to submit command batches
+ * @plist_lock: Spin lock to protect the pending queue
+ * @work: work_struct to put the dispatcher in a work queue
+ * @kobj: kobject for the dispatcher directory in the device sysfs node
+ * @idle_gate: Gate to wait on for dispatcher to idle
+ * @preemption_state: Indicates which state the dispatcher is in; states are
+ * defined by enum adreno_dispatcher_preempt_states
+ * @preempt_token_submit: Indicates if a preempt token has been submitted in
+ * the current ringbuffer.
+ * @preempt_timer: Timer to track if preemption occured within specified time + * @disp_preempt_fair_sched: If set then dispatcher will try to be fair to + * starving RB's by scheduling them in and enforcing a minimum time slice + * for every RB that is scheduled to run on the device + */ +struct adreno_dispatcher { + struct mutex mutex; + unsigned long priv; + struct timer_list timer; + struct timer_list fault_timer; + unsigned int inflight; + atomic_t fault; + struct plist_head pending; + spinlock_t plist_lock; + struct work_struct work; + struct kobject kobj; + struct completion idle_gate; + atomic_t preemption_state; + int preempt_token_submit; + struct timer_list preempt_timer; + unsigned int disp_preempt_fair_sched; +}; + +enum adreno_dispatcher_flags { + ADRENO_DISPATCHER_POWER = 0, + ADRENO_DISPATCHER_ACTIVE = 1, +}; + +void adreno_dispatcher_start(struct kgsl_device *device); +int adreno_dispatcher_init(struct adreno_device *adreno_dev); +void adreno_dispatcher_close(struct adreno_device *adreno_dev); +int adreno_dispatcher_idle(struct adreno_device *adreno_dev); +void adreno_dispatcher_stop(struct adreno_device *adreno_dev); + +int adreno_dispatcher_queue_cmd(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp); + +void adreno_dispatcher_schedule(struct kgsl_device *device); +void adreno_dispatcher_pause(struct adreno_device *adreno_dev); +void adreno_dispatcher_queue_context(struct kgsl_device *device, + struct adreno_context *drawctxt); +void adreno_dispatcher_preempt_callback(struct adreno_device *adreno_dev, + int bit); +struct adreno_ringbuffer *adreno_dispatcher_get_highest_busy_rb( + struct adreno_device *adreno_dev); +int adreno_dispatch_process_cmdqueue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q, + int long_ib_detect); +void adreno_preempt_process_dispatch_queue(struct adreno_device *adreno_dev, + struct adreno_dispatcher_cmdqueue *dispatch_q); + +#endif /* __ADRENO_DISPATCHER_H */ diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c new file mode 100644 index 000000000000..505fd5473a78 --- /dev/null +++ b/drivers/gpu/msm/adreno_drawctxt.c @@ -0,0 +1,585 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/slab.h> +#include <linux/msm_kgsl.h> +#include <linux/sched.h> +#include <linux/debugfs.h> + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "adreno.h" +#include "adreno_trace.h" + +#define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF + +static void wait_callback(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct adreno_context *drawctxt = priv; + wake_up_all(&drawctxt->waiting); +} + +static int _check_context_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + /* Bail if the drawctxt has been invalidated or destroyed */ + if (kgsl_context_detached(context) || kgsl_context_invalid(context)) + return 1; + + return kgsl_check_timestamp(device, context, timestamp); +} + +/** + * adreno_drawctxt_dump() - dump information about a draw context + * @device: KGSL device that owns the context + * @context: KGSL context to dump information about + * + * Dump specific information about the context to the kernel log. Used for + * fence timeout callbacks + */ +void adreno_drawctxt_dump(struct kgsl_device *device, + struct kgsl_context *context) +{ + unsigned int queue, start, retire; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int index, pos; + char buf[120]; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queue); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, &start); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retire); + + /* + * We may have cmdbatch timer running, which also uses same + * lock, take a lock with software interrupt disabled (bh) + * to avoid spin lock recursion. + */ + spin_lock_bh(&drawctxt->lock); + dev_err(device->dev, + " context[%d]: queue=%d, submit=%d, start=%d, retire=%d\n", + context->id, queue, drawctxt->submitted_timestamp, + start, retire); + + if (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + struct kgsl_cmdbatch *cmdbatch = + drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + if (test_bit(CMDBATCH_FLAG_FENCE_LOG, &cmdbatch->priv)) { + dev_err(device->dev, + " possible deadlock. Context %d might be blocked for itself\n", + context->id); + goto stats; + } + + if (kgsl_cmdbatch_events_pending(cmdbatch)) { + dev_err(device->dev, + " context[%d] (ts=%d) Active sync points:\n", + context->id, cmdbatch->timestamp); + + kgsl_dump_syncpoints(device, cmdbatch); + } + } + +stats: + memset(buf, 0, sizeof(buf)); + + pos = 0; + + for (index = 0; index < SUBMIT_RETIRE_TICKS_SIZE; index++) { + uint64_t msecs; + unsigned int usecs; + + if (!drawctxt->submit_retire_ticks[index]) + continue; + msecs = drawctxt->submit_retire_ticks[index] * 10; + usecs = do_div(msecs, 192); + usecs = do_div(msecs, 1000); + pos += snprintf(buf + pos, sizeof(buf) - pos, "%d.%0d ", + (unsigned int)msecs, usecs); + } + dev_err(device->dev, " context[%d]: submit times: %s\n", + context->id, buf); + + spin_unlock_bh(&drawctxt->lock); +} + +/** + * adreno_drawctxt_wait() - sleep until a timestamp expires + * @adreno_dev: pointer to the adreno_device struct + * @drawctxt: Pointer to the draw context to sleep for + * @timetamp: Timestamp to wait on + * @timeout: Number of jiffies to wait (0 for infinite) + * + * Register an event to wait for a timestamp on a context and sleep until it + * has past. 
Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0 + * on success + */ +int adreno_drawctxt_wait(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret; + long ret_temp; + + if (kgsl_context_detached(context)) + return -ENOENT; + + if (kgsl_context_invalid(context)) + return -EDEADLK; + + trace_adreno_drawctxt_wait_start(-1, context->id, timestamp); + + ret = kgsl_add_event(device, &context->events, timestamp, + wait_callback, (void *) drawctxt); + if (ret) + goto done; + + /* + * If timeout is 0, wait forever. msecs_to_jiffies will force + * values larger than INT_MAX to an infinite timeout. + */ + if (timeout == 0) + timeout = UINT_MAX; + + ret_temp = wait_event_interruptible_timeout(drawctxt->waiting, + _check_context_timestamp(device, context, timestamp), + msecs_to_jiffies(timeout)); + + if (ret_temp == 0) { + ret = -ETIMEDOUT; + goto done; + } else if (ret_temp < 0) { + ret = (int) ret_temp; + goto done; + } + ret = 0; + + /* -EDEADLK if the context was invalidated while we were waiting */ + if (kgsl_context_invalid(context)) + ret = -EDEADLK; + + + /* Return -EINVAL if the context was detached while we were waiting */ + if (kgsl_context_detached(context)) + ret = -ENOENT; + +done: + trace_adreno_drawctxt_wait_done(-1, context->id, timestamp, ret); + return ret; +} + +/** + * adreno_drawctxt_wait_rb() - Wait for the last RB timestamp at which this + * context submitted a command to the corresponding RB + * @adreno_dev: The device on which the timestamp is active + * @context: The context which subbmitted command to RB + * @timestamp: The RB timestamp of last command submitted to RB by context + * @timeout: Timeout value for the wait + */ +static int adreno_drawctxt_wait_rb(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + int ret = 0; + + /* Needs to hold the device mutex */ + BUG_ON(!mutex_is_locked(&device->mutex)); + + /* + * If the context is invalid then return immediately - we may end up + * waiting for a timestamp that will never come + */ + if (kgsl_context_invalid(context)) + goto done; + + trace_adreno_drawctxt_wait_start(drawctxt->rb->id, context->id, + timestamp); + + ret = adreno_ringbuffer_waittimestamp(drawctxt->rb, timestamp, timeout); +done: + trace_adreno_drawctxt_wait_done(drawctxt->rb->id, context->id, + timestamp, ret); + return ret; +} + +static int drawctxt_detach_cmdbatches(struct adreno_context *drawctxt, + struct kgsl_cmdbatch **list) +{ + int count = 0; + + while (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) { + struct kgsl_cmdbatch *cmdbatch = + drawctxt->cmdqueue[drawctxt->cmdqueue_head]; + + drawctxt->cmdqueue_head = (drawctxt->cmdqueue_head + 1) % + ADRENO_CONTEXT_CMDQUEUE_SIZE; + + list[count++] = cmdbatch; + } + + return count; +} + +/** + * adreno_drawctxt_invalidate() - Invalidate an adreno draw context + * @device: Pointer to the KGSL device structure for the GPU + * @context: Pointer to the KGSL context structure + * + * Invalidate the context and remove all queued commands and cancel any pending + * waiters + */ +void adreno_drawctxt_invalidate(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct 
kgsl_cmdbatch *list[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + int i, count; + + trace_adreno_drawctxt_invalidate(drawctxt); + + spin_lock(&drawctxt->lock); + set_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv); + + /* + * set the timestamp to the last value since the context is invalidated + * and we want the pending events for this context to go away + */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + /* Get rid of commands still waiting in the queue */ + count = drawctxt_detach_cmdbatches(drawctxt, list); + spin_unlock(&drawctxt->lock); + + for (i = 0; i < count; i++) { + kgsl_cancel_events_timestamp(device, &context->events, + list[i]->timestamp); + kgsl_cmdbatch_destroy(list[i]); + } + + /* Make sure all pending events are processed or cancelled */ + kgsl_flush_event_group(device, &context->events); + + /* Give the bad news to everybody waiting around */ + wake_up_all(&drawctxt->waiting); + wake_up_all(&drawctxt->wq); +} + +/* + * Set the priority of the context based on the flags passed into context + * create. If the priority is not set in the flags, then the kernel can + * assign any priority it desires for the context. + */ +#define KGSL_CONTEXT_PRIORITY_MED 0x8 + +static inline void _set_context_priority(struct adreno_context *drawctxt) +{ + /* If the priority is not set by user, set it for them */ + if ((drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) == + KGSL_CONTEXT_PRIORITY_UNDEF) + drawctxt->base.flags |= (KGSL_CONTEXT_PRIORITY_MED << + KGSL_CONTEXT_PRIORITY_SHIFT); + + /* Store the context priority */ + drawctxt->base.priority = + (drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) >> + KGSL_CONTEXT_PRIORITY_SHIFT; +} + +/** + * adreno_drawctxt_create - create a new adreno draw context + * @dev_priv: the owner of the context + * @flags: flags for the context (passed from user space) + * + * Create and return a new draw context for the 3D core. 
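+ *
+ * Purely as an illustration (assuming the standard msm_kgsl UAPI, which is
+ * not part of this file), a userspace client would typically reach this
+ * through IOCTL_KGSL_DRAWCTXT_CREATE with a preamble context:
+ *
+ *	struct kgsl_drawctxt_create req = {
+ *		.flags = KGSL_CONTEXT_PREAMBLE | KGSL_CONTEXT_NO_GMEM_ALLOC,
+ *	};
+ *	ioctl(fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
+ *
+ * since legacy (non-preamble) context switching is rejected below.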
+ */
+struct kgsl_context *
+adreno_drawctxt_create(struct kgsl_device_private *dev_priv,
+			uint32_t *flags)
+{
+	struct adreno_context *drawctxt;
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+	unsigned long local;
+
+	local = *flags & (KGSL_CONTEXT_PREAMBLE |
+		KGSL_CONTEXT_NO_GMEM_ALLOC |
+		KGSL_CONTEXT_PER_CONTEXT_TS |
+		KGSL_CONTEXT_USER_GENERATED_TS |
+		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
+		KGSL_CONTEXT_CTX_SWITCH |
+		KGSL_CONTEXT_PRIORITY_MASK |
+		KGSL_CONTEXT_TYPE_MASK |
+		KGSL_CONTEXT_PWR_CONSTRAINT |
+		KGSL_CONTEXT_IFH_NOP |
+		KGSL_CONTEXT_SECURE |
+		KGSL_CONTEXT_PREEMPT_STYLE_MASK);
+
+	/* Check for errors before trying to initialize */
+
+	/* If preemption is not supported, ignore preemption request */
+	if (!test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv))
+		local &= ~KGSL_CONTEXT_PREEMPT_STYLE_MASK;
+
+	/* We no longer support legacy context switching */
+	if ((local & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		(local & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+		KGSL_DEV_ERR_ONCE(device,
+			"legacy context switch not supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Make sure that our target can support secure contexts if requested */
+	if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) &&
+			(local & KGSL_CONTEXT_SECURE)) {
+		KGSL_DEV_ERR_ONCE(device, "Secure context not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
+
+	if (drawctxt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	drawctxt->timestamp = 0;
+
+	drawctxt->base.flags = local;
+
+	/* Always enable per-context timestamps */
+	drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
+	drawctxt->type = (drawctxt->base.flags & KGSL_CONTEXT_TYPE_MASK)
+		>> KGSL_CONTEXT_TYPE_SHIFT;
+	spin_lock_init(&drawctxt->lock);
+	init_waitqueue_head(&drawctxt->wq);
+	init_waitqueue_head(&drawctxt->waiting);
+
+	/* Set the context priority */
+	_set_context_priority(drawctxt);
+	/* Set the context ringbuffer */
+	drawctxt->rb = adreno_ctx_get_rb(adreno_dev, drawctxt);
+
+	/*
+	 * Set up the plist node for the dispatcher. Insert the node into the
+	 * drawctxt pending list based on priority.
+	 */
+	plist_node_init(&drawctxt->pending, drawctxt->base.priority);
+
+	/*
+	 * Now initialize the common part of the context. This allocates the
+	 * context id, after which another thread could look it up, so all
+	 * initialization that doesn't require the context id must be done
+	 * before this call.
+	 */
+	ret = kgsl_context_init(dev_priv, &drawctxt->base);
+	if (ret != 0) {
+		kfree(drawctxt);
+		return ERR_PTR(ret);
+	}
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
+			0);
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
+			0);
+
+	adreno_context_debugfs_init(ADRENO_DEVICE(device), drawctxt);
+
+	/* Copy back whatever flags we decided were valid */
+	*flags = drawctxt->base.flags;
+	return &drawctxt->base;
+}
+
+/**
+ * adreno_drawctxt_sched() - Schedule a previously blocked context
+ * @device: pointer to a KGSL device
+ * @drawctxt: drawctxt to reschedule
+ *
+ * This function is called by the core when it knows that a previously blocked
+ * context has been unblocked.
The default adreno response is to reschedule the + * context on the dispatcher + */ +void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context) +{ + adreno_dispatcher_queue_context(device, ADRENO_CONTEXT(context)); +} + +/** + * adreno_drawctxt_detach(): detach a context from the GPU + * @context: Generic KGSL context container for the context + * + */ +void adreno_drawctxt_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + struct adreno_device *adreno_dev; + struct adreno_context *drawctxt; + struct adreno_ringbuffer *rb; + int ret, count, i; + struct kgsl_cmdbatch *list[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + + if (context == NULL) + return; + + device = context->device; + adreno_dev = ADRENO_DEVICE(device); + drawctxt = ADRENO_CONTEXT(context); + rb = drawctxt->rb; + + /* deactivate context */ + mutex_lock(&device->mutex); + if (rb->drawctxt_active == drawctxt) { + if (adreno_dev->cur_rb == rb) { + if (!kgsl_active_count_get(device)) { + adreno_drawctxt_switch(adreno_dev, rb, NULL, 0); + kgsl_active_count_put(device); + } else + BUG(); + } else + adreno_drawctxt_switch(adreno_dev, rb, NULL, 0); + } + mutex_unlock(&device->mutex); + + spin_lock(&drawctxt->lock); + count = drawctxt_detach_cmdbatches(drawctxt, list); + spin_unlock(&drawctxt->lock); + + for (i = 0; i < count; i++) { + /* + * If the context is deteached while we are waiting for + * the next command in GFT SKIP CMD, print the context + * detached status here. + */ + adreno_fault_skipcmd_detached(device, drawctxt, list[i]); + kgsl_cmdbatch_destroy(list[i]); + } + + /* + * internal_timestamp is set in adreno_ringbuffer_addcmds, + * which holds the device mutex. + */ + mutex_lock(&device->mutex); + + /* + * Wait for the last global timestamp to pass before continuing. + * The maxumum wait time is 30s, some large IB's can take longer + * than 10s and if hang happens then the time for the context's + * commands to retire will be greater than 10s. 30s should be sufficient + * time to wait for the commands even if a hang happens. + */ + ret = adreno_drawctxt_wait_rb(adreno_dev, context, + drawctxt->internal_timestamp, 30 * 1000); + + /* + * If the wait for global fails due to timeout then nothing after this + * point is likely to work very well - BUG_ON() so we can take advantage + * of the debug tools to figure out what the h - e - double hockey + * sticks happened. 
If EAGAIN error is returned then recovery will kick + * in and there will be no more commands in the RB pipe from this + * context which is waht we are waiting for, so ignore -EAGAIN error + */ + BUG_ON(ret && ret != -EAGAIN); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), + drawctxt->timestamp); + + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), + drawctxt->timestamp); + + adreno_profile_process_results(adreno_dev); + + mutex_unlock(&device->mutex); + + /* wake threads waiting to submit commands from this context */ + wake_up_all(&drawctxt->waiting); + wake_up_all(&drawctxt->wq); +} + +void adreno_drawctxt_destroy(struct kgsl_context *context) +{ + struct adreno_context *drawctxt; + if (context == NULL) + return; + + drawctxt = ADRENO_CONTEXT(context); + debugfs_remove_recursive(drawctxt->debug_root); + kfree(drawctxt); +} + +/** + * adreno_drawctxt_switch - switch the current draw context in a given RB + * @adreno_dev - The 3D device that owns the context + * @rb: The ringubffer pointer on which the current context is being changed + * @drawctxt - the 3D context to switch to + * @flags - Flags to accompany the switch (from user space) + * + * Switch the current draw context in given RB + */ + +int adreno_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt, + unsigned int flags) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_pagetable *new_pt; + int ret = 0; + + /* We always expect a valid rb */ + BUG_ON(!rb); + + /* already current? */ + if (rb->drawctxt_active == drawctxt) + return ret; + + trace_adreno_drawctxt_switch(rb, + drawctxt, flags); + + /* Get a refcount to the new instance */ + if (drawctxt) { + if (!_kgsl_context_get(&drawctxt->base)) + return -ENOENT; + + new_pt = drawctxt->base.proc_priv->pagetable; + } else { + /* No context - set the default pagetable and thats it. */ + new_pt = device->mmu.defaultpagetable; + } + ret = adreno_iommu_set_pt_ctx(rb, new_pt, drawctxt); + if (ret) { + KGSL_DRV_ERR(device, + "Failed to set pagetable on rb %d\n", rb->id); + return ret; + } + + /* Put the old instance of the active drawctxt */ + if (rb->drawctxt_active) + kgsl_context_put(&rb->drawctxt_active->base); + + rb->drawctxt_active = drawctxt; + return 0; +} diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h new file mode 100644 index 000000000000..d50460a544b1 --- /dev/null +++ b/drivers/gpu/msm/adreno_drawctxt.h @@ -0,0 +1,131 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __ADRENO_DRAWCTXT_H +#define __ADRENO_DRAWCTXT_H + +struct adreno_context_type { + unsigned int type; + const char *str; +}; + +#define ADRENO_CONTEXT_CMDQUEUE_SIZE 128 +#define SUBMIT_RETIRE_TICKS_SIZE 7 + +struct kgsl_device; +struct adreno_device; +struct kgsl_device_private; +struct kgsl_context; + +/** + * struct adreno_context - Adreno GPU draw context + * @timestamp: Last issued context-specific timestamp + * @internal_timestamp: Global timestamp of the last issued command + * NOTE: guarded by device->mutex, not drawctxt->lock! + * @type: Context type (GL, CL, RS) + * @lock: Spinlock that protects the cmdqueue + * @cmdqueue: Queue of command batches waiting to be dispatched for this context + * @cmdqueue_head: Head of the cmdqueue queue + * @cmdqueue_tail: Tail of the cmdqueue queue + * @pending: Priority list node for the dispatcher list of pending contexts + * @wq: Workqueue structure for contexts to sleep pending room in the queue + * @waiting: Workqueue structure for contexts waiting for a timestamp or event + * @queued: Number of commands queued in the cmdqueue + * @fault_policy: GFT fault policy set in cmdbatch_skip_cmd() + * @debug_root: debugfs entry for this context. + * @queued_timestamp: The last timestamp that was queued on this context + * @rb: The ringbuffer in which this context submits commands. + * @submitted_timestamp: The last timestamp that was submitted for this context + * @submit_retire_ticks: Array to hold cmdbatch execution times from submit + * to retire + * @ticks_index: The index into submit_retire_ticks[] where the new delta will + * be written. + */ +struct adreno_context { + struct kgsl_context base; + unsigned int timestamp; + unsigned int internal_timestamp; + unsigned int type; + spinlock_t lock; + + /* Dispatcher */ + struct kgsl_cmdbatch *cmdqueue[ADRENO_CONTEXT_CMDQUEUE_SIZE]; + unsigned int cmdqueue_head; + unsigned int cmdqueue_tail; + + struct plist_node pending; + wait_queue_head_t wq; + wait_queue_head_t waiting; + + int queued; + unsigned int fault_policy; + struct dentry *debug_root; + unsigned int queued_timestamp; + struct adreno_ringbuffer *rb; + unsigned int submitted_timestamp; + uint64_t submit_retire_ticks[SUBMIT_RETIRE_TICKS_SIZE]; + int ticks_index; +}; + +/* Flag definitions for flag field in adreno_context */ + +/* Set when sync timer of cmdbatch belonging to the context times out */ +#define ADRENO_CONTEXT_CMDBATCH_FLAG_FENCE_LOG BIT(0) + +/** + * enum adreno_context_priv - Private flags for an adreno draw context + * @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered) + * @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang + * @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang + * and fault tolerance was successful + * @ADRENO_CONTEXT_SKIP_EOF - Context skips IBs until the next end of frame + * marker. + * @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission. + * @ADRENO_CONTEXT_SKIP_CMD - Context's command batch is skipped during + fault tolerance.
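+ * + * Illustrative sketch (assuming, as the KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC base suggests, that these values are bit positions in the kgsl_context priv bitmap), a fault is typically recorded and later tested with the standard bitops: + * + * set_bit(ADRENO_CONTEXT_FAULT, &drawctxt->base.priv); + * if (test_bit(ADRENO_CONTEXT_FAULT, &drawctxt->base.priv)) + * reject or skip further submissions from this context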
+ */ +enum adreno_context_priv { + ADRENO_CONTEXT_FAULT = KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC, + ADRENO_CONTEXT_GPU_HANG, + ADRENO_CONTEXT_GPU_HANG_FT, + ADRENO_CONTEXT_SKIP_EOF, + ADRENO_CONTEXT_FORCE_PREAMBLE, + ADRENO_CONTEXT_SKIP_CMD, +}; + +struct kgsl_context *adreno_drawctxt_create(struct kgsl_device_private *, + uint32_t *flags); + +void adreno_drawctxt_detach(struct kgsl_context *context); + +void adreno_drawctxt_destroy(struct kgsl_context *context); + +void adreno_drawctxt_sched(struct kgsl_device *device, + struct kgsl_context *context); + +struct adreno_ringbuffer; +int adreno_drawctxt_switch(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt, + unsigned int flags); + +int adreno_drawctxt_wait(struct adreno_device *adreno_dev, + struct kgsl_context *context, + uint32_t timestamp, unsigned int timeout); + +void adreno_drawctxt_invalidate(struct kgsl_device *device, + struct kgsl_context *context); + +void adreno_drawctxt_dump(struct kgsl_device *device, + struct kgsl_context *context); + +#endif /* __ADRENO_DRAWCTXT_H */ diff --git a/drivers/gpu/msm/adreno_ioctl.c b/drivers/gpu/msm/adreno_ioctl.c new file mode 100644 index 000000000000..13d3353946ca --- /dev/null +++ b/drivers/gpu/msm/adreno_ioctl.c @@ -0,0 +1,168 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/ioctl.h> +#include "kgsl_device.h" +#include "adreno.h" +#include "adreno_a5xx.h" + +long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_get *get = data; + int result; + + mutex_lock(&device->mutex); + + /* + * adreno_perfcounter_get() is called by kernel clients + * during start(), so it is not safe to take an + * active count inside that function. 
+ */ + result = kgsl_active_count_get(device); + + if (result == 0) { + result = adreno_perfcounter_get(adreno_dev, + get->groupid, get->countable, &get->offset, + &get->offset_hi, PERFCOUNTER_FLAG_NONE); + kgsl_active_count_put(device); + } + mutex_unlock(&device->mutex); + + return (long) result; +} + +long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_perfcounter_put *put = data; + int result; + + mutex_lock(&device->mutex); + result = adreno_perfcounter_put(adreno_dev, put->groupid, + put->countable, PERFCOUNTER_FLAG_NONE); + mutex_unlock(&device->mutex); + + return (long) result; +} + +static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_query *query = data; + + return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid, + query->countables, query->count, &query->max_counters); +} + +static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct kgsl_perfcounter_read *read = data; + + return (long) adreno_perfcounter_read_group(adreno_dev, read->reads, + read->count); +} + +static long adreno_ioctl_preemption_counters_query( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_preemption_counters_query *read = data; + int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE; + int levels_to_copy; + + if (!adreno_is_a5xx(adreno_dev) || + !adreno_is_preemption_enabled(adreno_dev)) + return -EOPNOTSUPP; + + if (read->size_user < size_level) + return -EINVAL; + + /* Calculate number of preemption counter levels to copy to userspace */ + levels_to_copy = (read->size_user / size_level); + if (levels_to_copy > gpudev->num_prio_levels) + levels_to_copy = gpudev->num_prio_levels; + + if (copy_to_user((void __user *) (uintptr_t) read->counters, + adreno_dev->preemption_counters.hostptr, + levels_to_copy * size_level)) + return -EFAULT; + + read->max_priority_level = levels_to_copy; + read->size_priority_level = size_level; + + return 0; +} + +long adreno_ioctl_helper(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + unsigned char data[128]; + long ret; + int i; + + for (i = 0; i < len; i++) { + if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd)) + break; + } + + if (i == len) { + KGSL_DRV_INFO(dev_priv->device, + "invalid ioctl code 0x%08X\n", cmd); + return -ENOIOCTLCMD; + } + + BUG_ON(_IOC_SIZE(cmds[i].cmd) > sizeof(data)); + + if (_IOC_SIZE(cmds[i].cmd)) { + ret = kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data); + + if (ret) + return ret; + } else { + memset(data, 0, sizeof(data)); + } + + ret = cmds[i].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[i].cmd)) + ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data); + + return ret; +} + +static struct kgsl_ioctl adreno_ioctl_funcs[] = { + { IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get }, + { IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put }, + { IOCTL_KGSL_PERFCOUNTER_QUERY, 
adreno_ioctl_perfcounter_query }, + { IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read }, + { IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY, + adreno_ioctl_preemption_counters_query }, +}; + +long adreno_ioctl(struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg) +{ + return adreno_ioctl_helper(dev_priv, cmd, arg, + adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs)); +} diff --git a/drivers/gpu/msm/adreno_iommu.c b/drivers/gpu/msm/adreno_iommu.c new file mode 100644 index 000000000000..45f5c9be64d9 --- /dev/null +++ b/drivers/gpu/msm/adreno_iommu.c @@ -0,0 +1,980 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include "adreno.h" +#include "kgsl_sharedmem.h" +#include "a3xx_reg.h" +#include "adreno_pm4types.h" +#include "kgsl_mmu.h" + +#define A5XX_PFP_PER_PROCESS_UCODE_VER 0x5FF064 +#define A5XX_PM4_PER_PROCESS_UCODE_VER 0x5FF052 + +/* + * _wait_reg() - make CP poll on a register + * @cmds: Pointer to memory where commands are to be added + * @addr: Register address to poll for + * @val: Value to poll for + * @mask: The value against which register value is masked + * @interval: wait interval + */ +static unsigned int _wait_reg(struct adreno_device *adreno_dev, + unsigned int *cmds, unsigned int addr, + unsigned int val, unsigned int mask, + unsigned int interval) +{ + unsigned int *start = cmds; + + if (adreno_is_a3xx(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_WAIT_REG_EQ, 4); + *cmds++ = addr; + *cmds++ = val; + *cmds++ = mask; + *cmds++ = interval; + } else { + *cmds++ = cp_mem_packet(adreno_dev, CP_WAIT_REG_MEM, 5, 1); + *cmds++ = 0x3; /* Mem Space = Register, Function = Equals */ + cmds += cp_gpuaddr(adreno_dev, cmds, addr); /* Poll address */ + *cmds++ = val; /* ref val */ + *cmds++ = mask; + *cmds++ = interval; + + /* WAIT_REG_MEM turns back on protected mode - push it off */ + *cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + } + + return cmds - start; +} + +static unsigned int _iommu_lock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + + /* + * If we don't have this register, probe should have forced + * global pagetables and we shouldn't get here. + * BUG() so we don't debug a bad register write. 
+ */ + BUG_ON(iommu->micro_mmu_ctrl == UINT_MAX); + + /* + * glue commands together until next + * WAIT_FOR_ME + */ + cmds += _wait_reg(adreno_dev, cmds, + adreno_getreg(adreno_dev, ADRENO_REG_CP_WFI_PEND_CTR), + 1, 0xFFFFFFFF, 0xF); + + /* set the iommu lock bit */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = iommu->micro_mmu_ctrl >> 2; + /* AND to unmask the lock bit */ + *cmds++ = ~(KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT); + /* OR to set the IOMMU lock bit */ + *cmds++ = KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT; + + /* wait for smmu to lock */ + cmds += _wait_reg(adreno_dev, cmds, iommu->micro_mmu_ctrl >> 2, + KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE, + KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE, 0xF); + + return cmds - start; +} + +static unsigned int _iommu_unlock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + unsigned int *start = cmds; + + BUG_ON(iommu->micro_mmu_ctrl == UINT_MAX); + + /* unlock the IOMMU lock */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = iommu->micro_mmu_ctrl >> 2; + /* AND to unmask the lock bit */ + *cmds++ = ~(KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT); + /* OR with 0 so lock bit is unset */ + *cmds++ = 0; + + /* release all commands since _iommu_lock() with wait_for_me */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - start; +} + +static unsigned int _vbif_lock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + /* + * glue commands together until next + * WAIT_FOR_ME + */ + cmds += _wait_reg(adreno_dev, cmds, + adreno_getreg(adreno_dev, ADRENO_REG_CP_WFI_PEND_CTR), + 1, 0xFFFFFFFF, 0xF); + + /* MMU-500 VBIF stall */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + *cmds++ = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to set the HALT bit */ + *cmds++ = 0x1; + + /* Wait for acknowledgement */ + cmds += _wait_reg(adreno_dev, cmds, + A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1, + 1, 0xFFFFFFFF, 0xF); + + return cmds - start; +} + +static unsigned int _vbif_unlock(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + /* MMU-500 VBIF unstall */ + *cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3); + *cmds++ = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0; + /* AND to unmask the HALT bit */ + *cmds++ = ~(VBIF_RECOVERABLE_HALT_CTRL); + /* OR to reset the HALT bit */ + *cmds++ = 0; + + /* release all commands since _vbif_lock() with wait_for_me */ + cmds += cp_wait_for_me(adreno_dev, cmds); + return cmds - start; +} + +static unsigned int _cp_smmu_reg(struct adreno_device *adreno_dev, + unsigned int *cmds, + enum kgsl_iommu_reg_map reg, + unsigned int num) +{ + unsigned int *start = cmds; + unsigned int offset; + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + + offset = kgsl_mmu_get_reg_ahbaddr(&adreno_dev->dev.mmu, + KGSL_IOMMU_CONTEXT_USER, reg) >> 2; + + if (adreno_is_a5xx(adreno_dev) || iommu->version == 1) { + *cmds++ = cp_register(adreno_dev, offset, num); + } else if (adreno_is_a3xx(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_REG_WR_NO_CTXT, num + 1); + *cmds++ = offset; + } else if (adreno_is_a4xx(adreno_dev)) { + *cmds++ = cp_packet(adreno_dev, CP_WIDE_REG_WRITE, num + 1); + *cmds++ = offset; + } else { + BUG(); + } + return cmds - start; +} + +static unsigned int _tlbiall(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + unsigned int 
tlbstatus; + + tlbstatus = kgsl_mmu_get_reg_ahbaddr(&adreno_dev->dev.mmu, + KGSL_IOMMU_CONTEXT_USER, + KGSL_IOMMU_CTX_TLBSTATUS) >> 2; + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TLBIALL, 1); + *cmds++ = 1; + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TLBSYNC, 1); + *cmds++ = 0; + + cmds += _wait_reg(adreno_dev, cmds, tlbstatus, 0, + KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF); + + return cmds - start; +} + + +/** + * _adreno_iommu_add_idle_cmds - Add pm4 packets for GPU idle + * @adreno_dev - Pointer to device structure + * @cmds - Pointer to memory where idle commands need to be added + */ +static inline int _adreno_iommu_add_idle_cmds(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + + if (adreno_is_a3xx(adreno_dev)) + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - start; +} + +/* + * _invalidate_uche_cpu() - Invalidate UCHE using CPU + * @adreno_dev: the device + */ +static void _invalidate_uche_cpu(struct adreno_device *adreno_dev) +{ + /* Invalidate UCHE using CPU */ + if (adreno_is_a5xx(adreno_dev)) + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0, 0x12); + else if (adreno_is_a4xx(adreno_dev)) { + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0, 0); + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE1, 0x12); + } else if (adreno_is_a3xx(adreno_dev)) { + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0, 0); + adreno_writereg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE1, + 0x90000000); + } else { + BUG(); + } +} + +/* + * _ctx_switch_use_cpu_path() - Decide whether to use cpu path + * @adreno_dev: the device + * @new_pt: pagetable to switch + * @rb: ringbuffer for ctx switch + * + * If we are idle and switching to default pagetable it is + * preferable to poke the iommu directly rather than using the + * GPU command stream. + */ +static bool _ctx_switch_use_cpu_path( + struct adreno_device *adreno_dev, + struct kgsl_pagetable *new_pt, + struct adreno_ringbuffer *rb) +{ + /* + * If rb is current, we can use cpu path when GPU is + * idle and we are switching to default pt. + * If rb is not current, we can use cpu path when rb has no + * pending commands (rptr = wptr) and we are switching to default pt. 
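+ * For example, the detach path in adreno_drawctxt_detach() switches the RB back to the NULL context, i.e. to the default pagetable; if the GPU has already idled by then, TTBR0 can simply be programmed from the CPU instead of queueing PM4 commands on an empty ring (see _set_pagetable_cpu() below).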
+ */ + if (adreno_dev->cur_rb == rb) + return adreno_isidle(&adreno_dev->dev) && + (new_pt == adreno_dev->dev.mmu.defaultpagetable); + else if ((rb->wptr == rb->rptr) && + (new_pt == adreno_dev->dev.mmu.defaultpagetable)) + return true; + + return false; +} + +/** + * adreno_iommu_set_apriv() - Generate commands to set/reset the APRIV + * @adreno_dev: Device on which the commands will execute + * @cmds: The memory pointer where commands are generated + * @set: If set then APRIV is set else reset + * + * Returns the number of commands generated + */ +unsigned int adreno_iommu_set_apriv(struct adreno_device *adreno_dev, + unsigned int *cmds, int set) +{ + unsigned int *cmds_orig = cmds; + + /* adreno 3xx doesn't have the CP_CNTL.APRIV field */ + if (adreno_is_a3xx(adreno_dev)) + return 0; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + cmds += cp_wait_for_me(adreno_dev, cmds); + *cmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev, + ADRENO_REG_CP_CNTL), 1); + if (set) + *cmds++ = 1; + else + *cmds++ = 0; + + return cmds - cmds_orig; +} + +static inline int _adreno_iommu_add_idle_indirect_cmds( + struct adreno_device *adreno_dev, + unsigned int *cmds, uint64_t nop_gpuaddr) +{ + unsigned int *start = cmds; + /* + * Adding an indirect buffer ensures that the prefetch stalls until + * the commands in indirect buffer have completed. We need to stall + * prefetch with a nop indirect buffer when updating pagetables + * because it provides stabler synchronization */ + cmds += cp_wait_for_me(adreno_dev, cmds); + *cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, nop_gpuaddr); + *cmds++ = 2; + cmds += cp_wait_for_idle(adreno_dev, cmds); + return cmds - start; +} + +/** + * _adreno_mmu_set_pt_update_condition() - Generate commands to setup a + * flag to indicate whether pt switch is required or not by comparing + * current pt id and incoming pt id + * @rb: The RB on which the commands will execute + * @cmds: The pointer to memory where the commands are placed. + * @ptname: Incoming pt id to set to + * + * Returns number of commands added. 
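+ * + * The GPU-side effect is roughly (pseudo-code of the packets built below, not literal driver code): + * + * switch_pt_enable = 1; + * if (current_global_ptname == ptname) + * switch_pt_enable = 0; + * + * so the CP_COND_EXEC issued in _adreno_iommu_set_pt_v1() only runs the TTBR0/TLBI sequence when the incoming pagetable actually differs from the one currently programmed.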
+ */ +static unsigned int _adreno_mmu_set_pt_update_condition( + struct adreno_ringbuffer *rb, + unsigned int *cmds, unsigned int ptname) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds_orig = cmds; + /* + * write 1 to switch pt flag indicating that we need to execute the + * pt switch commands + */ + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 1; + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + /* + * The current ptname is + * directly compared to the incoming pt id + */ + *cmds++ = cp_mem_packet(adreno_dev, CP_COND_WRITE, 6, 2); + /* write to mem space, when a mem space is equal to ref val */ + *cmds++ = (1 << 8) | (1 << 4) | 3; + cmds += cp_gpuaddr(adreno_dev, cmds, + (adreno_dev->ringbuffers[0].pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname))); + *cmds++ = ptname; + *cmds++ = 0xFFFFFFFF; + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 0; + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +/** + * _adreno_iommu_pt_update_pid_to_mem() - Add commands to write to memory the + * pagetable id. + * @rb: The ringbuffer on which these commands will execute + * @cmds: Pointer to memory where the commands are copied + * @ptname: The pagetable id + */ +static unsigned int _adreno_iommu_pt_update_pid_to_mem( + struct adreno_ringbuffer *rb, + unsigned int *cmds, int ptname) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds_orig = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + current_rb_ptname))); + *cmds++ = ptname; + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (adreno_dev->ringbuffers[0].pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + current_global_ptname))); + *cmds++ = ptname; + /* pagetable switch done, Housekeeping: set the switch_pt_enable to 0 */ + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 0; + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +static unsigned int _adreno_iommu_set_pt_v1(struct adreno_ringbuffer *rb, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr, u32 ptname) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + unsigned int *cond_exec_ptr; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + /* set flag that indicates whether pt switch is required*/ + cmds += _adreno_mmu_set_pt_update_condition(rb, cmds, ptname); + *cmds++ = cp_mem_packet(adreno_dev, CP_COND_EXEC, 4, 2); + cmds += cp_gpuaddr(adreno_dev, cmds, + 
(rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + switch_pt_enable))); + *cmds++ = 1; + /* Exec count to be filled later */ + cond_exec_ptr = cmds; + cmds++; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + + cmds += _iommu_lock(adreno_dev, cmds); + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + cmds += _cp_smmu_reg(adreno_dev, cmds, + KGSL_IOMMU_CTX_CONTEXTIDR, 1); + *cmds++ = contextidr; + + /* a3xx doesn't have MEQ space to hold the TLBI commands */ + if (adreno_is_a3xx(adreno_dev)) + cmds += _iommu_unlock(adreno_dev, cmds); + + cmds += _tlbiall(adreno_dev, cmds); + + /* unlock or wait for me to finish the TLBI */ + if (!adreno_is_a3xx(adreno_dev)) + cmds += _iommu_unlock(adreno_dev, cmds); + else + cmds += cp_wait_for_me(adreno_dev, cmds); + + /* Exec count ordinal of CP_COND_EXEC packet */ + *cond_exec_ptr = (cmds - cond_exec_ptr - 1); + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + cmds += _adreno_iommu_pt_update_pid_to_mem(rb, cmds, ptname); + + return cmds - cmds_orig; +} + + +static unsigned int _adreno_iommu_set_pt_v2_a3xx(struct kgsl_device *device, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + cmds += _vbif_lock(adreno_dev, cmds); + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_CONTEXTIDR, 1); + *cmds++ = contextidr; + + cmds += _vbif_unlock(adreno_dev, cmds); + + cmds += _tlbiall(adreno_dev, cmds); + + /* wait for me to finish the TLBI */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +static unsigned int _adreno_iommu_set_pt_v2_a4xx(struct kgsl_device *device, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + cmds += _vbif_lock(adreno_dev, cmds); + + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_CONTEXTIDR, 1); + *cmds++ = contextidr; + + cmds += _vbif_unlock(adreno_dev, cmds); + + cmds += _tlbiall(adreno_dev, cmds); + + /* wait for me to finish the TLBI */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +static unsigned int _adreno_iommu_set_pt_v2_a5xx(struct kgsl_device *device, + unsigned int *cmds_orig, + u64 ttbr0, u32 contextidr, + struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds = cmds_orig; + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + cmds += cp_wait_for_me(adreno_dev, cmds); + + /* CP switches the pagetable and flushes the Caches */ + *cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 3); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + *cmds++ = contextidr; + + *cmds++ = cp_mem_packet(adreno_dev, 
CP_MEM_WRITE, 4, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, ttbr0))); + *cmds++ = lower_32_bits(ttbr0); + *cmds++ = upper_32_bits(ttbr0); + *cmds++ = contextidr; + + /* release all commands with wait_for_me */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds); + + return cmds - cmds_orig; +} + +/** + * adreno_iommu_set_pt_generate_cmds() - Generate commands to change pagetable + * @rb: The RB pointer on which these commands are to be submitted + * @cmds: The pointer where the commands are placed + * @pt: The pagetable to switch to + */ +unsigned int adreno_iommu_set_pt_generate_cmds( + struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + u64 ttbr0; + u32 contextidr; + unsigned int *cmds_orig = cmds; + struct kgsl_iommu *iommu = adreno_dev->dev.mmu.priv; + + ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt); + contextidr = kgsl_mmu_pagetable_get_contextidr(pt); + + cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 1); + + cmds += _adreno_iommu_add_idle_indirect_cmds(adreno_dev, cmds, + device->mmu.setstate_memory.gpuaddr + + KGSL_IOMMU_SETSTATE_NOP_OFFSET); + + if (iommu->version >= 2) { + if (adreno_is_a5xx(adreno_dev)) + cmds += _adreno_iommu_set_pt_v2_a5xx(device, cmds, + ttbr0, contextidr, rb); + else if (adreno_is_a4xx(adreno_dev)) + cmds += _adreno_iommu_set_pt_v2_a4xx(device, cmds, + ttbr0, contextidr); + else if (adreno_is_a3xx(adreno_dev)) + cmds += _adreno_iommu_set_pt_v2_a3xx(device, cmds, + ttbr0, contextidr); + else + BUG(); /* new GPU family? */ + } else { + cmds += _adreno_iommu_set_pt_v1(rb, cmds, ttbr0, contextidr, + pt->name); + } + + /* invalidate all base pointers */ + cmds += cp_invalidate_state(adreno_dev, cmds); + + cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 0); + + return cmds - cmds_orig; +} + +/** + * adreno_iommu_set_pt_ib() - Generate commands to switch pagetable.
The + * commands generated use an IB + * @rb: The RB in which the commands will be executed + * @cmds: Memory pointer where commands are generated + * @pt: The pagetable to switch to + */ +unsigned int adreno_iommu_set_pt_ib(struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct kgsl_pagetable *pt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int *cmds_orig = cmds; + struct kgsl_iommu_pt *iommu_pt = pt->priv; + + /* Write the ttbr0 and contextidr values to pagetable desc memory */ + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + ttbr0))); + *cmds++ = lower_32_bits(iommu_pt->ttbr0); + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, + (rb->pagetable_desc.gpuaddr + + offsetof(struct adreno_ringbuffer_pagetable_info, + contextidr))); + *cmds++ = iommu_pt->contextidr; + + *cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1); + *cmds++ = 0; + cmds += cp_wait_for_me(adreno_dev, cmds); + *cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, rb->pt_update_desc.gpuaddr); + *cmds++ = rb->pt_update_desc.size / sizeof(unsigned int); + + return cmds - cmds_orig; +} + +/** + * __add_curr_ctxt_cmds() - Add commands to set a context id in memstore + * @rb: The RB in which the commands will be added for execution + * @cmds: Pointer to memory where commands are added + * @drawctxt: The context whose id is being set in memstore + * + * Returns the number of dwords + */ +static unsigned int __add_curr_ctxt_cmds(struct adreno_ringbuffer *rb, + unsigned int *cmds, + struct adreno_context *drawctxt) +{ + unsigned int *cmds_orig = cmds; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + /* write the context identifier to memstore memory */ + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_CONTEXT_TO_MEM_IDENTIFIER; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, device->memstore.gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, current_context)); + *cmds++ = (drawctxt ? drawctxt->base.id : 0); + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context)); + *cmds++ = (drawctxt ? 
drawctxt->base.id : 0); + + /* Invalidate UCHE for new context */ + if (adreno_is_a5xx(adreno_dev)) { + *cmds++ = cp_register(adreno_dev, + adreno_getreg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0), 1); + *cmds++ = 0x12; + } else if (adreno_is_a4xx(adreno_dev)) { + *cmds++ = cp_register(adreno_dev, + adreno_getreg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0), 2); + *cmds++ = 0; + *cmds++ = 0x12; + } else if (adreno_is_a3xx(adreno_dev)) { + *cmds++ = cp_register(adreno_dev, + adreno_getreg(adreno_dev, + ADRENO_REG_UCHE_INVALIDATE0), 2); + *cmds++ = 0; + *cmds++ = 0x90000000; + } else + BUG(); + + return cmds - cmds_orig; +} + +/* + * _set_ctxt_cpu() - Set the current context in memstore + * @rb: The ringbuffer memstore to set curr context + * @drawctxt: The context whose id is being set in memstore + */ +static void _set_ctxt_cpu(struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (rb == adreno_dev->cur_rb) { + _invalidate_uche_cpu(adreno_dev); + /* Update global memstore with current context */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, + current_context), + drawctxt ? drawctxt->base.id : 0); + } + /* Update rb memstore with current context */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_RB_OFFSET(rb, current_context), + drawctxt ? drawctxt->base.id : 0); +} + +/** + * _set_ctxt_gpu() - Add commands to set the current context in memstore + * @rb: The ringbuffer in which commands to set memstore are added + * @drawctxt: The context whose id is being set in memstore + */ +static int _set_ctxt_gpu(struct adreno_ringbuffer *rb, + struct adreno_context *drawctxt) +{ + unsigned int link[15], *cmds; + int result; + + cmds = &link[0]; + cmds += __add_curr_ctxt_cmds(rb, cmds, drawctxt); + result = adreno_ringbuffer_issuecmds(rb, 0, link, + (unsigned int)(cmds - link)); + return result; +} + +/** + * _set_pagetable_cpu() - Use CPU to switch the pagetable + * @rb: The rb for which pagetable needs to be switched + * @new_pt: The pagetable to switch to + */ +static int _set_pagetable_cpu(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int result; + + /* update TTBR0 only if we are updating current RB */ + if (adreno_dev->cur_rb == rb) { + result = kgsl_mmu_set_pt(&device->mmu, new_pt); + if (result) + return result; + /* write the new pt set to memory var */ + kgsl_sharedmem_writel(device, + &adreno_dev->ringbuffers[0].pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + current_global_ptname), new_pt->name); + } + + /* Update the RB pagetable info here */ + kgsl_sharedmem_writel(device, &rb->pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + current_rb_ptname), new_pt->name); + kgsl_sharedmem_writeq(device, &rb->pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + ttbr0), kgsl_mmu_pagetable_get_ttbr0(new_pt)); + kgsl_sharedmem_writel(device, &rb->pagetable_desc, + offsetof( + struct adreno_ringbuffer_pagetable_info, + contextidr), kgsl_mmu_pagetable_get_contextidr(new_pt)); + + return 0; +} + +/** + * _set_pagetable_gpu() - Use GPU to switch the pagetable + * @rb: The rb in which commands to switch pagetable are to be + * submitted + * @new_pt: The pagetable to switch to + */ +static int _set_pagetable_gpu(struct 
adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt) +{ + unsigned int *link = NULL, *cmds; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + int result; + + link = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (link == NULL) { + result = -ENOMEM; + goto done; + } + + cmds = link; + + /* If we are in a fault the MMU will be reset soon */ + if (test_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv)) + return 0; + + kgsl_mmu_enable_clk(&device->mmu); + + cmds += adreno_iommu_set_pt_generate_cmds(rb, cmds, new_pt); + + if ((unsigned int) (cmds - link) > (PAGE_SIZE / sizeof(unsigned int))) { + KGSL_DRV_ERR(device, "Temp command buffer overflow\n"); + BUG(); + } + /* + * This returns the per context timestamp but we need to + * use the global timestamp for iommu clock disablement + */ + result = adreno_ringbuffer_issuecmds(rb, + KGSL_CMD_FLAGS_PMODE, link, + (unsigned int)(cmds - link)); + + /* + * On error disable the IOMMU clock right away otherwise turn it off + * after the command has been retired + */ + if (result) + kgsl_mmu_disable_clk(&device->mmu); + else + adreno_ringbuffer_mmu_disable_clk_on_ts(device, rb, + rb->timestamp); + +done: + kfree(link); + return result; +} + +/** + * adreno_iommu_init() - Adreno iommu init + * @adreno_dev: Adreno device + */ +int adreno_iommu_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) + return 0; + + /* + * A nop is required in an indirect buffer when switching + * pagetables in-stream + */ + kgsl_sharedmem_writel(device, &device->mmu.setstate_memory, + KGSL_IOMMU_SETSTATE_NOP_OFFSET, + cp_packet(adreno_dev, CP_NOP, 1)); + + /* set iommu features here */ + if (adreno_is_a420(adreno_dev)) + device->mmu.features |= KGSL_MMU_FLUSH_TLB_ON_MAP; + + /* + * A5XX: per process PT is supported starting PFP 0x5FF064 me 0x5FF052 + * versions + */ + if (adreno_is_a5xx(adreno_dev) && + !MMU_FEATURE(&device->mmu, KGSL_MMU_GLOBAL_PAGETABLE)) { + if ((adreno_compare_pfp_version(adreno_dev, + A5XX_PFP_PER_PROCESS_UCODE_VER) < 0) || + (adreno_compare_pm4_version(adreno_dev, + A5XX_PM4_PER_PROCESS_UCODE_VER) < 0)) { + KGSL_DRV_ERR(device, + "Invalid ucode for per process pagetables\n"); + return -ENODEV; + } + } + + return 0; +} + +/** + * adreno_mmu_set_pt_ctx() - Change the pagetable of the current RB + * @device: Pointer to device to which the rb belongs + * @rb: The RB pointer on which pagetable is to be changed + * @new_pt: The new pt the device will change to + * @drawctxt: The context whose pagetable the ringbuffer is switching to, + * NULL means KGSL_CONTEXT_GLOBAL + * + * Returns 0 on success else error code. 
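+ * + * For illustration, mirroring the caller in adreno_drawctxt_switch(): switching to a user context uses that process' pagetable, while a NULL drawctxt falls back to the default pagetable, i.e. roughly + * + * adreno_iommu_set_pt_ctx(rb, drawctxt->base.proc_priv->pagetable, drawctxt); + * adreno_iommu_set_pt_ctx(rb, device->mmu.defaultpagetable, NULL);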
+ */ +int adreno_iommu_set_pt_ctx(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *new_pt, + struct adreno_context *drawctxt) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_pagetable *cur_pt = device->mmu.defaultpagetable; + int result = 0; + int cpu_path = 0; + + if (rb->drawctxt_active) + cur_pt = rb->drawctxt_active->base.proc_priv->pagetable; + + cpu_path = _ctx_switch_use_cpu_path(adreno_dev, new_pt, rb); + + /* Pagetable switch */ + if (new_pt != cur_pt) { + if (cpu_path) + result = _set_pagetable_cpu(rb, new_pt); + else + result = _set_pagetable_gpu(rb, new_pt); + } + + if (result) { + KGSL_DRV_ERR(device, "Error switching pagetable %d\n", result); + return result; + } + + /* Context switch */ + if (cpu_path) + _set_ctxt_cpu(rb, drawctxt); + else + result = _set_ctxt_gpu(rb, drawctxt); + + if (result) + KGSL_DRV_ERR(device, "Error switching context %d\n", result); + + return result; +} +/** + * adreno_iommu_set_pt_generate_rb_cmds() - Generate commands to switch pt + * in a ringbuffer descriptor + * @rb: The RB whose descriptor is used + * @pt: The pt to switch to + */ +void adreno_iommu_set_pt_generate_rb_cmds(struct adreno_ringbuffer *rb, + struct kgsl_pagetable *pt) +{ + if (rb->pt_update_desc.hostptr) + return; + + rb->pt_update_desc.hostptr = rb->pagetable_desc.hostptr + + sizeof(struct adreno_ringbuffer_pagetable_info); + rb->pt_update_desc.size = + adreno_iommu_set_pt_generate_cmds(rb, + rb->pt_update_desc.hostptr, pt) * + sizeof(unsigned int); + rb->pt_update_desc.gpuaddr = rb->pagetable_desc.gpuaddr + + sizeof(struct adreno_ringbuffer_pagetable_info); +} diff --git a/drivers/gpu/msm/adreno_perfcounter.c b/drivers/gpu/msm/adreno_perfcounter.c new file mode 100644 index 000000000000..31cd8c5cd731 --- /dev/null +++ b/drivers/gpu/msm/adreno_perfcounter.c @@ -0,0 +1,1011 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include <linux/module.h> +#include <linux/uaccess.h> + +#include "kgsl.h" +#include "adreno.h" +#include "adreno_perfcounter.h" +#include "adreno_pm4types.h" +#include "a5xx_reg.h" + +/* Bit flag for RBBM_PERFCTR_CTL */ +#define RBBM_PERFCTR_CTL_ENABLE 0x00000001 + +#define VBIF2_PERF_CNT_SEL_MASK 0x7F +/* offset of clear register from select register */ +#define VBIF2_PERF_CLR_REG_SEL_OFF 8 +/* offset of enable register from select register */ +#define VBIF2_PERF_EN_REG_SEL_OFF 16 +/* offset of high counter from low counter value */ +#define VBIF2_PERF_HIGH_REG_LOW_OFF 8 + +/* offset of clear register from the enable register */ +#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8 +/* offset of high counter from low counter value */ +#define VBIF2_PERF_PWR_HIGH_REG_LOW_OFF 8 + +#define REG_64BIT_VAL(hi, lo, val) (((((uint64_t) hi) << 32) | lo) + val) +/* + * Return true if the countable is used and not broken + */ +static inline int active_countable(unsigned int countable) +{ + return ((countable != KGSL_PERFCOUNTER_NOT_USED) && + (countable != KGSL_PERFCOUNTER_BROKEN)); +} + +/** + * adreno_perfcounter_init: Reserve kernel performance counters + * @adreno_dev: Pointer to an adreno_device struct + * + * The kernel needs/wants a certain group of performance counters for + * its own activities. Reserve these performance counters at init time + * to ensure that they are always reserved for the kernel. The performance + * counters used by the kernel can be obtained by the user, but these + * performance counters will remain active as long as the device is alive. + */ +void adreno_perfcounter_init(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->perfcounter_init) + gpudev->perfcounter_init(adreno_dev); +} + +/** + * adreno_perfcounter_write() - Write the physical performance + * counter values. + * @adreno_dev - Adreno device whose registers are to be written to. + * @reg - register address of the physical counter to which the value is + * written. + * + * This function loads the 64 bit saved value into the particular physical + * counter by enabling the corresponding bit in the A3XX_RBBM_PERFCTR_LOAD_CMD* + * registers. + */ +static void adreno_perfcounter_write(struct adreno_device *adreno_dev, + struct adreno_perfcount_register *reg) +{ + unsigned int val, i; + int cmd[] = { ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2, + ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3 }; + + /* If not loadable then return quickly */ + if (reg->load_bit < 0) + return; + + /* Get the offset/cmd for loading */ + i = reg->load_bit / 32; + + /* Get the register bit offset for loading */ + val = BIT(reg->load_bit & 31); + + /* Write the saved value to PERFCTR_LOAD_VALUE* registers. */ + adreno_writereg64(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO, + ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, reg->value); + + /* + * Set the load bit in PERFCTR_LOAD_CMD for the physical counter + * we want to restore. The value in PERFCTR_LOAD_VALUE* is loaded + * into the corresponding physical counter. The value for the select + * register gets cleared once RBBM reads it so no need to clear the + * select register afterwards.
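+ * + * Worked example: for a counter with load_bit == 45, i = 45 / 32 = 1 and val = BIT(45 & 31) = BIT(13), so the write below sets bit 13 of RBBM_PERFCTR_LOAD_CMD1 after priming LOAD_VALUE_LO/HI with reg->value.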
+ */ + adreno_writereg(adreno_dev, cmd[i], val); +} + +/** + * adreno_perfcounter_close() - Release counters initialized by + * adreno_perfcounter_init() + * @adreno_dev: Pointer to an adreno_device struct + */ +void adreno_perfcounter_close(struct adreno_device *adreno_dev) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + if (gpudev->perfcounter_close) + gpudev->perfcounter_close(adreno_dev); +} + +/** + * adreno_perfcounter_restore() - Restore performance counters + * @adreno_dev: adreno device to configure + * + * Load the physical performance counters with the 64 bit values which were + * saved on GPU power collapse. + */ +void adreno_perfcounter_restore(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + /* Restore the counters for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + adreno_perfcounter_write(adreno_dev, + &group->regs[counter]); + } + } +} + +/** + * adreno_perfcounter_save() - Save performance counters + * @adreno_dev: adreno device to configure + * + * Save the performance counter values before GPU power collapse. + * The saved values are restored on restart. + * This ensures physical counters are coherent across power-collapse. + */ +inline void adreno_perfcounter_save(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int counter, groupid; + + if (counters == NULL) + return; + + for (groupid = 0; groupid < counters->group_count; groupid++) { + group = &(counters->groups[groupid]); + + /* Save the counter values for the group */ + for (counter = 0; counter < group->reg_count; counter++) { + /* If not active or broken, skip this counter */ + if (!active_countable(group->regs[counter].countable)) + continue; + + /* Reset the saved value for loadable counters; non-loadable counters accumulate */ + if (group->regs[counter].load_bit >= 0) + group->regs[counter].value = 0; + + group->regs[counter].value = + group->regs[counter].value + + adreno_perfcounter_read(adreno_dev, groupid, + counter); + } + } +} + +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable); + +/** + * adreno_perfcounter_start: Enable performance counters + * @adreno_dev: Adreno device to configure + * + * Ensure that all allocated performance counters are enabled. Since + * the device was most likely stopped, we can't trust that the counters + * are still valid, so make it so.
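+ * + * Sketch of the intended pairing with the helpers above (the exact call sites live in the adreno start/stop paths): adreno_perfcounter_save() runs before power collapse, and adreno_perfcounter_restore() together with adreno_perfcounter_start() bring the saved, still-allocated counters back up when the device restarts.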
+ */ + +void adreno_perfcounter_start(struct adreno_device *adreno_dev) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int i, j; + + if (NULL == counters) + return; + /* group id iter */ + for (i = 0; i < counters->group_count; i++) { + group = &(counters->groups[i]); + + /* countable iter */ + for (j = 0; j < group->reg_count; j++) { + if (!active_countable(group->regs[j].countable)) + continue; + + /* + * The GPU has to be idle before calling the perfcounter + * enable function, but since this function is called + * during start we already know the GPU is idle. + * Since the countable/counter pairs have already been + * validated, there is no way for _enable() to fail so + * no need to check the return code. + */ + adreno_perfcounter_enable(adreno_dev, i, j, + group->regs[j].countable); + } + } +} + +/** + * adreno_perfcounter_read_group() - Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @reads: List of kgsl_perfcounter_read_groups + * @count: Length of list + * + * Read the performance counters for the groupid/countable pairs and return + * the 64 bit result for each pair + */ + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + struct kgsl_perfcounter_read_group *list = NULL; + unsigned int i, j; + int ret = 0; + + if (NULL == counters) + return -EINVAL; + + /* sanity check params passed in */ + if (reads == NULL || count == 0 || count > 100) + return -EINVAL; + + list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count, + GFP_KERNEL); + if (!list) + return -ENOMEM; + + if (copy_from_user(list, reads, + sizeof(struct kgsl_perfcounter_read_group) * count)) { + ret = -EFAULT; + goto done; + } + + mutex_lock(&device->mutex); + ret = kgsl_active_count_get(device); + if (ret) { + mutex_unlock(&device->mutex); + goto done; + } + + /* list iterator */ + for (j = 0; j < count; j++) { + + list[j].value = 0; + + /* Verify that the group ID is within range */ + if (list[j].groupid >= counters->group_count) { + ret = -EINVAL; + break; + } + + group = &(counters->groups[list[j].groupid]); + + /* group/counter iterator */ + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == list[j].countable) { + list[j].value = adreno_perfcounter_read( + adreno_dev, list[j].groupid, i); + break; + } + } + } + + kgsl_active_count_put(device); + mutex_unlock(&device->mutex); + + /* write the data */ + if (ret == 0) + if (copy_to_user(reads, list, + sizeof(struct kgsl_perfcounter_read_group) * count)) + ret = -EFAULT; + +done: + kfree(list); + return ret; +} + +/** + * adreno_perfcounter_get_groupid() - Get the performance counter ID + * @adreno_dev: Adreno device + * @name: Performance counter group name string + * + * Get the groupid based on the name and return this ID + */ + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + int i; + + if (name == NULL || counters == NULL) + return -EINVAL; + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + + /* make sure there is a name for this group */ + if (group->name == NULL) + 
continue; + + /* verify name and length */ + if (strlen(name) == strlen(group->name) && + strcmp(group->name, name) == 0) + return i; + } + + return -EINVAL; +} + +/** + * adreno_perfcounter_get_name() - Get the group name + * @adreno_dev: Adreno device + * @groupid: Desired performance counter groupid + * + * Get the name based on the groupid and return it + */ + +const char *adreno_perfcounter_get_name(struct adreno_device *adreno_dev, + unsigned int groupid) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + + if (counters != NULL && groupid < counters->group_count) + return counters->groups[groupid].name; + + return NULL; +} + +/** + * adreno_perfcounter_query_group: Determine which countables are in counters + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countables: Return list of all countables in the groups counters + * @count: Max length of the array + * @max_counters: max counters for the groupid + * + * Query the current state of counters for the group. + */ + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int i, t; + int ret = 0; + unsigned int *buf; + + *max_counters = 0; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + mutex_lock(&device->mutex); + + group = &(counters->groups[groupid]); + *max_counters = group->reg_count; + + /* + * if NULL countable or *count of zero, return max reg_count in + * *max_counters and return success + */ + if (countables == NULL || count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + t = min_t(unsigned int, group->reg_count, count); + + buf = kmalloc(t * sizeof(unsigned int), GFP_KERNEL); + if (buf == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + for (i = 0; i < t; i++) + buf[i] = group->regs[i].countable; + + mutex_unlock(&device->mutex); + + if (copy_to_user(countables, buf, sizeof(unsigned int) * t)) + ret = -EFAULT; + + kfree(buf); + + return ret; +} + +static inline void refcount_group(struct adreno_perfcount_group *group, + unsigned int reg, unsigned int flags, + unsigned int *lo, unsigned int *hi) +{ + if (flags & PERFCOUNTER_FLAG_KERNEL) + group->regs[reg].kernelcount++; + else + group->regs[reg].usercount++; + + if (lo) + *lo = group->regs[reg].offset; + + if (hi) + *hi = group->regs[reg].offset_hi; +} + +/** + * adreno_perfcounter_get: Try to put a countable in an available counter + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be in a counter + * @offset: Return offset of the LO counter assigned + * @offset_hi: Return offset of the HI counter assigned + * @flags: Used to setup kernel perf counters + * + * Try to place a countable in an available counter. 
If the countable is + * already in a counter, reference count the counter/countable pair resource + * and return success + */ + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int empty = -1; + int ret = 0; + + /* always clear return variables */ + if (offset) + *offset = 0; + if (offset_hi) + *offset_hi = 0; + + if (NULL == counters) + return -EINVAL; + + if (groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + if (group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED) { + /* + * In fixed groups the countable equals the fixed register the + * user wants. First make sure it is in range + */ + + if (countable >= group->reg_count) + return -EINVAL; + + /* If it is already reserved, just increase the refcounts */ + if ((group->regs[countable].kernelcount != 0) || + (group->regs[countable].usercount != 0)) { + refcount_group(group, countable, flags, + offset, offset_hi); + return 0; + } + + empty = countable; + } else { + unsigned int i; + + /* + * Check if the countable is already associated with a counter. + * Refcount and return the offset, otherwise, try and find an + * empty counter and assign the countable to it. + */ + + for (i = 0; i < group->reg_count; i++) { + if (group->regs[i].countable == countable) { + refcount_group(group, i, flags, + offset, offset_hi); + return 0; + } else if (group->regs[i].countable == + KGSL_PERFCOUNTER_NOT_USED) { + /* keep track of unused counter */ + empty = i; + } + } + } + + /* no available counters, so do nothing else */ + if (empty == -1) + return -EBUSY; + + /* enable the new counter */ + ret = adreno_perfcounter_enable(adreno_dev, groupid, empty, countable); + if (ret) + return ret; + /* initialize the new counter */ + group->regs[empty].countable = countable; + + /* set initial kernel and user count */ + if (flags & PERFCOUNTER_FLAG_KERNEL) { + group->regs[empty].kernelcount = 1; + group->regs[empty].usercount = 0; + } else { + group->regs[empty].kernelcount = 0; + group->regs[empty].usercount = 1; + } + + if (offset) + *offset = group->regs[empty].offset; + if (offset_hi) + *offset_hi = group->regs[empty].offset_hi; + + return ret; +} + + +/** + * adreno_perfcounter_put: Release a countable from counter resource + * @adreno_dev: Adreno device to configure + * @groupid: Desired performance counter group + * @countable: Countable desired to be freed from a counter + * @flags: Flag to determine if kernel or user space request + * + * Put a performance counter/countable pair that was previously received. If + * noone else is using the countable, free up the counter for others. + */ +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + unsigned int i; + + if (counters == NULL || groupid >= counters->group_count) + return -EINVAL; + + group = &(counters->groups[groupid]); + + /* + * Find if the counter/countable pair is used currently. + * Start cycling through registers in the bank. 
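+ * For example, a counter reserved once with PERFCOUNTER_FLAG_KERNEL and twice from user space (kernelcount == 1, usercount == 2) is only marked KGSL_PERFCOUNTER_NOT_USED again after one kernel put and two user puts drop both refcounts to zero.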
+ */ + for (i = 0; i < group->reg_count; i++) { + /* check if countable assigned is what we are looking for */ + if (group->regs[i].countable == countable) { + /* found pair, book keep count based on request type */ + if (flags & PERFCOUNTER_FLAG_KERNEL && + group->regs[i].kernelcount > 0) + group->regs[i].kernelcount--; + else if (group->regs[i].usercount > 0) + group->regs[i].usercount--; + else + break; + + /* mark available if not used anymore */ + if (group->regs[i].kernelcount == 0 && + group->regs[i].usercount == 0) + group->regs[i].countable = + KGSL_PERFCOUNTER_NOT_USED; + + return 0; + } + } + + return -EINVAL; +} + +static int _perfcounter_enable_pwr(struct adreno_device *adreno_dev, + unsigned int counter) +{ + /* PWR counters enabled by default on A3XX/A4XX so nothing to do */ + if (adreno_is_a3xx(adreno_dev) || adreno_is_a4xx(adreno_dev)) + return 0; + + /* + * On 5XX we have to emulate the PWR counters which are physically + * missing. Program countable 6 on RBBM_PERFCTR_RBBM_0 as a substitute + * for PWR:1. Don't emulate PWR:0 as nobody uses it and we don't want + * to take away too many of the generic RBBM counters. + */ + + if (counter == 0) + return -EINVAL; + + kgsl_regwrite(&adreno_dev->dev, A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6); + + return 0; +} + +static void _perfcounter_enable_vbif(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int counter, + unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs[counter]; + /* Write 1, followed by 0 to CLR register for clearing the counter */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1); + kgsl_regwrite(device, reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0); + kgsl_regwrite(device, reg->select, countable & VBIF2_PERF_CNT_SEL_MASK); + /* enable reg is 8 DWORDS before select reg */ + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1); + reg->value = 0; +} + +static void _perfcounter_enable_vbif_pwr(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs[counter]; + /* Write 1, followed by 0 to CLR register for clearing the counter */ + kgsl_regwrite(device, reg->select + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1); + kgsl_regwrite(device, reg->select + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0); + kgsl_regwrite(device, reg->select, 1); + reg->value = 0; +} + +static void _power_counter_enable_alwayson(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters) +{ + struct kgsl_device *device = &adreno_dev->dev; + + kgsl_regwrite(device, A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1); + counters->groups[KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR].regs[0].value = 0; +} + +static void _power_counter_enable_gpmu(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + if (countable > 43) + return; + + reg = &counters->groups[group].regs[counter]; + + /* Move the countable to the correct byte offset */ + countable = countable << ((counter % 4) * 8); + + kgsl_regwrite(device, reg->select, countable); + + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + reg->value = 0; +} + +static void 
_power_counter_enable_default(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int group, + unsigned int counter, unsigned int countable) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + + reg = &counters->groups[group].regs[counter]; + kgsl_regwrite(device, reg->select, countable); + kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1); + reg->value = 0; +} + +static int _perfcounter_enable_default(struct adreno_device *adreno_dev, + struct adreno_perfcounters *counters, unsigned int group, + unsigned int counter, unsigned int countable) +{ + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_perfcount_register *reg; + int i; + int ret = 0; + + /* + * check whether the countable is valid or not by matching it against + * the list on invalid countables + */ + if (gpudev->invalid_countables) { + struct adreno_invalid_countables invalid_countable = + gpudev->invalid_countables[group]; + for (i = 0; i < invalid_countable.num_countables; i++) + if (countable == invalid_countable.countables[i]) + return -EACCES; + } + reg = &(counters->groups[group].regs[counter]); + + if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) { + struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0]; + unsigned int buf[4]; + unsigned int *cmds = buf; + int ret; + + cmds += cp_wait_for_idle(adreno_dev, cmds); + *cmds++ = cp_register(adreno_dev, reg->select, 1); + *cmds++ = countable; + /* submit to highest priority RB always */ + ret = adreno_ringbuffer_issuecmds(rb, 0, buf, cmds-buf); + if (ret) + return ret; + /* + * schedule dispatcher to make sure rb[0] is run, because + * if the current RB is not rb[0] and gpu is idle then + * rb[0] will not get scheduled to run + */ + if (adreno_dev->cur_rb != rb) + adreno_dispatcher_schedule(rb->device); + /* wait for the above commands submitted to complete */ + ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp, + ADRENO_IDLE_TIMEOUT); + if (ret) + KGSL_DRV_ERR(rb->device, + "Perfcounter %u/%u/%u start via commands failed %d\n", + group, counter, countable, ret); + } else { + /* Select the desired perfcounter */ + kgsl_regwrite(&adreno_dev->dev, reg->select, countable); + } + + if (!ret) + reg->value = 0; + return 0; +} + +/** + * adreno_perfcounter_enable - Configure a performance counter for a countable + * @adreno_dev - Adreno device to configure + * @group - Desired performance counter group + * @counter - Desired performance counter in the group + * @countable - Desired countable + * + * Function is used for adreno cores + * Physically set up a counter within a group with the desired countable + * Return 0 on success else error code + */ +static int adreno_perfcounter_enable(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter, unsigned int countable) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + + if (counters == NULL) + return -EINVAL; + + if (group >= counters->group_count) + return -EINVAL; + + if (counter >= counters->groups[group].reg_count) + return -EINVAL; + + switch (group) { + case KGSL_PERFCOUNTER_GROUP_ALWAYSON: + /* alwayson counter is global, so init value is 0 */ + break; + case KGSL_PERFCOUNTER_GROUP_PWR: + return _perfcounter_enable_pwr(adreno_dev, counter); + case KGSL_PERFCOUNTER_GROUP_VBIF: + if (countable > VBIF2_PERF_CNT_SEL_MASK) + return -EINVAL; + _perfcounter_enable_vbif(adreno_dev, counters, counter, + countable); + break; + case KGSL_PERFCOUNTER_GROUP_VBIF_PWR: + 
_perfcounter_enable_vbif_pwr(adreno_dev, counters, counter); + break; + case KGSL_PERFCOUNTER_GROUP_SP_PWR: + case KGSL_PERFCOUNTER_GROUP_TP_PWR: + case KGSL_PERFCOUNTER_GROUP_RB_PWR: + case KGSL_PERFCOUNTER_GROUP_CCU_PWR: + case KGSL_PERFCOUNTER_GROUP_UCHE_PWR: + case KGSL_PERFCOUNTER_GROUP_CP_PWR: + _power_counter_enable_default(adreno_dev, counters, group, + counter, countable); + break; + case KGSL_PERFCOUNTER_GROUP_GPMU_PWR: + _power_counter_enable_gpmu(adreno_dev, counters, group, counter, + countable); + break; + case KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR: + _power_counter_enable_alwayson(adreno_dev, counters); + break; + default: + return _perfcounter_enable_default(adreno_dev, counters, group, + counter, countable); + } + + return 0; +} + +static uint64_t _perfcounter_read_alwayson(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + uint64_t val = 0; + + adreno_readreg64(adreno_dev, ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, &val); + + return val + group->regs[counter].value; +} + +static uint64_t _perfcounter_read_pwr(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int in = 0, out, lo = 0, hi = 0; + unsigned int enable_bit; + + reg = &group->regs[counter]; + + /* Remember, counter 0 is not emulated on 5XX */ + if (adreno_is_a5xx(adreno_dev) && (counter == 0)) + return -EINVAL; + + if (adreno_is_a3xx(adreno_dev)) { + /* On A3XX we need to freeze the counter so we can read it */ + if (0 == counter) + enable_bit = 0x00010000; + else + enable_bit = 0x00020000; + + /* freeze counter */ + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, &in); + out = (in & ~enable_bit); + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, out); + } + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* restore the counter control value */ + if (adreno_is_a3xx(adreno_dev)) + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, in); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t _perfcounter_read_vbif(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + + reg = &group->regs[counter]; + + /* freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, + 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, + 1); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t _perfcounter_read_vbif_pwr(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + + reg = &group->regs[counter]; + + /* freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select, 0); + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* un-freeze counter */ + if (adreno_is_a3xx(adreno_dev)) + kgsl_regwrite(device, reg->select, 1); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t 
_perfcounter_read_pwrcntr(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + + reg = &group->regs[counter]; + + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + return REG_64BIT_VAL(hi, lo, reg->value); +} + +static uint64_t _perfcounter_read_default(struct adreno_device *adreno_dev, + struct adreno_perfcount_group *group, unsigned int counter) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_perfcount_register *reg; + unsigned int lo = 0, hi = 0; + unsigned int in = 0, out; + + reg = &group->regs[counter]; + + /* Freeze the counter */ + if (adreno_is_a3xx(adreno_dev)) { + adreno_readreg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, &in); + out = in & ~RBBM_PERFCTR_CTL_ENABLE; + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, out); + } + + /* Read the values */ + kgsl_regread(device, reg->offset, &lo); + kgsl_regread(device, reg->offset_hi, &hi); + + /* Re-Enable the counter */ + if (adreno_is_a3xx(adreno_dev)) + adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, in); + + return REG_64BIT_VAL(hi, lo, 0); +} + +/** + * adreno_perfcounter_read() - Reads a performance counter + * @adreno_dev: The device on which the counter is running + * @group: The group of the counter + * @counter: The counter within the group + * + * Function is used to read the counter of adreno devices + * Returns the 64 bit counter value on success else 0. + */ +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int counter) +{ + struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev); + struct adreno_perfcount_group *group; + + /* Lets hope this doesn't fail. Now subfunctions don't need to check */ + if (counters == NULL) + return 0; + + if (groupid >= counters->group_count) + return 0; + + group = &counters->groups[groupid]; + + if (counter >= group->reg_count) + return 0; + + switch (groupid) { + case KGSL_PERFCOUNTER_GROUP_ALWAYSON: + return _perfcounter_read_alwayson(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_VBIF_PWR: + return _perfcounter_read_vbif_pwr(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_VBIF: + return _perfcounter_read_vbif(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_PWR: + return _perfcounter_read_pwr(adreno_dev, group, counter); + case KGSL_PERFCOUNTER_GROUP_SP_PWR: + case KGSL_PERFCOUNTER_GROUP_TP_PWR: + case KGSL_PERFCOUNTER_GROUP_RB_PWR: + case KGSL_PERFCOUNTER_GROUP_CCU_PWR: + case KGSL_PERFCOUNTER_GROUP_UCHE_PWR: + case KGSL_PERFCOUNTER_GROUP_CP_PWR: + case KGSL_PERFCOUNTER_GROUP_GPMU_PWR: + case KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR: + return _perfcounter_read_pwrcntr(adreno_dev, group, counter); + default: + return _perfcounter_read_default(adreno_dev, group, counter); + } +} diff --git a/drivers/gpu/msm/adreno_perfcounter.h b/drivers/gpu/msm/adreno_perfcounter.h new file mode 100644 index 000000000000..8c4db38983b1 --- /dev/null +++ b/drivers/gpu/msm/adreno_perfcounter.h @@ -0,0 +1,141 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
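
The read helpers above all defer to REG_64BIT_VAL(), whose definition is not part of this hunk. The sketch below is only an assumption about what that combine step amounts to (concatenate the HI/LO register pair, then add the caller-supplied base, which is the saved reg->value for most groups and 0 for the default path); it is an illustration, not the driver's macro.

// Minimal sketch, assuming REG_64BIT_VAL(hi, lo, base) merges a LO/HI
// perfcounter register pair into 64 bits and adds a software base.
static inline uint64_t example_combine_64bit(unsigned int hi, unsigned int lo,
		uint64_t base)
{
	return ((((uint64_t) hi) << 32) | lo) + base;
}
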
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_PERFCOUNTER_H
+#define __ADRENO_PERFCOUNTER_H
+
+#include "adreno.h"
+
+struct adreno_device;
+
+/* ADRENO_PERFCOUNTERS - Given an adreno device, return the perfcounters list */
+#define ADRENO_PERFCOUNTERS(_a) \
+	(ADRENO_GPU_DEVICE(_a) ? ADRENO_GPU_DEVICE(_a)->perfcounters : NULL)
+
+#define PERFCOUNTER_FLAG_NONE 0x0
+#define PERFCOUNTER_FLAG_KERNEL 0x1
+
+/* Structs to maintain the list of active performance counters */
+
+/**
+ * struct adreno_perfcount_register: register state
+ * @countable: countable the register holds
+ * @kernelcount: number of kernel users of the register
+ * @usercount: number of user space users of the register
+ * @offset: register hardware offset for the low 32 bits of the counter
+ * @offset_hi: register hardware offset for the high 32 bits of the counter
+ * @load_bit: The bit number in LOAD register which corresponds to this counter
+ * @select: The countable register offset
+ * @value: The 64 bit countable register value
+ */
+struct adreno_perfcount_register {
+	unsigned int countable;
+	unsigned int kernelcount;
+	unsigned int usercount;
+	unsigned int offset;
+	unsigned int offset_hi;
+	int load_bit;
+	unsigned int select;
+	uint64_t value;
+};
+
+/**
+ * struct adreno_perfcount_group: registers for a hardware group
+ * @regs: available registers for this group
+ * @reg_count: total registers for this group
+ * @name: group name for this group
+ * @flags: group flags (e.g. ADRENO_PERFCOUNTER_GROUP_FIXED)
+ */
+struct adreno_perfcount_group {
+	struct adreno_perfcount_register *regs;
+	unsigned int reg_count;
+	const char *name;
+	unsigned long flags;
+};
+
+/*
+ * ADRENO_PERFCOUNTER_GROUP_FIXED indicates that a perfcounter group is fixed -
+ * instead of having configurable countables like the other groups, registers
+ * in fixed groups have a hardwired countable.
So when the user requests a + * countable in one of these groups, that countable should be used as the + * register offset to return + */ + +#define ADRENO_PERFCOUNTER_GROUP_FIXED BIT(0) + +/** + * adreno_perfcounts: all available perfcounter groups + * @groups: available groups for this device + * @group_count: total groups for this device + */ +struct adreno_perfcounters { + struct adreno_perfcount_group *groups; + unsigned int group_count; +}; + +/** + * adreno_invalid_countabless: Invalid countables that do not work properly + * @countables: List of unusable countables + * @num_countables: Number of unusable countables + */ +struct adreno_invalid_countables { + const unsigned int *countables; + int num_countables; +}; + +#define ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags) \ + [KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \ + ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags } + +#define ADRENO_PERFCOUNTER_GROUP(core, offset, name) \ + ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, 0) + +#define ADRENO_POWER_COUNTER_GROUP(core, offset, name) \ + [KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { core##_pwrcounters_##name, \ + ARRAY_SIZE(core##_pwrcounters_##name), __stringify(name##_pwr), 0} + +#define ADRENO_PERFCOUNTER_INVALID_COUNTABLE(name, off) \ + [KGSL_PERFCOUNTER_GROUP_##off] = { name##_invalid_countables, \ + ARRAY_SIZE(name##_invalid_countables) } + +int adreno_perfcounter_query_group(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int __user *countables, + unsigned int count, unsigned int *max_counters); + +int adreno_perfcounter_read_group(struct adreno_device *adreno_dev, + struct kgsl_perfcounter_read_group __user *reads, unsigned int count); + +void adreno_perfcounter_close(struct adreno_device *adreno_dev); + +void adreno_perfcounter_restore(struct adreno_device *adreno_dev); + +void adreno_perfcounter_save(struct adreno_device *adreno_dev); + +void adreno_perfcounter_start(struct adreno_device *adreno_dev); + +void adreno_perfcounter_init(struct adreno_device *adreno_dev); + +int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev, + const char *name); + +uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev, + unsigned int group, unsigned int counter); + +const char *adreno_perfcounter_get_name(struct adreno_device + *adreno_dev, unsigned int groupid); + +int adreno_perfcounter_get(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int *offset, + unsigned int *offset_hi, unsigned int flags); + +int adreno_perfcounter_put(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable, unsigned int flags); + +#endif /* __ADRENO_PERFCOUNTER_H */ diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h new file mode 100644 index 000000000000..f81c0f20e10b --- /dev/null +++ b/drivers/gpu/msm/adreno_pm4types.h @@ -0,0 +1,468 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
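
The per-core group and countable tables are built with the initializer macros above. A hypothetical sketch of their use follows, assuming the definitions above are in scope; the a5xx_* array names and the A5XX_* register names are placeholders chosen for illustration, not identifiers taken from this patch.

// Hypothetical per-core tables; register names below are placeholders.
static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
	// countable, kernelcount, usercount, offset(lo), offset_hi,
	// load_bit, select
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO,
		A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 },
};

static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO,
		A5XX_RBBM_ALWAYSON_COUNTER_HI, -1, 0 },
};

static struct adreno_perfcount_group a5xx_perfcounter_groups[] = {
	// expands to: [KGSL_PERFCOUNTER_GROUP_CP] =
	//   { a5xx_perfcounters_cp, ARRAY_SIZE(a5xx_perfcounters_cp), "cp", 0 }
	ADRENO_PERFCOUNTER_GROUP(a5xx, CP, cp),
	// fixed groups set ADRENO_PERFCOUNTER_GROUP_FIXED so a request returns
	// the hardwired register rather than programming a countable
	ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, ALWAYSON, alwayson,
		ADRENO_PERFCOUNTER_GROUP_FIXED),
};

The designated-initializer expansion is what lets adreno_perfcounter_enable() and adreno_perfcounter_read() index groups directly by the KGSL_PERFCOUNTER_GROUP_* id.
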
+ * + */ +#ifndef __ADRENO_PM4TYPES_H +#define __ADRENO_PM4TYPES_H + +#include "adreno.h" + +#define CP_PKT_MASK 0xc0000000 + +#define CP_TYPE0_PKT ((unsigned int)0 << 30) +#define CP_TYPE3_PKT ((unsigned int)3 << 30) +#define CP_TYPE4_PKT ((unsigned int)4 << 28) +#define CP_TYPE7_PKT ((unsigned int)7 << 28) + +#define PM4_TYPE4_PKT_SIZE_MAX 128 + +/* type3 packets */ + +/* Enable preemption flag */ +#define CP_PREEMPT_ENABLE 0x1C +/* Preemption token command on which preemption occurs */ +#define CP_PREEMPT_TOKEN 0x1E +/* Bit to set in CP_PREEMPT_TOKEN ordinal for interrupt on preemption */ +#define CP_PREEMPT_ORDINAL_INTERRUPT 24 +/* copy from ME scratch RAM to a register */ +#define CP_SCRATCH_TO_REG 0x4d + +/* Copy from REG to ME scratch RAM */ +#define CP_REG_TO_SCRATCH 0x4a + +/* Wait for memory writes to complete */ +#define CP_WAIT_MEM_WRITES 0x12 + +/* Conditional execution based on register comparison */ +#define CP_COND_REG_EXEC 0x47 + +/* Memory to REG copy */ +#define CP_MEM_TO_REG 0x42 + +/* initialize CP's micro-engine */ +#define CP_ME_INIT 0x48 + +/* skip N 32-bit words to get to the next packet */ +#define CP_NOP 0x10 + +/* indirect buffer dispatch. same as IB, but init is pipelined */ +#define CP_INDIRECT_BUFFER_PFD 0x37 + +/* wait for the IDLE state of the engine */ +#define CP_WAIT_FOR_IDLE 0x26 + +/* wait until a register or memory location is a specific value */ +#define CP_WAIT_REG_MEM 0x3c + +/* wait until a register location is equal to a specific value */ +#define CP_WAIT_REG_EQ 0x52 + +/* switches SMMU pagetable, used on a5xx only */ +#define CP_SMMU_TABLE_UPDATE 0x53 + +/* wait until a read completes */ +#define CP_WAIT_UNTIL_READ 0x5c + +/* wait until all base/size writes from an IB_PFD packet have completed */ +#define CP_WAIT_IB_PFD_COMPLETE 0x5d + +/* register read/modify/write */ +#define CP_REG_RMW 0x21 + +/* Set binning configuration registers */ +#define CP_SET_BIN_DATA 0x2f + +/* reads register in chip and writes to memory */ +#define CP_REG_TO_MEM 0x3e + +/* write N 32-bit words to memory */ +#define CP_MEM_WRITE 0x3d + +/* write CP_PROG_COUNTER value to memory */ +#define CP_MEM_WRITE_CNTR 0x4f + +/* conditional execution of a sequence of packets */ +#define CP_COND_EXEC 0x44 + +/* conditional write to memory or register */ +#define CP_COND_WRITE 0x45 + +/* generate an event that creates a write to memory when completed */ +#define CP_EVENT_WRITE 0x46 + +/* generate a VS|PS_done event */ +#define CP_EVENT_WRITE_SHD 0x58 + +/* generate a cache flush done event */ +#define CP_EVENT_WRITE_CFL 0x59 + +/* generate a z_pass done event */ +#define CP_EVENT_WRITE_ZPD 0x5b + + +/* initiate fetch of index buffer and draw */ +#define CP_DRAW_INDX 0x22 + +/* draw using supplied indices in packet */ +#define CP_DRAW_INDX_2 0x36 + +/* initiate fetch of index buffer and binIDs and draw */ +#define CP_DRAW_INDX_BIN 0x34 + +/* initiate fetch of bin IDs and draw using supplied indices */ +#define CP_DRAW_INDX_2_BIN 0x35 + +/* New draw packets defined for A4XX */ +#define CP_DRAW_INDX_OFFSET 0x38 +#define CP_DRAW_INDIRECT 0x28 +#define CP_DRAW_INDX_INDIRECT 0x29 +#define CP_DRAW_AUTO 0x24 + +/* begin/end initiator for viz query extent processing */ +#define CP_VIZ_QUERY 0x23 + +/* fetch state sub-blocks and initiate shader code DMAs */ +#define CP_SET_STATE 0x25 + +/* load constant into chip and to memory */ +#define CP_SET_CONSTANT 0x2d + +/* load sequencer instruction memory (pointer-based) */ +#define CP_IM_LOAD 0x27 + +/* load sequencer instruction memory (code 
embedded in packet) */ +#define CP_IM_LOAD_IMMEDIATE 0x2b + +/* load constants from a location in memory */ +#define CP_LOAD_CONSTANT_CONTEXT 0x2e + +/* selective invalidation of state pointers */ +#define CP_INVALIDATE_STATE 0x3b + + +/* dynamically changes shader instruction memory partition */ +#define CP_SET_SHADER_BASES 0x4A + +/* sets the 64-bit BIN_MASK register in the PFP */ +#define CP_SET_BIN_MASK 0x50 + +/* sets the 64-bit BIN_SELECT register in the PFP */ +#define CP_SET_BIN_SELECT 0x51 + + +/* updates the current context, if needed */ +#define CP_CONTEXT_UPDATE 0x5e + +/* generate interrupt from the command stream */ +#define CP_INTERRUPT 0x40 + +/* A5XX Enable yield in RB only */ +#define CP_YIELD_ENABLE 0x1C + +/* Enable/Disable/Defer A5x global preemption model */ +#define CP_PREEMPT_ENABLE_GLOBAL 0x69 + +/* Enable/Disable A5x local preemption model */ +#define CP_PREEMPT_ENABLE_LOCAL 0x6A + +/* Yeild token on a5xx similar to CP_PREEMPT on a4xx */ +#define CP_CONTEXT_SWITCH_YIELD 0x6B + +/* Inform CP about current render mode (needed for a5xx preemption) */ +#define CP_SET_RENDER_MODE 0x6C + +/* copy sequencer instruction memory to system memory */ +#define CP_IM_STORE 0x2c + +/* test 2 memory locations to dword values specified */ +#define CP_TEST_TWO_MEMS 0x71 + +/* Write register, ignoring context state for context sensitive registers */ +#define CP_REG_WR_NO_CTXT 0x78 + +/* + * for A4xx + * Write to register with address that does not fit into type-0 pkt + */ +#define CP_WIDE_REG_WRITE 0x74 + + +/* PFP waits until the FIFO between the PFP and the ME is empty */ +#define CP_WAIT_FOR_ME 0x13 + +/* Record the real-time when this packet is processed by PFP */ +#define CP_RECORD_PFP_TIMESTAMP 0x11 + +#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ + +/* Used to switch GPU between secure and non-secure modes */ +#define CP_SET_SECURE_MODE 0x66 + +#define CP_BOOTSTRAP_UCODE 0x6f /* bootstraps microcode */ + +/* + * for a3xx + */ + +#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ + +/* Conditionally load a IB based on a flag */ +#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ +#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ + +/* Load a buffer with pre-fetch enabled */ +#define CP_INDIRECT_BUFFER_PFE 0x3F + +#define CP_EXEC_CL 0x31 + +/* (A4x) save PM4 stream pointers to execute upon a visible draw */ +#define CP_SET_DRAW_STATE 0x43 + +#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 +#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 +#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 +#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 +#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 +#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 + +static inline uint pm4_calc_odd_parity_bit(uint val) +{ + return (0x9669 >> (0xf & ((val) ^ + ((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^ + ((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^ + ((val) >> 28)))) & 1; +} + +/* + * PM4 packet header functions + * For all the packet functions the passed in count should be the size of the + * payload excluding the header + */ +static inline uint cp_type0_packet(uint regindx, uint cnt) +{ + return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF); +} + +static inline uint cp_type3_packet(uint opcode, uint cnt) +{ + return CP_TYPE3_PKT | ((cnt-1) << 16) | (((opcode) & 0xFF) << 8); +} + +static inline uint cp_type4_packet(uint opcode, uint cnt) +{ + return CP_TYPE4_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 7) | + 
(((opcode) & 0x3FFFF) << 8) | + ((pm4_calc_odd_parity_bit(opcode) << 27)); +} + +static inline uint cp_type7_packet(uint opcode, uint cnt) +{ + return CP_TYPE7_PKT | ((cnt) << 0) | + (pm4_calc_odd_parity_bit(cnt) << 15) | + (((opcode) & 0x7F) << 16) | + ((pm4_calc_odd_parity_bit(opcode) << 23)); + +} + +#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) + +#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) +#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) + +/* + * Check both for the type3 opcode and make sure that the reserved bits [1:7] + * and 15 are 0 + */ + +#define pkt_is_type3(pkt) \ + ((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \ + (((pkt) & 0x80FE) == 0)) + +#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) +#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) + +#define pkt_is_type4(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \ + ((((pkt) >> 27) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type4_base_index_one_reg_wr(pkt))) \ + && ((((pkt) >> 7) & 0x1) == \ + pm4_calc_odd_parity_bit(type4_pkt_size(pkt)))) + +#define cp_type4_base_index_one_reg_wr(pkt) (((pkt) >> 8) & 0x7FFFF) +#define type4_pkt_size(pkt) ((pkt) & 0x7F) + +#define pkt_is_type7(pkt) \ + ((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \ + (((pkt) & 0x0F000000) == 0) && \ + ((((pkt) >> 23) & 0x1) == \ + pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \ + && ((((pkt) >> 15) & 0x1) == \ + pm4_calc_odd_parity_bit(type7_pkt_size(pkt)))) + +#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F) +#define type7_pkt_size(pkt) ((pkt) & 0x3FFF) + +/* dword base address of the GFX decode space */ +#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) + +/* gmem command buffer length */ +#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) + +/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */ +#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500) + +/** + * cp_packet - Generic CP packet to support different opcodes on + * different GPU cores. + * @adreno_dev: The adreno device + * @opcode: Operation for cp packet + * @size: size for cp packet + */ +static inline uint cp_packet(struct adreno_device *adreno_dev, + int opcode, uint size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size); +} + +/** + * cp_mem_packet - Generic CP memory packet to support different + * opcodes on different GPU cores. 
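
As a quick worked example of the type7 helpers defined above (hand-evaluated, shown only to make the bit layout concrete): a NOP header with a one-dword payload encodes to 0x70100001, and the decode macros recover the opcode and size from it.

// Illustrative round-trip through the type7 encode/decode helpers above.
// CP_TYPE7_PKT (7 << 28) | size 1 | CP_NOP (0x10) << 16, with both parity
// bits evaluating to 0, gives the header value 0x70100001.
static inline bool example_type7_nop_roundtrip(void)
{
	unsigned int hdr = cp_type7_packet(CP_NOP, 1);	// 0x70100001

	return pkt_is_type7(hdr) &&
		cp_type7_opcode(hdr) == CP_NOP &&
		type7_pkt_size(hdr) == 1;
}
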
+ * @adreno_dev: The adreno device + * @opcode: mem operation for cp packet + * @size: size for cp packet + * @num_mem: num of mem access + */ +static inline uint cp_mem_packet(struct adreno_device *adreno_dev, + int opcode, uint size, uint num_mem) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type3_packet(opcode, size); + + return cp_type7_packet(opcode, size + num_mem); +} + +/* Return 1 if the command is an indirect buffer of any kind */ +static inline int adreno_cmd_is_ib(struct adreno_device *adreno_dev, + unsigned int cmd) +{ + return cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFD, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFE, 2, 1) || + cmd == cp_mem_packet(adreno_dev, + CP_COND_INDIRECT_BUFFER_PFD, 2, 1); +} + +/** + * cp_gpuaddr - Generic function to add 64bit and 32bit gpuaddr + * to pm4 commands + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + * @gpuaddr: gpuaddr to add + */ +static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) + *cmds++ = (uint)gpuaddr; + else { + *cmds++ = lower_32_bits(gpuaddr); + *cmds++ = upper_32_bits(gpuaddr); + } + return cmds - start; +} + +/** + * cp_register - Generic function for gpu register operation + * @adreno_dev: The adreno device + * @reg: GPU register + * @size: count for PM4 operation + */ +static inline uint cp_register(struct adreno_device *adreno_dev, + unsigned int reg, unsigned int size) +{ + if (ADRENO_LEGACY_PM4(adreno_dev)) + return cp_type0_packet(reg, size); + + return cp_type4_packet(reg, size); +} + +/** + * cp_wait_for_me - common function for WAIT_FOR_ME + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_me(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); + + return cmds - start; +} + +/** + * cp_wait_for_idle - common function for WAIT_FOR_IDLE + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); + *cmds++ = 0; + } else + *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); + + return cmds - start; +} + +/** + * cp_invalidate_state - common function for invalidating cp + * state + * @adreno_dev: The adreno device + * @cmds: command pointer to add gpuaddr + */ +static inline uint cp_invalidate_state(struct adreno_device *adreno_dev, + uint *cmds) +{ + uint *start = cmds; + + if (ADRENO_GPUREV(adreno_dev) < 500) { + *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); + *cmds++ = 0x7fff; + } else { + *cmds++ = cp_type7_packet(CP_SET_DRAW_STATE, 3); + *cmds++ = 0x40000; + *cmds++ = 0; + *cmds++ = 0; + } + + return cmds - start; +} + +#endif /* __ADRENO_PM4TYPES_H */ diff --git a/drivers/gpu/msm/adreno_profile.c b/drivers/gpu/msm/adreno_profile.c new file mode 100644 index 000000000000..5476f9892f89 --- /dev/null +++ b/drivers/gpu/msm/adreno_profile.c @@ -0,0 +1,1230 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. 
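
One point worth spelling out about cp_mem_packet() and cp_gpuaddr() above: the num_mem argument exists because a GPU address takes one dword on the legacy (pre-A5XX) path and two on the type7 path, so the same logical command occupies a different number of dwords depending on the core. A small sketch of the resulting sizing, using a CP_MEM_WRITE of a single value as the example (helper name is illustrative, not from the driver):

// Dwords consumed by a one-value CP_MEM_WRITE built with the helpers above:
//   legacy (< A5XX): header + 32-bit gpuaddr + value          = 3 dwords
//   A5XX and newer:  header + gpuaddr lo + gpuaddr hi + value = 4 dwords
static inline unsigned int example_mem_write_dwords(
		struct adreno_device *adreno_dev)
{
	return 1 + (ADRENO_LEGACY_PM4(adreno_dev) ? 1 : 2) + 1;
}
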
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/debugfs.h> + +#include "adreno.h" +#include "adreno_profile.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "adreno_pm4types.h" + +#define ASSIGNS_STR_FORMAT "%.8s:%u " + +/* + * Raw Data for processing later: + * : 3 - timestamp, count, context id + * [per counter] - data for each counter + * : 1 - Register offset + * : 2 - Pre IB register hi/lo value + * : 2 - Post IB register hi/lo value + * [per counter end] + */ +#define SIZE_DATA(cnt) (6 + (cnt) * 5) + +/* + * Pre-IB command size (in dwords): + * : 2 - NOP start identifier + * : 4 - timestamp + * : 4 - count + * : 4 - context id + * : 4 - pid + * : 4 - tid + * : 4 - type + * [loop count start] - for each counter to watch + * : 4 - Register offset + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_PREIB(cnt) (28 + (cnt) * 12) + +/* + * Post-IB command size (in dwords): + * : 2 - NOP start identifier + * [loop count start] - for each counter to watch + * : 4 - Register read lo + * : 4 - Register read high + * [loop end] + * : 2 - NOP end identifier + */ +#define SIZE_POSTIB(cnt) (4 + (cnt) * 8) + +/* Counter data + Pre size + post size = total size */ +#define SIZE_SHARED_ENTRY(cnt) (SIZE_DATA(cnt) + SIZE_PREIB(cnt) \ + + SIZE_POSTIB(cnt)) + +/* + * Space for following string :"%u %u %u %.5s %u " + * [count iterations]: "%.8s:%u %llu %llu%c" + */ +#define SIZE_PIPE_ENTRY(cnt) (50 + (cnt) * 62) +#define SIZE_LOG_ENTRY(cnt) (6 + (cnt) * 5) + +static struct adreno_context_type ctxt_type_table[] = {KGSL_CONTEXT_TYPES}; + +static const char *get_api_type_str(unsigned int type) +{ + int i; + for (i = 0; i < ARRAY_SIZE(ctxt_type_table) - 1; i++) { + if (ctxt_type_table[i].type == type) + return ctxt_type_table[i].str; + } + return "UNKNOWN"; +} + +static inline uint _ib_start(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_START_OF_PROFILE_IDENTIFIER; + + return cmds - start; +} + +static inline uint _ib_end(struct adreno_device *adreno_dev, + unsigned int *cmds) +{ + unsigned int *start = cmds; + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_END_OF_PROFILE_IDENTIFIER; + + return cmds - start; +} + +static inline uint _ib_cmd_mem_write(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + *cmds++ = val; + + *off += sizeof(unsigned int); + return cmds - start; +} + +static inline uint _ib_cmd_reg_to_mem(struct adreno_device *adreno_dev, + uint *cmds, uint64_t gpuaddr, uint val, uint *off) +{ + unsigned int *start = cmds; + + *cmds++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1); + *cmds++ = val; 
+ cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr); + + *off += sizeof(unsigned int); + return cmds - start; +} + +static inline int _create_ib_ref(struct adreno_device *adreno_dev, + struct kgsl_memdesc *memdesc, unsigned int *cmd, + unsigned int cnt, unsigned int off) +{ + unsigned int *start = cmd; + + *cmd++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1); + cmd += cp_gpuaddr(adreno_dev, cmd, (memdesc->gpuaddr + off)); + *cmd++ = cnt; + + return cmd - start; +} + +static int _build_pre_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, + unsigned int *rbcmds, unsigned int head, + unsigned int timestamp, struct adreno_context *drawctxt) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer.gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer.hostptr); + start = ibcmds; + + /* start of profile identifier */ + ibcmds += _ib_start(adreno_dev, ibcmds); + + /* + * Write ringbuffer commands to save the following to memory: + * timestamp, count, context_id, pid, tid, context type + */ + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + timestamp, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + profile->assignment_count, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.id, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.proc_priv->pid, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->base.tid, &data_offset); + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset, + drawctxt->type, &data_offset); + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + + /* skip over post_ib counter data */ + data_offset += sizeof(unsigned int) * 2; + } + + /* end of profile identifier */ + ibcmds += _ib_end(adreno_dev, ibcmds); + + return _create_ib_ref(adreno_dev, &profile->shared_buffer, rbcmds, + ibcmds - start, ib_offset * sizeof(unsigned int)); +} + +static int _build_post_ib_cmds(struct adreno_device *adreno_dev, + struct adreno_profile *profile, + unsigned int *rbcmds, unsigned int head) +{ + struct adreno_profile_assigns_list *entry; + unsigned int *start, *ibcmds; + unsigned int count = profile->assignment_count; + uint64_t gpuaddr = profile->shared_buffer.gpuaddr; + unsigned int ib_offset = head + SIZE_DATA(count) + SIZE_PREIB(count); + unsigned int data_offset = head * sizeof(unsigned int); + + ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer.hostptr); + start = ibcmds; + /* start of profile identifier */ + ibcmds += _ib_start(adreno_dev, ibcmds); + + /* skip over pre_ib preamble */ + data_offset += sizeof(unsigned int) * 6; + + /* loop for each countable assigned */ + list_for_each_entry(entry, &profile->assignments_list, list) { + /* skip over pre_ib counter data */ + data_offset += 
sizeof(unsigned int) * 3; + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset, + &data_offset); + ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds, + gpuaddr + data_offset, entry->offset_hi, + &data_offset); + } + + /* end of profile identifier */ + ibcmds += _ib_end(adreno_dev, ibcmds); + + return _create_ib_ref(adreno_dev, &profile->shared_buffer, rbcmds, + ibcmds - start, ib_offset * sizeof(unsigned int)); +} + +static bool shared_buf_empty(struct adreno_profile *profile) +{ + if (profile->shared_buffer.hostptr == NULL || + profile->shared_buffer.size == 0) + return true; + + if (profile->shared_head == profile->shared_tail) + return true; + + return false; +} + +static inline void shared_buf_inc(unsigned int max_size, + unsigned int *offset, size_t inc) +{ + *offset = (*offset + inc) % max_size; +} + +static inline void log_buf_wrapcnt(unsigned int cnt, uintptr_t *off) +{ + *off = (*off + cnt) % ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc_len(unsigned int *profile_log_buffer, + unsigned int **ptr, unsigned int len) +{ + *ptr += len; + if (*ptr >= (profile_log_buffer + + ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS)) + *ptr -= ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS; +} + +static inline void log_buf_wrapinc(unsigned int *profile_log_buffer, + unsigned int **ptr) +{ + log_buf_wrapinc_len(profile_log_buffer, ptr, 1); +} + +static inline unsigned int log_buf_available(struct adreno_profile *profile, + unsigned int *head_ptr) +{ + uintptr_t tail, head; + + tail = (uintptr_t) profile->log_tail - + (uintptr_t) profile->log_buffer; + head = (uintptr_t)head_ptr - (uintptr_t) profile->log_buffer; + if (tail > head) + return (tail - head) / sizeof(uintptr_t); + else + return ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS - ((head - tail) / + sizeof(uintptr_t)); +} + +static inline unsigned int shared_buf_available(struct adreno_profile *profile) +{ + if (profile->shared_tail > profile->shared_head) + return profile->shared_tail - profile->shared_head; + else + return profile->shared_size - + (profile->shared_head - profile->shared_tail); +} + +static struct adreno_profile_assigns_list *_find_assignment_by_offset( + struct adreno_profile *profile, unsigned int offset) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->offset == offset) + return entry; + } + + return NULL; +} + +static bool _in_assignments_list(struct adreno_profile *profile, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile_assigns_list *entry; + + list_for_each_entry(entry, &profile->assignments_list, list) { + if (entry->groupid == groupid && entry->countable == + countable) + return true; + } + + return false; +} + +static bool _add_to_assignments_list(struct adreno_profile *profile, + const char *str, unsigned int groupid, unsigned int countable, + unsigned int offset, unsigned int offset_hi) +{ + struct adreno_profile_assigns_list *entry; + + /* first make sure we can alloc memory */ + entry = kmalloc(sizeof(struct adreno_profile_assigns_list), GFP_KERNEL); + if (!entry) + return false; + + list_add_tail(&entry->list, &profile->assignments_list); + + entry->countable = countable; + entry->groupid = groupid; + entry->offset = offset; + entry->offset_hi = offset_hi; + + strlcpy(entry->name, str, sizeof(entry->name)); + + profile->assignment_count++; + + return true; +} + +static bool results_available(struct adreno_device *adreno_dev, + struct adreno_profile *profile, unsigned int 
*shared_buf_tail) +{ + struct kgsl_device *device = &adreno_dev->dev; + unsigned int global_eop; + unsigned int off = profile->shared_tail; + unsigned int *shared_ptr = (unsigned int *) + profile->shared_buffer.hostptr; + unsigned int ts, cnt; + int ts_cmp; + + /* + * If shared_buffer empty or Memstore EOP timestamp is less than + * outstanding counter buffer timestamps then no results available + */ + if (shared_buf_empty(profile)) + return false; + + if (adreno_rb_readtimestamp(device, + adreno_dev->cur_rb, + KGSL_TIMESTAMP_RETIRED, &global_eop)) + return false; + do { + cnt = *(shared_ptr + off + 1); + if (cnt == 0) + return false; + + ts = *(shared_ptr + off); + ts_cmp = timestamp_cmp(ts, global_eop); + if (ts_cmp >= 0) { + *shared_buf_tail = off; + if (off == profile->shared_tail) + return false; + else + return true; + } + shared_buf_inc(profile->shared_size, &off, + SIZE_SHARED_ENTRY(cnt)); + } while (off != profile->shared_head); + + *shared_buf_tail = profile->shared_head; + + return true; +} + +static void transfer_results(struct adreno_profile *profile, + unsigned int shared_buf_tail) +{ + unsigned int buf_off; + unsigned int ts, cnt, ctxt_id, pid, tid, client_type; + unsigned int *ptr = (unsigned int *) profile->shared_buffer.hostptr; + unsigned int *log_ptr, *log_base; + struct adreno_profile_assigns_list *assigns_list; + int i, tmp_tail; + + log_ptr = profile->log_head; + log_base = profile->log_buffer; + if (log_ptr == NULL) + return; + + /* + * go through counter buffers and format for write into log_buffer + * if log buffer doesn't have space just overwrite it circularly + * shared_buf is guaranteed to not wrap within an entry so can use + * ptr increment + */ + while (profile->shared_tail != shared_buf_tail) { + buf_off = profile->shared_tail; + /* + * format: timestamp, count, context_id + * count entries: pc_off, pc_start, pc_end + */ + ts = *(ptr + buf_off++); + cnt = *(ptr + buf_off++); + ctxt_id = *(ptr + buf_off++); + pid = *(ptr + buf_off++); + tid = *(ptr + buf_off++); + client_type = *(ptr + buf_off++); + + /* + * if entry overwrites the tail of log_buffer then adjust tail + * ptr to make room for the new entry, discarding old entry + */ + while (log_buf_available(profile, log_ptr) <= + SIZE_LOG_ENTRY(cnt)) { + unsigned int size_tail; + uintptr_t boff; + size_tail = SIZE_LOG_ENTRY(0xffff & + *(profile->log_tail)); + boff = ((uintptr_t) profile->log_tail - + (uintptr_t) log_base) / sizeof(uintptr_t); + log_buf_wrapcnt(size_tail, &boff); + profile->log_tail = log_base + boff; + } + + *log_ptr = cnt; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = client_type; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = pid; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = tid; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = ctxt_id; + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = ts; + log_buf_wrapinc(log_base, &log_ptr); + + for (i = 0; i < cnt; i++) { + assigns_list = _find_assignment_by_offset( + profile, *(ptr + buf_off++)); + if (assigns_list == NULL) { + *log_ptr = (unsigned int) -1; + + shared_buf_inc(profile->shared_size, + &profile->shared_tail, + SIZE_SHARED_ENTRY(cnt)); + goto err; + } else { + *log_ptr = assigns_list->groupid << 16 | + (assigns_list->countable & 0xffff); + } + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr start hi */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr start lo */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf 
cntr end hi */ + log_buf_wrapinc(log_base, &log_ptr); + *log_ptr = *(ptr + buf_off++); /* perf cntr end lo */ + log_buf_wrapinc(log_base, &log_ptr); + + } + + tmp_tail = profile->shared_tail; + shared_buf_inc(profile->shared_size, + &profile->shared_tail, + SIZE_SHARED_ENTRY(cnt)); + /* + * Possibly lost some room as we cycled around, so it's safe to + * reset the max size + */ + if (profile->shared_tail < tmp_tail) + profile->shared_size = + ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + + } + profile->log_head = log_ptr; + return; +err: + /* reset head/tail to same on error in hopes we work correctly later */ + profile->log_head = profile->log_tail; +} + +static int profile_enable_get(void *data, u64 *val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + mutex_lock(&device->mutex); + *val = adreno_profile_enabled(&adreno_dev->profile); + mutex_unlock(&device->mutex); + + return 0; +} + +static int profile_enable_set(void *data, u64 val) +{ + struct kgsl_device *device = data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + + mutex_lock(&device->mutex); + + if (val && profile->log_buffer == NULL) { + /* allocate profile_log_buffer the first time enabled */ + profile->log_buffer = vmalloc(ADRENO_PROFILE_LOG_BUF_SIZE); + if (profile->log_buffer == NULL) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + profile->log_tail = profile->log_buffer; + profile->log_head = profile->log_buffer; + } + + profile->enabled = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static ssize_t profile_assignments_read(struct file *filep, + char __user *ubuf, size_t max, loff_t *ppos) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry; + int len = 0, max_size = PAGE_SIZE; + char *buf, *pos; + ssize_t size = 0; + + mutex_lock(&device->mutex); + + if (profile->assignment_count == 0) { + mutex_unlock(&device->mutex); + return 0; + } + + buf = kmalloc(max_size, GFP_KERNEL); + if (!buf) { + mutex_unlock(&device->mutex); + return -ENOMEM; + } + + pos = buf; + + /* copy all assingments from list to str */ + list_for_each_entry(entry, &profile->assignments_list, list) { + len = snprintf(pos, max_size, ASSIGNS_STR_FORMAT, + entry->name, entry->countable); + + max_size -= len; + pos += len; + } + + size = simple_read_from_buffer(ubuf, max, ppos, buf, + strlen(buf)); + + kfree(buf); + + mutex_unlock(&device->mutex); + return size; +} + +static void _remove_assignment(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + if (entry->groupid == groupid && + entry->countable == countable) { + list_del(&entry->list); + + profile->assignment_count--; + + kfree(entry); + + /* remove from perf counter allocation */ + adreno_perfcounter_put(adreno_dev, groupid, countable, + PERFCOUNTER_FLAG_KERNEL); + } + } +} + +static void _add_assignment(struct adreno_device *adreno_dev, + unsigned int groupid, unsigned int countable) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int offset, offset_hi; + const char *name = NULL; + + name = adreno_perfcounter_get_name(adreno_dev, 
groupid); + if (!name) + return; + + /* if already in assigned list skip it */ + if (_in_assignments_list(profile, groupid, countable)) + return; + + /* add to perf counter allocation, if fail skip it */ + if (adreno_perfcounter_get(adreno_dev, groupid, countable, + &offset, &offset_hi, PERFCOUNTER_FLAG_NONE)) + return; + + /* add to assignments list, put counter back if error */ + if (!_add_to_assignments_list(profile, name, groupid, + countable, offset, offset_hi)) + adreno_perfcounter_put(adreno_dev, groupid, + countable, PERFCOUNTER_FLAG_KERNEL); +} + +static char *_parse_next_assignment(struct adreno_device *adreno_dev, + char *str, int *groupid, int *countable, bool *remove) +{ + char *groupid_str, *countable_str, *next_str = NULL; + int ret; + + *groupid = -EINVAL; + *countable = -EINVAL; + *remove = false; + + /* remove spaces */ + while (*str == ' ') + str++; + + /* check if it's a remove assignment */ + if (*str == '-') { + *remove = true; + str++; + } + + /* get the groupid string */ + groupid_str = str; + while (*str != ':') { + if (*str == '\0') + return NULL; + *str = tolower(*str); + str++; + } + if (groupid_str == str) + return NULL; + + *str = '\0'; + str++; + + /* get the countable string */ + countable_str = str; + while (*str != ' ' && *str != '\0') + str++; + if (countable_str == str) + return NULL; + + /* + * If we have reached the end of the original string then make sure we + * return NULL from this function or we could accidently overrun + */ + + if (*str != '\0') { + *str = '\0'; + next_str = str + 1; + } + + /* set results */ + *groupid = adreno_perfcounter_get_groupid(adreno_dev, + groupid_str); + if (*groupid < 0) + return NULL; + ret = kstrtou32(countable_str, 10, countable); + if (ret) + return NULL; + + return next_str; +} + +static ssize_t profile_assignments_write(struct file *filep, + const char __user *user_buf, size_t len, loff_t *off) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + size_t size = 0; + char *buf, *pbuf; + bool remove_assignment = false; + int groupid, countable, ret; + + if (len >= PAGE_SIZE || len == 0) + return -EINVAL; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + size = -EFAULT; + goto error_free; + } + + mutex_lock(&device->mutex); + + if (adreno_profile_enabled(profile)) { + size = -EINVAL; + goto error_unlock; + } + + ret = kgsl_active_count_get(device); + if (ret) { + size = ret; + goto error_unlock; + } + + /* + * When adding/removing assignments, ensure that the GPU is done with + * all it's work. This helps to syncronize the work flow to the + * GPU and avoid racey conditions. 
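
For reference, the string accepted by the assignments file and consumed by the parser above is a space-separated list of group:countable tokens, with a leading '-' marking a removal. The walk-through below uses invented group names purely for illustration.

// Illustrative parse of a hypothetical write such as "cp:0 rbbm:6 -sp:12":
//   pass 1: group "cp",   countable 0,  remove = false -> _add_assignment()
//   pass 2: group "rbbm", countable 6,  remove = false -> _add_assignment()
//   pass 3: group "sp",   countable 12, remove = true  -> _remove_assignment()
// Group names are matched case-insensitively (the parser lowercases them)
// and the countable is parsed as a base-10 unsigned integer.
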
+ */ + if (adreno_idle(device)) { + size = -ETIMEDOUT; + goto error_put; + } + + /* clear all shared buffer results */ + adreno_profile_process_results(adreno_dev); + + pbuf = buf; + + /* clear the log buffer */ + if (profile->log_buffer != NULL) { + profile->log_head = profile->log_buffer; + profile->log_tail = profile->log_buffer; + } + + + /* for sanity and parsing, ensure it is null terminated */ + buf[len] = '\0'; + + /* parse file buf and add(remove) to(from) appropriate lists */ + while (pbuf) { + pbuf = _parse_next_assignment(adreno_dev, pbuf, &groupid, + &countable, &remove_assignment); + if (groupid < 0 || countable < 0) + break; + + if (remove_assignment) + _remove_assignment(adreno_dev, groupid, countable); + else + _add_assignment(adreno_dev, groupid, countable); + } + + size = len; + +error_put: + kgsl_active_count_put(device); +error_unlock: + mutex_unlock(&device->mutex); +error_free: + kfree(buf); + return size; +} + +static int _pipe_print_pending(char __user *ubuf, size_t max) +{ + loff_t unused = 0; + char str[] = "Operation Would Block!"; + + return simple_read_from_buffer(ubuf, max, + &unused, str, strlen(str)); +} + +static int _pipe_print_results(struct adreno_device *adreno_dev, + char __user *ubuf, size_t max) +{ + struct adreno_profile *profile = &adreno_dev->profile; + const char *grp_name; + char __user *usr_buf = ubuf; + unsigned int *log_ptr = NULL, *tmp_log_ptr = NULL; + int len, i; + int status = 0; + ssize_t size, total_size = 0; + unsigned int cnt, api_type, ctxt_id, pid, tid, ts, cnt_reg; + unsigned long long pc_start, pc_end; + const char *api_str; + char format_space; + loff_t unused = 0; + char pipe_hdr_buf[51]; /* 4 uint32 + 5 space + 5 API type + '\0' */ + char pipe_cntr_buf[63]; /* 2 uint64 + 1 uint32 + 4 spaces + 8 group */ + + /* convert unread entries to ASCII, copy to user-space */ + log_ptr = profile->log_tail; + + do { + /* store the tmp var for error cases so we can skip */ + tmp_log_ptr = log_ptr; + + /* Too many to output to pipe, so skip this data */ + cnt = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + if (SIZE_PIPE_ENTRY(cnt) > max) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + /* + * Not enough space left in pipe, return without doing + * anything + */ + if ((max - (usr_buf - ubuf)) < SIZE_PIPE_ENTRY(cnt)) { + log_ptr = tmp_log_ptr; + goto done; + } + + api_type = *log_ptr; + api_str = get_api_type_str(api_type); + log_buf_wrapinc(profile->log_buffer, &log_ptr); + pid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + tid = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ctxt_id = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + ts = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + len = snprintf(pipe_hdr_buf, sizeof(pipe_hdr_buf) - 1, + "%u %u %u %.5s %u ", + pid, tid, ctxt_id, api_str, ts); + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_hdr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + unused = 0; + usr_buf += size; + total_size += size; + + for (i = 0; i < cnt; i++) { + unsigned int start_lo, start_hi; + unsigned int end_lo, end_hi; + + grp_name = adreno_perfcounter_get_name( + adreno_dev, (*log_ptr >> 16) & 0xffff); + + /* non-fatal error, so skip rest of entry and return */ 
+ if (grp_name == NULL) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + + if (i == cnt - 1) + format_space = '\n'; + else + format_space = ' '; + + cnt_reg = *log_ptr & 0xffff; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + start_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_lo = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + end_hi = *log_ptr; + log_buf_wrapinc(profile->log_buffer, &log_ptr); + + pc_start = (((uint64_t) start_hi) << 32) | start_lo; + pc_end = (((uint64_t) end_hi) << 32) | end_lo; + + len = snprintf(pipe_cntr_buf, + sizeof(pipe_cntr_buf) - 1, + "%.8s:%u %llu %llu%c", + grp_name, cnt_reg, pc_start, + pc_end, format_space); + + size = simple_read_from_buffer(usr_buf, + max - (usr_buf - ubuf), + &unused, pipe_cntr_buf, len); + + /* non-fatal error, so skip rest of entry and return */ + if (size < 0) { + log_buf_wrapinc_len(profile->log_buffer, + &tmp_log_ptr, SIZE_PIPE_ENTRY(cnt)); + log_ptr = tmp_log_ptr; + goto done; + } + unused = 0; + usr_buf += size; + total_size += size; + } + } while (log_ptr != profile->log_head); + +done: + status = total_size; + profile->log_tail = log_ptr; + + return status; +} + +static ssize_t profile_pipe_print(struct file *filep, char __user *ubuf, + size_t max, loff_t *ppos) +{ + struct kgsl_device *device = (struct kgsl_device *) filep->private_data; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_profile *profile = &adreno_dev->profile; + char __user *usr_buf = ubuf; + int status = 0; + + /* + * this file not seekable since it only supports streaming, ignore + * ppos <> 0 + */ + /* + * format <pid> <tid> <context id> <cnt<<16 | client type> <timestamp> + * for each perf counter <cntr_reg_off> <start hi & lo> <end hi & low> + */ + + mutex_lock(&device->mutex); + + while (1) { + /* process any results that are available into the log_buffer */ + status = adreno_profile_process_results(adreno_dev); + if (status > 0) { + /* if we have results, print them and exit */ + status = _pipe_print_results(adreno_dev, usr_buf, max); + break; + } + + /* there are no unread results, act accordingly */ + if (filep->f_flags & O_NONBLOCK) { + if (profile->shared_tail != profile->shared_head) { + status = _pipe_print_pending(usr_buf, max); + break; + } else { + status = 0; + break; + } + } + + mutex_unlock(&device->mutex); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ / 10); + mutex_lock(&device->mutex); + + if (signal_pending(current)) { + status = 0; + break; + } + } + + mutex_unlock(&device->mutex); + + return status; +} + +static int profile_groups_print(struct seq_file *s, void *unused) +{ + struct kgsl_device *device = (struct kgsl_device *) s->private; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_perfcounters *counters = gpudev->perfcounters; + struct adreno_perfcount_group *group; + int i, j, used; + + mutex_lock(&device->mutex); + + for (i = 0; i < counters->group_count; ++i) { + group = &(counters->groups[i]); + /* get number of counters used for this group */ + used = 0; + for (j = 0; j < group->reg_count; j++) { + if (group->regs[j].countable != + KGSL_PERFCOUNTER_NOT_USED) + used++; + } + + seq_printf(s, "%s %d %d\n", group->name, + group->reg_count, used); + } + + mutex_unlock(&device->mutex); + + return 0; +} + 
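
For reference, a record streamed out through the pipe file follows the two snprintf formats used in _pipe_print_results() above: a header of pid, tid, context id, API type and timestamp, then one name:countable start end triple per assigned counter. The values in the sample below are invented for illustration, as is the "blocks" line that profile_groups_print() emits per group.

// pipe header: "%u %u %u %.5s %u "   per counter: "%.8s:%u %llu %llu%c"
//   e.g. (invented, two assigned counters):
//   1234 1240 7 GL 10452 cp:0 812002 813675 rbbm:6 51203 51988
// blocks file, one line per group: "<name> <total regs> <in use>"
//   e.g. cp 14 2
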
+static int profile_groups_open(struct inode *inode, struct file *file) +{ + return single_open(file, profile_groups_print, inode->i_private); +} + +static const struct file_operations profile_groups_fops = { + .owner = THIS_MODULE, + .open = profile_groups_open, + .read = seq_read, + .llseek = noop_llseek, + .release = single_release, +}; + +static const struct file_operations profile_pipe_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_pipe_print, + .llseek = noop_llseek, +}; + +static const struct file_operations profile_assignments_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = profile_assignments_read, + .write = profile_assignments_write, + .llseek = noop_llseek, +}; + +DEFINE_SIMPLE_ATTRIBUTE(profile_enable_fops, + profile_enable_get, + profile_enable_set, "%llu\n"); + +void adreno_profile_init(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_profile *profile = &adreno_dev->profile; + struct dentry *profile_dir; + int ret; + + profile->enabled = false; + + /* allocate shared_buffer, which includes pre_ib and post_ib */ + profile->shared_size = ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS; + ret = kgsl_allocate_global(device, &profile->shared_buffer, + profile->shared_size * sizeof(unsigned int), 0, 0); + + if (ret) { + profile->shared_size = 0; + return; + } + + INIT_LIST_HEAD(&profile->assignments_list); + + /* Create perf counter debugfs */ + profile_dir = debugfs_create_dir("profiling", device->d_debugfs); + if (IS_ERR(profile_dir)) + return; + + debugfs_create_file("enable", 0644, profile_dir, device, + &profile_enable_fops); + debugfs_create_file("blocks", 0444, profile_dir, device, + &profile_groups_fops); + debugfs_create_file("pipe", 0444, profile_dir, device, + &profile_pipe_fops); + debugfs_create_file("assignments", 0644, profile_dir, device, + &profile_assignments_fops); +} + +void adreno_profile_close(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + struct adreno_profile_assigns_list *entry, *tmp; + + profile->enabled = false; + vfree(profile->log_buffer); + profile->log_buffer = NULL; + profile->log_head = NULL; + profile->log_tail = NULL; + profile->shared_head = 0; + profile->shared_tail = 0; + kgsl_free_global(&profile->shared_buffer); + profile->shared_size = 0; + + profile->assignment_count = 0; + + list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) { + list_del(&entry->list); + kfree(entry); + } +} + +int adreno_profile_process_results(struct adreno_device *adreno_dev) +{ + struct adreno_profile *profile = &adreno_dev->profile; + unsigned int shared_buf_tail = profile->shared_tail; + + if (!results_available(adreno_dev, profile, &shared_buf_tail)) + return 0; + + /* + * transfer retired results to log_buffer + * update shared_buffer tail ptr + */ + transfer_results(profile, shared_buf_tail); + + return 1; +} + +void adreno_profile_preib_processing(struct adreno_device *adreno_dev, + struct adreno_context *drawctxt, unsigned int *cmd_flags, + unsigned int **rbptr) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head; + unsigned int *shared_ptr; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + unsigned int rbcmds[4]; + unsigned int *ptr = *rbptr; + unsigned int i, ret = 0; + + *cmd_flags &= ~KGSL_CMD_FLAGS_PROFILE; + + if (!adreno_profile_assignments_ready(profile)) + goto done; + + /* + * 
check if space available, include the post_ib in space available + * check so don't have to handle trying to undo the pre_ib insertion in + * ringbuffer in the case where only the post_ib fails enough space + */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + goto done; + + if (entry_head + SIZE_SHARED_ENTRY(count) >= profile->shared_size) { + /* entry_head would wrap, start entry_head at 0 in buffer */ + entry_head = 0; + profile->shared_size = profile->shared_head; + profile->shared_head = 0; + + /* recheck space available */ + if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile)) + goto done; + } + + /* zero out the counter area of shared_buffer entry_head */ + shared_ptr = entry_head + ((unsigned int *) + profile->shared_buffer.hostptr); + memset(shared_ptr, 0, SIZE_SHARED_ENTRY(count) * sizeof(unsigned int)); + + /* reserve space for the pre ib shared buffer */ + shared_buf_inc(profile->shared_size, &profile->shared_head, + SIZE_SHARED_ENTRY(count)); + + /* create the shared ibdesc */ + ret = _build_pre_ib_cmds(adreno_dev, profile, rbcmds, entry_head, + rb->timestamp + 1, drawctxt); + + /* set flag to sync with post ib commands */ + *cmd_flags |= KGSL_CMD_FLAGS_PROFILE; + +done: + /* write the ibdesc to the ringbuffer */ + for (i = 0; i < ret; i++) + *ptr++ = rbcmds[i]; + + *rbptr = ptr; +} + +void adreno_profile_postib_processing(struct adreno_device *adreno_dev, + unsigned int *cmd_flags, unsigned int **rbptr) +{ + struct adreno_profile *profile = &adreno_dev->profile; + int count = profile->assignment_count; + unsigned int entry_head = profile->shared_head - + SIZE_SHARED_ENTRY(count); + unsigned int *ptr = *rbptr; + unsigned int rbcmds[4]; + int ret = 0, i; + + if (!adreno_profile_assignments_ready(profile)) + goto done; + + if (!(*cmd_flags & KGSL_CMD_FLAGS_PROFILE)) + goto done; + + /* create the shared ibdesc */ + ret = _build_post_ib_cmds(adreno_dev, profile, rbcmds, entry_head); + +done: + /* write the ibdesc to the ringbuffer */ + for (i = 0; i < ret; i++) + *ptr++ = rbcmds[i]; + + *rbptr = ptr; + + /* reset the sync flag */ + *cmd_flags &= ~KGSL_CMD_FLAGS_PROFILE; +} + diff --git a/drivers/gpu/msm/adreno_profile.h b/drivers/gpu/msm/adreno_profile.h new file mode 100644 index 000000000000..4d81abd14837 --- /dev/null +++ b/drivers/gpu/msm/adreno_profile.h @@ -0,0 +1,111 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+#ifndef __ADRENO_PROFILE_H
+#define __ADRENO_PROFILE_H
+#include <linux/seq_file.h>
+
+/**
+ * struct adreno_profile_assigns_list: linked list for assigned perf counters
+ * @list: linkage for nodes in list
+ * @name: group name for this assignment
+ * @groupid: group id
+ * @countable: countable assigned to perfcounter
+ * @offset: perfcounter register address offset (LO)
+ * @offset_hi: perfcounter register address offset (HI)
+ */
+struct adreno_profile_assigns_list {
+	struct list_head list;
+	char name[25];
+	unsigned int groupid;
+	unsigned int countable;
+	unsigned int offset;    /* LO offset */
+	unsigned int offset_hi; /* HI offset */
+};
+
+struct adreno_profile {
+	struct list_head assignments_list; /* list of all assignments */
+	unsigned int assignment_count;  /* Number of assigned counters */
+	unsigned int *log_buffer;
+	unsigned int *log_head;
+	unsigned int *log_tail;
+	bool enabled;
+	/* counter, pre_ib, and post_ib held in one large circular buffer
+	 * shared between kgsl and GPU
+	 * counter entry 0
+	 * pre_ib entry 0
+	 * post_ib entry 0
+	 * ...
+	 * counter entry N
+	 * pre_ib entry N
+	 * post_ib entry N
+	 */
+	struct kgsl_memdesc shared_buffer;
+	unsigned int shared_head;
+	unsigned int shared_tail;
+	unsigned int shared_size;
+};
+
+#define ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS (48 * 4096 / sizeof(uint))
+/* sized @ 48 pages should allow for over 50 outstanding IBs minimum, 1755 max*/
+
+#define ADRENO_PROFILE_LOG_BUF_SIZE (1024 * 920)
+/* sized for 1024 entries of fully assigned 45 counters in log buffer, 230 pages */
+#define ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS (ADRENO_PROFILE_LOG_BUF_SIZE / \
+	sizeof(unsigned int))
+
+#ifdef CONFIG_DEBUG_FS
+void adreno_profile_init(struct adreno_device *adreno_dev);
+void adreno_profile_close(struct adreno_device *adreno_dev);
+int adreno_profile_process_results(struct adreno_device *adreno_dev);
+void adreno_profile_preib_processing(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr);
+void adreno_profile_postib_processing(struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr);
+#else
+static inline void adreno_profile_init(struct adreno_device *adreno_dev) { }
+static inline void adreno_profile_close(struct adreno_device *adreno_dev) { }
+static inline int adreno_profile_process_results(
+		struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+
+static inline void adreno_profile_preib_processing(
+		struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr) { }
+
+static inline void adreno_profile_postib_processing(
+		struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr) { }
+#endif
+
+static inline bool adreno_profile_enabled(struct adreno_profile *profile)
+{
+	return profile->enabled;
+}
+
+static inline bool adreno_profile_has_assignments(
+	struct adreno_profile *profile)
+{
+	return !list_empty(&profile->assignments_list);
+}
+
+static inline bool adreno_profile_assignments_ready(
+	struct adreno_profile *profile)
+{
+	return adreno_profile_enabled(profile) &&
+		adreno_profile_has_assignments(profile);
+}
+
+#endif
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
new file mode 100644
index 000000000000..a80707385e3b
--- /dev/null
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -0,0 +1,1357 @@
+/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/log2.h> +#include <linux/time.h> +#include <linux/delay.h> + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "kgsl_trace.h" +#include "kgsl_pwrctrl.h" + +#include "adreno.h" +#include "adreno_pm4types.h" +#include "adreno_ringbuffer.h" + +#include "a3xx_reg.h" +#include "adreno_a5xx.h" + +#define GSL_RB_NOP_SIZEDWORDS 2 + +#define ADRENO_RB_PREEMPT_TOKEN_IB_DWORDS 50 +#define ADRENO_RB_PREEMPT_TOKEN_DWORDS 125 + +#define RB_HOSTPTR(_rb, _pos) \ + ((unsigned int *) ((_rb)->buffer_desc.hostptr + \ + ((_pos) * sizeof(unsigned int)))) + +#define RB_GPUADDR(_rb, _pos) \ + ((_rb)->buffer_desc.gpuaddr + ((_pos) * sizeof(unsigned int))) + +static void _cff_write_ringbuffer(struct adreno_ringbuffer *rb) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + struct kgsl_device *device = &adreno_dev->dev; + uint64_t gpuaddr; + unsigned int *hostptr; + size_t size; + + if (device->cff_dump_enable == 0) + return; + + /* + * This code is predicated on the fact that we write a full block of + * stuff without wrapping + */ + BUG_ON(rb->wptr < rb->last_wptr); + + size = (rb->wptr - rb->last_wptr) * sizeof(unsigned int); + + hostptr = RB_HOSTPTR(rb, rb->last_wptr); + gpuaddr = RB_GPUADDR(rb, rb->last_wptr); + + kgsl_cffdump_memcpy(device, gpuaddr, hostptr, size); +} + +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + BUG_ON(rb->wptr == 0); + + /* Write the changes to CFF if so enabled */ + _cff_write_ringbuffer(rb); + + /* + * Read the current GPU ticks and wallclock for most accurate + * profiling + */ + + if (time != NULL) { + /* + * Here we are attempting to create a mapping between the + * GPU time domain (alwayson counter) and the CPU time domain + * (local_clock) by sampling both values as close together as + * possible. This is useful for many types of debugging and + * profiling. In order to make this mapping as accurate as + * possible, we must turn off interrupts to avoid running + * interrupt handlers between the two samples. 
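+		 *
+		 * As a purely illustrative note (not used by the driver
+		 * itself): since the alwayson counter runs at the 19.2MHz
+		 * rate documented in adreno_ringbuffer.h, a later sample can
+		 * be related back to this pair with roughly
+		 *
+		 *   delta_ns ~= (later_ticks - time->ticks) * 10000 / 192;
+		 *   cpu_ns   ~= time->ktime + delta_ns;
+		 *
+		 * where later_ticks is a hypothetical second alwayson read.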
+ */ + unsigned long flags; + local_irq_save(flags); + + /* Read always on registers */ + if (!adreno_is_a3xx(adreno_dev)) { + adreno_readreg64(adreno_dev, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, + &time->ticks); + + /* + * Mask hi bits as they may be incorrect on + * a4x and some a5x + */ + if (ADRENO_GPUREV(adreno_dev) >= 400 && + ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530) + time->ticks &= 0xFFFFFFFF; + } + else + time->ticks = 0; + + /* Get the kernel clock for time since boot */ + time->ktime = local_clock(); + + /* Get the timeofday for the wall time (for the user) */ + getnstimeofday(&time->utime); + + local_irq_restore(flags); + } + + /* Memory barrier before informing the hardware of new commands */ + mb(); + + if (adreno_preempt_state(adreno_dev, ADRENO_DISPATCHER_PREEMPT_CLEAR) && + (adreno_dev->cur_rb == rb)) { + /* + * Let the pwrscale policy know that new commands have + * been submitted. + */ + kgsl_pwrscale_busy(rb->device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr); + } +} + +int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, unsigned int timeout) +{ + adreno_ringbuffer_submit(rb, NULL); + return adreno_spin_idle(rb->device, timeout); +} + +static int +adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, + unsigned int numcmds, int wptr_ahead) +{ + int nopcount = 0; + unsigned int freecmds; + unsigned int wptr = rb->wptr; + unsigned int *cmds = NULL; + uint64_t gpuaddr; + unsigned long wait_time; + unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT); + unsigned int rptr; + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + + /* if wptr ahead, fill the remaining with NOPs */ + if (wptr_ahead) { + /* -1 for header */ + nopcount = KGSL_RB_DWORDS - rb->wptr - 1; + + cmds = RB_HOSTPTR(rb, rb->wptr); + gpuaddr = RB_GPUADDR(rb, rb->wptr); + + rptr = adreno_get_rptr(rb); + /* For non current rb we don't expect the rptr to move */ + if ((adreno_dev->cur_rb != rb || + !adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) && + !rptr) + return -ENOSPC; + + /* Make sure that rptr is not 0 before submitting + * commands at the end of ringbuffer. 
We do not + * want the rptr and wptr to become equal when + * the ringbuffer is not empty */ + wait_time = jiffies + wait_timeout; + while (!rptr) { + rptr = adreno_get_rptr(rb); + if (time_after(jiffies, wait_time)) + return -ETIMEDOUT; + } + + rb->wptr = 0; + } + + rptr = adreno_get_rptr(rb); + freecmds = rptr - rb->wptr; + if (freecmds == 0 || freecmds > numcmds) + goto done; + + /* non current rptr will not advance anyway or if preemption underway */ + if (adreno_dev->cur_rb != rb || + !adreno_preempt_state(adreno_dev, + ADRENO_DISPATCHER_PREEMPT_CLEAR)) { + rb->wptr = wptr; + return -ENOSPC; + } + + wait_time = jiffies + wait_timeout; + /* wait for space in ringbuffer */ + while (1) { + rptr = adreno_get_rptr(rb); + + freecmds = rptr - rb->wptr; + + if (freecmds == 0 || freecmds > numcmds) + break; + + if (time_after(jiffies, wait_time)) { + KGSL_DRV_ERR(rb->device, + "Timed out waiting for freespace in RB rptr: 0x%x, wptr: 0x%x, rb id %d\n", + rptr, wptr, rb->id); + return -ETIMEDOUT; + } + } +done: + if (wptr_ahead) { + *cmds = cp_packet(adreno_dev, CP_NOP, nopcount); + kgsl_cffdump_write(rb->device, gpuaddr, *cmds); + + } + return 0; +} + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds) +{ + unsigned int *ptr = NULL; + int ret = 0; + unsigned int rptr; + BUG_ON(numcmds >= KGSL_RB_DWORDS); + + rptr = adreno_get_rptr(rb); + /* check for available space */ + if (rb->wptr >= rptr) { + /* wptr ahead or equal to rptr */ + /* reserve dwords for nop packet */ + if ((rb->wptr + numcmds) > (KGSL_RB_DWORDS - + GSL_RB_NOP_SIZEDWORDS)) + ret = adreno_ringbuffer_waitspace(rb, numcmds, 1); + } else { + /* wptr behind rptr */ + if ((rb->wptr + numcmds) >= rptr) + ret = adreno_ringbuffer_waitspace(rb, numcmds, 0); + /* check for remaining space */ + /* reserve dwords for nop packet */ + if (!ret && (rb->wptr + numcmds) > (KGSL_RB_DWORDS - + GSL_RB_NOP_SIZEDWORDS)) + ret = adreno_ringbuffer_waitspace(rb, numcmds, 1); + } + + if (!ret) { + rb->last_wptr = rb->wptr; + + ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr; + rb->wptr += numcmds; + } else + ptr = ERR_PTR(ret); + + return ptr; +} + +/** + * _ringbuffer_setup_common() - Ringbuffer start + * @rb: Pointer to adreno ringbuffer + * + * Setup ringbuffer for GPU. + */ +static void _ringbuffer_setup_common(struct adreno_ringbuffer *rb) +{ + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_ringbuffer *rb_temp; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb_temp, i) { + kgsl_sharedmem_set(rb_temp->device, + &(rb_temp->buffer_desc), 0, + 0xAA, KGSL_RB_SIZE); + rb_temp->wptr = 0; + rb_temp->rptr = 0; + rb_temp->wptr_preempt_end = 0xFFFFFFFF; + rb_temp->starve_timer_state = + ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT; + adreno_iommu_set_pt_generate_rb_cmds(rb_temp, + device->mmu.defaultpagetable); + } + + /* + * The size of the ringbuffer in the hardware is the log2 + * representation of the size in quadwords (sizedwords / 2). + * Also disable the host RPTR shadow register as it might be unreliable + * in certain circumstances. 
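+	 *
+	 * Worked example (for reference only): with KGSL_RB_SIZE = 32KB the
+	 * ringbuffer is 8192 dwords (KGSL_RB_DWORDS), i.e. 4096 quadwords,
+	 * so the write below programs
+	 *
+	 *   (ilog2(8192 >> 1) & 0x3F) | (1 << 27) == 12 | (1 << 27)
+	 *                                         == 0x0800000C
+	 *
+	 * where bit 27 is the RPTR shadow disable mentioned above.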
+ */ + + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL, + (ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) | + (1 << 27)); + + adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_BASE, + ADRENO_REG_CP_RB_BASE_HI, rb->buffer_desc.gpuaddr); + + /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */ + if (adreno_is_a3xx(adreno_dev)) { + unsigned int val = 0x000E0602; + + if (adreno_is_a305b(adreno_dev) || + adreno_is_a310(adreno_dev) || + adreno_is_a330(adreno_dev)) + val = 0x003E2008; + kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, val); + } +} + +/** + * _ringbuffer_start_common() - Ringbuffer start + * @rb: Pointer to adreno ringbuffer + * + * Start ringbuffer for GPU. + */ +static int _ringbuffer_start_common(struct adreno_ringbuffer *rb) +{ + int status; + struct kgsl_device *device = rb->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + /* clear ME_HALT to start micro engine */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0); + + /* ME init is GPU specific, so jump into the sub-function */ + status = gpudev->rb_init(adreno_dev, rb); + if (status) + return status; + + return status; +} + +/** + * adreno_ringbuffer_start() - Ringbuffer start + * @adreno_dev: Pointer to adreno device + * @start_type: Warm or cold start + */ +int adreno_ringbuffer_start(struct adreno_device *adreno_dev, + unsigned int start_type) +{ + int status; + struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + + _ringbuffer_setup_common(rb); + + status = gpudev->microcode_load(adreno_dev, start_type); + if (status) + return status; + + return _ringbuffer_start_common(rb); +} + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct adreno_ringbuffer *rb; + int i; + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + kgsl_cancel_events(device, &(rb->events)); +} + +static int _adreno_ringbuffer_init(struct adreno_device *adreno_dev, + struct adreno_ringbuffer *rb, int id) +{ + int ret; + char name[64]; + + rb->device = &adreno_dev->dev; + rb->id = id; + + snprintf(name, sizeof(name), "rb_events-%d", id); + kgsl_add_event_group(&rb->events, NULL, name, + adreno_rb_readtimestamp, rb); + rb->timestamp = 0; + init_waitqueue_head(&rb->ts_expire_waitq); + + /* + * Allocate mem for storing RB pagetables and commands to + * switch pagetable + */ + ret = kgsl_allocate_global(&adreno_dev->dev, &rb->pagetable_desc, + PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED); + if (ret) + return ret; + + ret = kgsl_allocate_global(&adreno_dev->dev, &rb->buffer_desc, + KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0); + return ret; +} + +int adreno_ringbuffer_init(struct adreno_device *adreno_dev, bool nopreempt) +{ + int status = 0; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + int i; + + if (nopreempt == false && ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) + adreno_dev->num_ringbuffers = gpudev->num_prio_levels; + else + adreno_dev->num_ringbuffers = 1; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + status = _adreno_ringbuffer_init(adreno_dev, rb, i); + if (status) + break; + } + if (status) + adreno_ringbuffer_close(adreno_dev); + else + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + + return status; +} + +static void _adreno_ringbuffer_close(struct adreno_ringbuffer *rb) +{ + kgsl_free_global(&rb->pagetable_desc); + kgsl_free_global(&rb->preemption_desc); + + 
memset(&rb->pt_update_desc, 0, sizeof(struct kgsl_memdesc)); + + kgsl_free_global(&rb->buffer_desc); + kgsl_del_event_group(&rb->events); + memset(rb, 0, sizeof(struct adreno_ringbuffer)); +} + +void adreno_ringbuffer_close(struct adreno_device *adreno_dev) +{ + struct adreno_ringbuffer *rb; + int i; + + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) + _adreno_ringbuffer_close(rb); +} + +/* + * cp_secure_mode() - Put GPU in trusted mode + * @adreno_dev: Pointer to adreno device + * @cmds: Pointer to cmds to be put in the ringbuffer + * @set: 1 - secure mode, 0 - unsecure mode + * + * Add commands to the ringbuffer to put the GPU in secure mode + * or unsecure mode based on the variable set. + */ +int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds, + int set) +{ + uint *start = cmds; + + if (adreno_is_a4xx(adreno_dev)) { + cmds += cp_wait_for_idle(adreno_dev, cmds); + /* + * The two commands will stall the PFP until the PFP-ME-AHB + * is drained and the GPU is idle. As soon as this happens, + * the PFP will start moving again. + */ + cmds += cp_wait_for_me(adreno_dev, cmds); + + /* + * Below commands are processed by ME. GPU will be + * idle when they are processed. But the PFP will continue + * to fetch instructions at the same time. + */ + *cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *cmds++ = 0; + *cmds++ = cp_packet(adreno_dev, CP_WIDE_REG_WRITE, 2); + *cmds++ = adreno_getreg(adreno_dev, + ADRENO_REG_RBBM_SECVID_TRUST_CONTROL); + *cmds++ = set; + *cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *cmds++ = 1; + + /* Stall PFP until all above commands are complete */ + cmds += cp_wait_for_me(adreno_dev, cmds); + } else { + /* + * A5xx has a separate opcode specifically to put the GPU + * in and out of secure mode. + */ + *cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1); + *cmds++ = set; + } + + return cmds - start; +} + +static int +adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb, + unsigned int flags, unsigned int *cmds, + unsigned int sizedwords, uint32_t timestamp, + struct adreno_submit_time *time) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct kgsl_device *device = rb->device; + unsigned int *ringcmds, *start; + unsigned int total_sizedwords = sizedwords; + unsigned int i; + unsigned int context_id = 0; + uint64_t gpuaddr = rb->device->memstore.gpuaddr; + bool profile_ready; + struct adreno_context *drawctxt = rb->drawctxt_active; + struct kgsl_context *context = NULL; + bool secured_ctxt = false; + uint64_t cond_addr; + + if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base) && + !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + return -ENOENT; + + rb->timestamp++; + + /* If this is a internal IB, use the global timestamp for it */ + if (!drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + timestamp = rb->timestamp; + else { + context_id = drawctxt->base.id; + context = &drawctxt->base; + } + + /* + * Note that we cannot safely take drawctxt->mutex here without + * potential mutex inversion with device->mutex which is held + * here. As a result, any other code that accesses this variable + * must also use device->mutex. + */ + if (drawctxt) { + drawctxt->internal_timestamp = rb->timestamp; + if (drawctxt->base.flags & KGSL_CONTEXT_SECURE) + secured_ctxt = true; + } + + /* + * If in stream ib profiling is enabled and there are counters + * assigned, then space needs to be reserved for profiling. 
This + * space in the ringbuffer is always consumed (might be filled with + * NOPs in error case. profile_ready needs to be consistent through + * the _addcmds call since it is allocating additional ringbuffer + * command space. + */ + profile_ready = drawctxt && + adreno_profile_assignments_ready(&adreno_dev->profile) && + !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE); + + /* reserve space to temporarily turn off protected mode + * error checking if needed + */ + total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0; + /* 2 dwords to store the start of command sequence */ + total_sizedwords += 2; + /* internal ib command identifier for the ringbuffer */ + total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0; + + total_sizedwords += (secured_ctxt) ? 26 : 0; + + /* context rollover */ + if (adreno_is_a3xx(adreno_dev)) + total_sizedwords += 3; + + /* For HLSQ updates below */ + if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev)) + total_sizedwords += 4; + + if (gpudev->preemption_pre_ibsubmit && + adreno_is_preemption_enabled(adreno_dev)) + total_sizedwords += 20; + + if (gpudev->preemption_post_ibsubmit && + adreno_is_preemption_enabled(adreno_dev)) + total_sizedwords += 13; + + /* + * a5xx uses 64 bit memory address. pm4 commands that involve read/write + * from memory take 4 bytes more than a4xx because of 64 bit addressing. + * This function is shared between gpucores, so reserve the max size + * required in ringbuffer and adjust the write pointer depending on + * gpucore at the end of this function. + */ + total_sizedwords += 4; /* sop timestamp */ + total_sizedwords += 5; /* eop timestamp */ + + if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) { + total_sizedwords += 4; /* global timestamp without cache + * flush for non-zero context */ + } + + if (flags & KGSL_CMD_FLAGS_WFI) + total_sizedwords += 2; /* WFI */ + + if (profile_ready) + total_sizedwords += 8; /* space for pre_ib and post_ib */ + + /* Add space for the power on shader fixup if we need it */ + if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) + total_sizedwords += 9; + + /* WAIT_MEM_WRITES - needed in the stall on fault case + * to prevent out of order CP operations that can result + * in a CACHE_FLUSH_TS interrupt storm */ + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy)) + total_sizedwords += 1; + + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); + if (IS_ERR(ringcmds)) + return PTR_ERR(ringcmds); + + start = ringcmds; + + *ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *ringcmds++ = KGSL_CMD_IDENTIFIER; + + if (adreno_is_preemption_enabled(adreno_dev) && + gpudev->preemption_pre_ibsubmit) { + cond_addr = device->memstore.gpuaddr + + KGSL_MEMSTORE_OFFSET(context_id, + preempted); + ringcmds += gpudev->preemption_pre_ibsubmit( + adreno_dev, rb, ringcmds, context, + cond_addr, NULL); + } + + if (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) { + *ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *ringcmds++ = KGSL_CMD_INTERNAL_IDENTIFIER; + } + + if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) { + /* Disable protected mode for the fixup */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 0; + + *ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *ringcmds++ = KGSL_PWRON_FIXUP_IDENTIFIER; + *ringcmds++ = cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1); + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, + adreno_dev->pwron_fixup.gpuaddr); + *ringcmds++ = adreno_dev->pwron_fixup_dwords; + + /* Re-enable protected mode */ + *ringcmds++ = 
cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 1; + } + + /* Add any IB required for profiling if it is enabled */ + if (profile_ready) + adreno_profile_preib_processing(adreno_dev, drawctxt, + &flags, &ringcmds); + + /* start-of-pipeline timestamp */ + *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, + gpuaddr + KGSL_MEMSTORE_OFFSET(context_id, + soptimestamp)); + else + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, + gpuaddr + KGSL_MEMSTORE_RB_OFFSET(rb, soptimestamp)); + *ringcmds++ = timestamp; + + if (secured_ctxt) + ringcmds += cp_secure_mode(adreno_dev, ringcmds, 1); + + if (flags & KGSL_CMD_FLAGS_PMODE) { + /* disable protected mode error checking */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 0; + } + + for (i = 0; i < sizedwords; i++) + *ringcmds++ = cmds[i]; + + if (flags & KGSL_CMD_FLAGS_PMODE) { + /* re-enable protected mode error checking */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 1; + } + + /* + * Flush HLSQ lazy updates to make sure there are no + * resources pending for indirect loads after the timestamp + */ + if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev)) { + *ringcmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1); + *ringcmds++ = 0x07; /* HLSQ_FLUSH */ + ringcmds += cp_wait_for_idle(adreno_dev, ringcmds); + } + + /* Add any postIB required for profiling if it is enabled and has + assigned counters */ + if (profile_ready) + adreno_profile_postib_processing(adreno_dev, &flags, &ringcmds); + + /* + * WAIT_MEM_WRITES - needed in the stall on fault case to prevent + * out of order CP operations that can result in a CACHE_FLUSH_TS + * interrupt storm + */ + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy)) + *ringcmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 0); + + /* + * end-of-pipeline timestamp. If per context timestamps is not + * enabled, then drawctxt will be NULL or internal command flag will be + * set and hence the rb timestamp will be used in else statement below. 
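+	 *
+	 * (For reference: KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp) in
+	 * adreno_ringbuffer.h expands to the memstore slot for index
+	 * rb->id + KGSL_MEMSTORE_MAX, which keeps the per-RB timestamps
+	 * separate from the per-context slots.)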
+ */ + *ringcmds++ = cp_mem_packet(adreno_dev, CP_EVENT_WRITE, 3, 1); + if (drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) + *ringcmds++ = CACHE_FLUSH_TS | (1 << 31); + else + *ringcmds++ = CACHE_FLUSH_TS; + + if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) { + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr + + KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)); + *ringcmds++ = timestamp; + *ringcmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1); + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp)); + *ringcmds++ = rb->timestamp; + } else { + ringcmds += cp_gpuaddr(adreno_dev, ringcmds, gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, eoptimestamp)); + *ringcmds++ = timestamp; + } + + if (adreno_is_a3xx(adreno_dev)) { + /* Dummy set-constant to trigger context rollover */ + *ringcmds++ = cp_packet(adreno_dev, CP_SET_CONSTANT, 2); + *ringcmds++ = + (0x4<<16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000); + *ringcmds++ = 0; + } + + if (flags & KGSL_CMD_FLAGS_WFI) { + ringcmds += cp_wait_for_idle(adreno_dev, ringcmds); + } + + if (secured_ctxt) + ringcmds += cp_secure_mode(adreno_dev, ringcmds, 0); + + if (gpudev->preemption_post_ibsubmit && + adreno_is_preemption_enabled(adreno_dev)) + ringcmds += gpudev->preemption_post_ibsubmit(adreno_dev, + rb, ringcmds, &drawctxt->base); + + /* + * If we have more ringbuffer commands than space reserved + * in ringbuffer BUG() to fix this because it will lead to + * weird errors. + */ + if ((ringcmds - start) > total_sizedwords) + BUG(); + /* + * Allocate total_sizedwords space in RB, this is the max space + * required. If we have commands less than the space reserved in RB + * adjust the wptr accordingly. + */ + rb->wptr = rb->wptr - (total_sizedwords - (ringcmds - start)); + + adreno_ringbuffer_submit(rb, time); + + return 0; +} + +int +adreno_ringbuffer_issuecmds(struct adreno_ringbuffer *rb, + unsigned int flags, + unsigned int *cmds, + int sizedwords) +{ + flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE; + + return adreno_ringbuffer_addcmds(rb, flags, cmds, + sizedwords, 0, NULL); +} + +/** + * _ringbuffer_verify_ib() - Check if an IB's size is within a permitted limit + * @device: The kgsl device pointer + * @ibdesc: Pointer to the IB descriptor + */ +static inline bool _ringbuffer_verify_ib(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_memobj_node *ib) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_process_private *private = dev_priv->process_priv; + + /* The maximum allowable size for an IB in the CP is 0xFFFFF dwords */ + if (ib->size == 0 || ((ib->size >> 2) > 0xFFFFF)) { + pr_context(device, context, "ctxt %d invalid ib size %lld\n", + context->id, ib->size); + return false; + } + + /* Make sure that the address is mapped */ + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) { + pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n", + context->id, ib->gpuaddr); + return false; + } + + return true; +} + +int +adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp) +{ + struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_context *drawctxt = ADRENO_CONTEXT(context); + struct kgsl_memobj_node *ib; + int ret; + + if (kgsl_context_invalid(context)) + return -EDEADLK; + + /* Verify the IBs before they get queued */ + list_for_each_entry(ib, 
&cmdbatch->cmdlist, node) + if (_ringbuffer_verify_ib(dev_priv, context, ib) == false) + return -EINVAL; + + /* wait for the suspend gate */ + wait_for_completion(&device->cmdbatch_gate); + + /* + * Clear the wake on touch bit to indicate an IB has been + * submitted since the last time we set it. But only clear + * it when we have rendering commands. + */ + if (!(cmdbatch->flags & KGSL_CMDBATCH_MARKER) + && !(cmdbatch->flags & KGSL_CMDBATCH_SYNC)) + device->flags &= ~KGSL_FLAG_WAKE_ON_TOUCH; + + /* Queue the command in the ringbuffer */ + ret = adreno_dispatcher_queue_cmd(adreno_dev, drawctxt, cmdbatch, + timestamp); + + /* + * Return -EPROTO if the device has faulted since the last time we + * checked - userspace uses this to perform post-fault activities + */ + if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv)) + ret = -EPROTO; + + return ret; +} + +static void adreno_ringbuffer_set_constraint(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_context *context = cmdbatch->context; + /* + * Check if the context has a constraint and constraint flags are + * set. + */ + if (context->pwr_constraint.type && + ((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) || + (cmdbatch->flags & KGSL_CONTEXT_PWR_CONSTRAINT))) + kgsl_pwrctrl_set_constraint(device, &context->pwr_constraint, + context->id); +} + +static inline int _get_alwayson_counter(struct adreno_device *adreno_dev, + unsigned int *cmds, uint64_t gpuaddr) +{ + unsigned int *p = cmds; + + *p++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1); + + /* + * For a4x and some a5x the alwayson_hi read through CPU + * will be masked. Only do 32 bit CP reads for keeping the + * numbers consistent + */ + if (ADRENO_GPUREV(adreno_dev) >= 400 && + ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530) + *p++ = adreno_getreg(adreno_dev, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO); + else + *p++ = adreno_getreg(adreno_dev, + ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO) | + (1 << 30) | (2 << 18); + p += cp_gpuaddr(adreno_dev, p, gpuaddr); + + return (unsigned int)(p - cmds); +} + +/* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */ +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch, struct adreno_submit_time *time) +{ + struct kgsl_device *device = &adreno_dev->dev; + struct kgsl_memobj_node *ib; + unsigned int numibs = 0; + unsigned int *link; + unsigned int *cmds; + struct kgsl_context *context; + struct adreno_context *drawctxt; + bool use_preamble = true; + bool cmdbatch_user_profiling = false; + bool cmdbatch_kernel_profiling = false; + int flags = KGSL_CMD_FLAGS_NONE; + int ret; + struct adreno_ringbuffer *rb; + struct kgsl_cmdbatch_profiling_buffer *profile_buffer = NULL; + unsigned int dwords = 0; + struct adreno_submit_time local; + + struct kgsl_mem_entry *entry = cmdbatch->profiling_buf_entry; + if (entry) + profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc, + cmdbatch->profiling_buffer_gpuaddr); + + context = cmdbatch->context; + drawctxt = ADRENO_CONTEXT(context); + + /* Get the total IBs in the list */ + list_for_each_entry(ib, &cmdbatch->cmdlist, node) + numibs++; + + rb = drawctxt->rb; + + /* process any profiling results that are available into the log_buf */ + adreno_profile_process_results(adreno_dev); + + /* + * If SKIP CMD flag is set for current context + * a) set SKIPCMD as fault_recovery for current commandbatch + * b) store context's commandbatch fault_policy in current + * commandbatch fault_policy and clear context's commandbatch + * 
fault_policy
+	 * c) force preamble for commandbatch
+	 */
+	if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) &&
+		(!test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv))) {
+
+		set_bit(KGSL_FT_SKIPCMD, &cmdbatch->fault_recovery);
+		cmdbatch->fault_policy = drawctxt->fault_policy;
+		set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv);
+
+		/* if context is detached print fault recovery */
+		adreno_fault_skipcmd_detached(device, drawctxt, cmdbatch);
+
+		/* clear the drawctxt flags */
+		clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+		drawctxt->fault_policy = 0;
+	}
+
+	/*
+	 * When preamble is enabled, the preamble buffer with state
+	 * restoration commands is stored in the first node of the IB chain.
+	 * We can skip that if a context switch hasn't occurred.
+	 */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
+		!test_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv) &&
+		(rb->drawctxt_active == drawctxt))
+		use_preamble = false;
+
+	/*
+	 * In skip mode don't issue the draw IBs but keep all the other
+	 * accoutrements of a submission (including the interrupt) to keep
+	 * the accounting sane. Set start_index and numibs to 0 to just
+	 * generate the start and end markers and skip everything else
+	 */
+	if (test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv)) {
+		use_preamble = false;
+		numibs = 0;
+	}
+
+	/*
+	 * a5xx uses 64 bit memory address. pm4 commands that involve read/write
+	 * from memory take 4 bytes more than a4xx because of 64 bit addressing.
+	 * This function is shared between gpucores, so reserve the max size
+	 * required and adjust the number of commands before calling addcmds.
+	 * Each submission needs 7 dwords max for wrappers and other red tape.
+	 */
+	dwords = 7;
+
+	/* Each IB takes up 30 dwords in worst case */
+	dwords += (numibs * 30);
+
+	if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING &&
+		!adreno_is_a3xx(adreno_dev) && profile_buffer) {
+		cmdbatch_user_profiling = true;
+		dwords += 6;
+
+		/*
+		 * REG_TO_MEM packet on A5xx needs another ordinal.
+		 * Add 2 more dwords since we do profiling before and after.
+		 */
+		if (adreno_is_a5xx(adreno_dev))
+			dwords += 2;
+
+		/*
+		 * We want to use an adreno_submit_time struct to get the
+		 * precise moment when the command is submitted to the
+		 * ringbuffer. 
If an upstream caller already passed down a + * pointer piggyback on that otherwise use a local struct + */ + + if (time == NULL) + time = &local; + } + + if (test_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv)) { + cmdbatch_kernel_profiling = true; + dwords += 6; + if (adreno_is_a5xx(adreno_dev)) + dwords += 2; + } + + link = kzalloc(sizeof(unsigned int) * dwords, GFP_KERNEL); + if (!link) { + ret = -ENOMEM; + goto done; + } + + cmds = link; + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_START_OF_IB_IDENTIFIER; + + if (cmdbatch_kernel_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + adreno_dev->cmdbatch_profile_buffer.gpuaddr + + ADRENO_CMDBATCH_PROFILE_OFFSET(cmdbatch->profile_index, + started)); + } + + /* + * Add cmds to read the GPU ticks at the start of the cmdbatch and + * write it into the appropriate cmdbatch profiling buffer offset + */ + if (cmdbatch_user_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + cmdbatch->profiling_buffer_gpuaddr + + offsetof(struct kgsl_cmdbatch_profiling_buffer, + gpu_ticks_submitted)); + } + + if (numibs) { + list_for_each_entry(ib, &cmdbatch->cmdlist, node) { + /* + * Skip 0 sized IBs - these are presumed to have been + * removed from consideration by the FT policy + */ + if (ib->priv & MEMOBJ_SKIP || + (ib->priv & MEMOBJ_PREAMBLE && + use_preamble == false)) + *cmds++ = cp_mem_packet(adreno_dev, CP_NOP, + 3, 1); + + *cmds++ = cp_mem_packet(adreno_dev, + CP_INDIRECT_BUFFER_PFE, 2, 1); + cmds += cp_gpuaddr(adreno_dev, cmds, ib->gpuaddr); + *cmds++ = (unsigned int) ib->size >> 2; + /* preamble is required on only for first command */ + use_preamble = false; + } + } + + if (cmdbatch_kernel_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + adreno_dev->cmdbatch_profile_buffer.gpuaddr + + ADRENO_CMDBATCH_PROFILE_OFFSET(cmdbatch->profile_index, + retired)); + } + + /* + * Add cmds to read the GPU ticks at the end of the cmdbatch and + * write it into the appropriate cmdbatch profiling buffer offset + */ + if (cmdbatch_user_profiling) { + cmds += _get_alwayson_counter(adreno_dev, cmds, + cmdbatch->profiling_buffer_gpuaddr + + offsetof(struct kgsl_cmdbatch_profiling_buffer, + gpu_ticks_retired)); + } + + *cmds++ = cp_packet(adreno_dev, CP_NOP, 1); + *cmds++ = KGSL_END_OF_IB_IDENTIFIER; + + ret = adreno_drawctxt_switch(adreno_dev, rb, drawctxt, cmdbatch->flags); + + /* + * In the unlikely event of an error in the drawctxt switch, + * treat it like a hang + */ + if (ret) + goto done; + + if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv)) + flags = KGSL_CMD_FLAGS_WFI; + + /* + * For some targets, we need to execute a dummy shader operation after a + * power collapse + */ + + if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) && + test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) + flags |= KGSL_CMD_FLAGS_PWRON_FIXUP; + + /* Set the constraints before adding to ringbuffer */ + adreno_ringbuffer_set_constraint(device, cmdbatch); + + /* CFF stuff executed only if CFF is enabled */ + kgsl_cffdump_capture_ib_desc(device, context, cmdbatch); + + + ret = adreno_ringbuffer_addcmds(rb, flags, + &link[0], (cmds - link), + cmdbatch->timestamp, time); + + if (!ret) { + cmdbatch->global_ts = drawctxt->internal_timestamp; + + /* Put the timevalues in the profiling buffer */ + if (cmdbatch_user_profiling) { + profile_buffer->wall_clock_s = time->utime.tv_sec; + profile_buffer->wall_clock_ns = time->utime.tv_nsec; + profile_buffer->gpu_ticks_queued = time->ticks; + } + } + + 
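+	/*
+	 * Record a poll of RBBM_STATUS (mask 0x80000000, expected value 0)
+	 * in the CFF capture below; like the other kgsl_cffdump_* calls in
+	 * this function it only has an effect when CFF dumping is enabled.
+	 */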
kgsl_cffdump_regpoll(device, + adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2, + 0x00000000, 0x80000000); +done: + /* Corresponding unmap to the memdesc map of profile_buffer */ + if (entry) + kgsl_memdesc_unmap(&entry->memdesc); + + + trace_kgsl_issueibcmds(device, context->id, cmdbatch, + numibs, cmdbatch->timestamp, + cmdbatch->flags, ret, drawctxt->type); + + kfree(link); + return ret; +} + +/** + * adreno_ringbuffer_mmu_clk_disable_event() - Callback function that + * disables the MMU clocks. + * @device: Device pointer + * @context: The ringbuffer context pointer + * @data: Pointer containing the adreno_mmu_disable_clk_param structure + * @type: The event call type (RETIRED or CANCELLED) + */ +static void adreno_ringbuffer_mmu_clk_disable_event(struct kgsl_device *device, + struct kgsl_event_group *group, void *data, int type) +{ + kgsl_mmu_disable_clk(&device->mmu); +} + +/* + * adreno_ringbuffer_mmu_disable_clk_on_ts() - Sets up event to disable MMU + * clocks + * @device - The kgsl device pointer + * @rb: The ringbuffer in whose event list the event is added + * @timestamp: The timestamp on which the event should trigger + * + * Creates an event to disable the MMU clocks on timestamp and if event + * already exists then updates the timestamp of disabling the MMU clocks + * with the passed in ts if it is greater than the current value at which + * the clocks will be disabled + * Return - void + */ +void +adreno_ringbuffer_mmu_disable_clk_on_ts(struct kgsl_device *device, + struct adreno_ringbuffer *rb, unsigned int timestamp) +{ + if (kgsl_add_event(device, &(rb->events), timestamp, + adreno_ringbuffer_mmu_clk_disable_event, NULL)) { + KGSL_DRV_ERR(device, + "Failed to add IOMMU disable clk event\n"); + } +} + +/** + * adreno_ringbuffer_wait_callback() - Callback function for event registered + * on a ringbuffer timestamp + * @device: Device for which the the callback is valid + * @context: The context of the event + * @priv: The private parameter of the event + * @result: Result of the event trigger + */ +static void adreno_ringbuffer_wait_callback(struct kgsl_device *device, + struct kgsl_event_group *group, + void *priv, int result) +{ + struct adreno_ringbuffer *rb = group->priv; + wake_up_all(&rb->ts_expire_waitq); +} + +/** + * adreno_ringbuffer_waittimestamp() - Wait for a RB timestamp + * @rb: The ringbuffer to wait on + * @timestamp: The timestamp to wait for + * @msecs: The wait timeout period + */ +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs) +{ + struct kgsl_device *device = rb->device; + int ret; + unsigned long wait_time; + + /* force a timeout from caller for the wait */ + BUG_ON(0 == msecs); + + ret = kgsl_add_event(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL); + if (ret) + return ret; + + mutex_unlock(&device->mutex); + + wait_time = msecs_to_jiffies(msecs); + if (0 == wait_event_timeout(rb->ts_expire_waitq, + !kgsl_event_pending(device, &rb->events, timestamp, + adreno_ringbuffer_wait_callback, NULL), + wait_time)) + ret = -ETIMEDOUT; + + mutex_lock(&device->mutex); + /* + * after wake up make sure that expected timestamp has retired + * because the wakeup could have happened due to a cancel event + */ + if (!ret && !adreno_ringbuffer_check_timestamp(rb, + timestamp, KGSL_TIMESTAMP_RETIRED)) { + ret = -EAGAIN; + } + + return ret; +} + +/** + * adreno_ringbuffer_submit_preempt_token() - Submit a preempt token + * @rb: Ringbuffer in which the token is submitted + 
* @incoming_rb: The RB to which the GPU switches when this preemption + * token is executed. + * + * Called to make sure that an outstanding preemption request is + * granted. + */ +int adreno_ringbuffer_submit_preempt_token(struct adreno_ringbuffer *rb, + struct adreno_ringbuffer *incoming_rb) +{ + unsigned int *ringcmds, *start; + struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device); + struct kgsl_device *device = &(adreno_dev->dev); + struct kgsl_iommu *iommu = device->mmu.priv; + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + int ptname; + struct kgsl_pagetable *pt; + int pt_switch_sizedwords = 0, total_sizedwords = 20; + unsigned link[ADRENO_RB_PREEMPT_TOKEN_DWORDS]; + uint i; + uint64_t ttbr0; + + if (incoming_rb->preempted_midway) { + + if (adreno_is_a5xx(adreno_dev)) { + kgsl_sharedmem_readq(&rb->pagetable_desc, &ttbr0, + offsetof(struct adreno_ringbuffer_pagetable_info + , ttbr0)); + kgsl_sharedmem_writeq(rb->device, &iommu->smmu_info, + offsetof(struct a5xx_cp_smmu_info, ttbr0), + ttbr0); + } else { + kgsl_sharedmem_readl(&incoming_rb->pagetable_desc, + &ptname, offsetof( + struct adreno_ringbuffer_pagetable_info, + current_rb_ptname)); + pt = kgsl_mmu_get_pt_from_ptname(&(rb->device->mmu), + ptname); + /* + * always expect a valid pt, else pt refcounting is + * messed up or current pt tracking has a bug which + * could lead to eventual disaster + */ + BUG_ON(!pt); + /* set the ringbuffer for incoming RB */ + pt_switch_sizedwords = + adreno_iommu_set_pt_generate_cmds(incoming_rb, + &link[0], pt); + total_sizedwords += pt_switch_sizedwords; + + } + } + + /* + * Allocate total_sizedwords space in RB, this is the max space + * required. + */ + ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords); + + if (IS_ERR(ringcmds)) + return PTR_ERR(ringcmds); + + start = ringcmds; + + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 0; + + if (incoming_rb->preempted_midway) { + for (i = 0; i < pt_switch_sizedwords; i++) + *ringcmds++ = link[i]; + } + + *ringcmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev, + ADRENO_REG_CP_PREEMPT_DISABLE), 1); + *ringcmds++ = 0; + + *ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1); + *ringcmds++ = 1; + + ringcmds += gpudev->preemption_token(adreno_dev, rb, ringcmds, + rb->device->memstore.gpuaddr + + KGSL_MEMSTORE_RB_OFFSET(rb, preempted)); + + if ((uint)(ringcmds - start) > total_sizedwords) { + KGSL_DRV_ERR(device, "Insufficient rb size allocated\n"); + BUG(); + } + + /* + * If we have commands less than the space reserved in RB + * adjust the wptr accordingly + */ + rb->wptr = rb->wptr - (total_sizedwords - (uint)(ringcmds - start)); + + /* submit just the preempt token */ + mb(); + kgsl_pwrscale_busy(rb->device); + adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr); + return 0; +} diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h new file mode 100644 index 000000000000..7fd28f52db83 --- /dev/null +++ b/drivers/gpu/msm/adreno_ringbuffer.h @@ -0,0 +1,226 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_RINGBUFFER_H
+#define __ADRENO_RINGBUFFER_H
+
+#include "kgsl_iommu.h"
+#include "adreno_dispatch.h"
+
+/* Adreno ringbuffer size in bytes */
+#define KGSL_RB_SIZE (32 * 1024)
+
+/*
+ * A handy macro to convert the RB size to dwords since most ringbuffer
+ * operations happen in dword increments
+ */
+#define KGSL_RB_DWORDS (KGSL_RB_SIZE >> 2)
+
+struct kgsl_device;
+struct kgsl_device_private;
+
+/**
+ * struct adreno_submit_time - utility structure to store the wall clock / GPU
+ * ticks at command submit time
+ * @ticks: GPU ticks at submit time (from the 19.2MHz always-on counter)
+ * @ktime: local clock time (in nanoseconds)
+ * @utime: Wall clock time
+ */
+struct adreno_submit_time {
+	uint64_t ticks;
+	u64 ktime;
+	struct timespec utime;
+};
+
+/**
+ * struct adreno_ringbuffer_pagetable_info - Contains fields used during a
+ * pagetable switch.
+ * @current_global_ptname: The current pagetable id being used by the GPU.
+ * Only the ringbuffers[0] current_global_ptname is used to keep track of
+ * the current pagetable id
+ * @current_rb_ptname: The current pagetable active on the given RB
+ * @incoming_ptname: Contains the incoming pagetable we are switching to. After
+ * switching of pagetable this value equals current_rb_ptname.
+ * @switch_pt_enable: Flag used during pagetable switch to check if pt
+ * switch can be skipped
+ * @ttbr0: value to program into TTBR0 during pagetable switch.
+ * @contextidr: value to program into CONTEXTIDR during pagetable switch.
+ */
+struct adreno_ringbuffer_pagetable_info {
+	int current_global_ptname;
+	int current_rb_ptname;
+	int incoming_ptname;
+	int switch_pt_enable;
+	uint64_t ttbr0;
+	unsigned int contextidr;
+};
+
+/**
+ * struct adreno_ringbuffer - Definition for an adreno ringbuffer object
+ * @device: KGSL device that owns the ringbuffer object
+ * @flags: Internal control flags for the ringbuffer
+ * @buffer_desc: The ringbuffer memory descriptor
+ * @sizedwords: Size of the ringbuffer in dwords
+ * @wptr: Local copy of the wptr offset
+ * @rptr: Read pointer offset in dwords from baseaddr
+ * @last_wptr: offset of the last H/W committed wptr
+ * @id: Priority level of the ringbuffer, also used as an ID
+ * @fault_detect_ts: The last retired global timestamp read during fault detect
+ * @timestamp: The RB's global timestamp
+ * @events: A kgsl_event_group for this ringbuffer - contains the list of GPU
+ * events
+ * @drawctxt_active: The last drawctxt that was active on this ringbuffer
+ * @preemption_desc: The memory descriptor containing
+ * preemption info written/read by CP
+ * @pagetable_desc: Memory to hold information about the pagetables being used
+ * and the commands to switch pagetable on the RB
+ * @pt_update_desc: The memory descriptor containing commands that update
+ * pagetable
+ * @dispatch_q: The dispatcher side queue for this ringbuffer
+ * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire
+ * @wptr_preempt_end: Used during preemption to check that preemption occurred
+ * at the right rptr
+ * @gpr11: The gpr11 value of this RB
+ * @preempted_midway: Indicates that the RB was preempted before rptr = wptr
+ * @sched_timer: Timer that tracks how long RB has been 
waiting to be scheduled + * or how long it has been scheduled for after preempting in + * @starve_timer_state: Indicates the state of the wait. + */ +struct adreno_ringbuffer { + struct kgsl_device *device; + uint32_t flags; + struct kgsl_memdesc buffer_desc; + unsigned int sizedwords; + unsigned int wptr; + unsigned int rptr; + unsigned int last_wptr; + int id; + unsigned int fault_detect_ts; + unsigned int timestamp; + struct kgsl_event_group events; + struct adreno_context *drawctxt_active; + struct kgsl_memdesc preemption_desc; + struct kgsl_memdesc pagetable_desc; + struct kgsl_memdesc pt_update_desc; + struct adreno_dispatcher_cmdqueue dispatch_q; + wait_queue_head_t ts_expire_waitq; + unsigned int wptr_preempt_end; + unsigned int gpr11; + int preempted_midway; + unsigned long sched_timer; + enum adreno_dispatcher_starve_timer_states starve_timer_state; +}; + +/* enable timestamp (...scratch0) memory shadowing */ +#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1 + +/* + * protected mode error checking below register address 0x800 + * note: if CP_INTERRUPT packet is used then checking needs + * to change to below register address 0x7C8 + */ +#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2 + +/* Returns the current ringbuffer */ +#define ADRENO_CURRENT_RINGBUFFER(a) ((a)->cur_rb) + +#define KGSL_MEMSTORE_RB_OFFSET(rb, field) \ + KGSL_MEMSTORE_OFFSET((rb->id + KGSL_MEMSTORE_MAX), field) + +int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds, int set); + +int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamp); + +int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev, + struct kgsl_cmdbatch *cmdbatch, + struct adreno_submit_time *time); + +int adreno_ringbuffer_init(struct adreno_device *adreno_dev, bool nopreempt); + +int adreno_ringbuffer_start(struct adreno_device *adreno_dev, + unsigned int start_type); + +void adreno_ringbuffer_stop(struct adreno_device *adreno_dev); + +void adreno_ringbuffer_close(struct adreno_device *adreno_dev); + +int adreno_ringbuffer_issuecmds(struct adreno_ringbuffer *rb, + unsigned int flags, + unsigned int *cmdaddr, + int sizedwords); + +void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time); + +int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb, + struct adreno_submit_time *time, unsigned int timeout); + +void kgsl_cp_intrcallback(struct kgsl_device *device); + +unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb, + unsigned int numcmds); + +void adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device); + +void adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device); + +void adreno_ringbuffer_mmu_disable_clk_on_ts(struct kgsl_device *device, + struct adreno_ringbuffer *rb, unsigned int ts); + +int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb, + unsigned int timestamp, + unsigned int msecs); + +int adreno_rb_readtimestamp(struct kgsl_device *device, + void *priv, enum kgsl_timestamp_type type, + unsigned int *timestamp); + +int adreno_ringbuffer_submit_preempt_token(struct adreno_ringbuffer *rb, + struct adreno_ringbuffer *incoming_rb); + +static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb, + unsigned int rptr) +{ + if (rb->wptr >= rptr) + return rb->wptr - rptr; + return rb->wptr + KGSL_RB_DWORDS - rptr; +} + +/* Increment a value by 4 bytes with wrap-around based on size */ +static inline unsigned int 
adreno_ringbuffer_inc_wrapped(unsigned int val, + unsigned int size) +{ + return (val + sizeof(unsigned int)) % size; +} + +/* Decrement a value by 4 bytes with wrap-around based on size */ +static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val, + unsigned int size) +{ + return (val + size - sizeof(unsigned int)) % size; +} + +/* check if timestamp is greater than the current rb timestamp */ +static inline int adreno_ringbuffer_check_timestamp( + struct adreno_ringbuffer *rb, + unsigned int timestamp, int type) +{ + unsigned int ts; + adreno_rb_readtimestamp(rb->device, rb, type, &ts); + return (timestamp_cmp(ts, timestamp) >= 0); +} + +#endif /* __ADRENO_RINGBUFFER_H */ diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c new file mode 100644 index 000000000000..81c6fc4ce381 --- /dev/null +++ b/drivers/gpu/msm/adreno_snapshot.c @@ -0,0 +1,1127 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" + +#include "adreno.h" +#include "adreno_pm4types.h" +#include "a3xx_reg.h" +#include "adreno_cp_parser.h" +#include "adreno_snapshot.h" +#include "adreno_a5xx.h" + +/* Number of dwords of ringbuffer history to record */ +#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100 + +#define VPC_MEMORY_BANKS 4 + +/* Maintain a list of the objects we see during parsing */ + +#define SNAPSHOT_OBJ_BUFSIZE 64 + +#define SNAPSHOT_OBJ_TYPE_IB 0 + +/* Used to print error message if an IB has too many objects in it */ +static int ib_max_objs; + +struct snapshot_rb_params { + struct kgsl_snapshot *snapshot; + struct adreno_ringbuffer *rb; +}; + +/* Keep track of how many bytes are frozen after a snapshot and tell the user */ +static size_t snapshot_frozen_objsize; + +static struct kgsl_snapshot_object objbuf[SNAPSHOT_OBJ_BUFSIZE]; + +/* Pointer to the next open entry in the object list */ +static unsigned int objbufptr; + +static inline int adreno_rb_ctxtswitch(struct adreno_device *adreno_dev, + unsigned int *cmd) +{ + return cmd[0] == cp_packet(adreno_dev, CP_NOP, 1) && + cmd[1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER; +} + +/* Push a new buffer object onto the list */ +static void push_object(int type, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + int index; + struct kgsl_mem_entry *entry; + + if (process == NULL) + return; + + /* + * Sometimes IBs can be reused in the same dump. Because we parse from + * oldest to newest, if we come across an IB that has already been used, + * assume that it has been reused and update the list with the newest + * size. 
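+	 *
+	 * For example (illustrative numbers only): if an IB at the same
+	 * gpuaddr is first recorded with 0x100 dwords and later seen with
+	 * 0x80 dwords, the entry keeps the larger 0x400-byte size via the
+	 * max_t() below.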
+ */ + + for (index = 0; index < objbufptr; index++) { + if (objbuf[index].gpuaddr == gpuaddr && + objbuf[index].entry->priv == process) { + + objbuf[index].size = max_t(uint64_t, + objbuf[index].size, + dwords << 2); + return; + } + } + + if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) { + KGSL_CORE_ERR("snapshot: too many snapshot objects\n"); + return; + } + + entry = kgsl_sharedmem_find(process, gpuaddr); + if (entry == NULL) { + KGSL_CORE_ERR("snapshot: Can't find entry for 0x%016llX\n", + gpuaddr); + return; + } + + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, dwords << 2)) { + KGSL_CORE_ERR("snapshot: Mem entry 0x%016llX is too small\n", + gpuaddr); + kgsl_mem_entry_put(entry); + return; + } + + /* Put it on the list of things to parse */ + objbuf[objbufptr].type = type; + objbuf[objbufptr].gpuaddr = gpuaddr; + objbuf[objbufptr].size = dwords << 2; + objbuf[objbufptr++].entry = entry; +} + +/* + * Return a 1 if the specified object is already on the list of buffers + * to be dumped + */ + +static int find_object(int type, uint64_t gpuaddr, + struct kgsl_process_private *process) +{ + int index; + + for (index = 0; index < objbufptr; index++) { + if (objbuf[index].gpuaddr == gpuaddr && + objbuf[index].entry->priv == process) + return 1; + } + + return 0; +} + +/* + * snapshot_freeze_obj_list() - Take a list of ib objects and freeze their + * memory for snapshot + * @snapshot: The snapshot data. + * @process: The process to which the IB belongs + * @ib_obj_list: List of the IB objects + * @ib2base: IB2 base address at time of the fault + * + * Returns 0 on success else error code + */ +static int snapshot_freeze_obj_list(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + struct adreno_ib_object_list *ib_obj_list, + uint64_t ib2base) +{ + int ret = 0; + struct adreno_ib_object *ib_objs; + int i; + + for (i = 0; i < ib_obj_list->num_objs; i++) { + int temp_ret; + int index; + int freeze = 1; + + ib_objs = &(ib_obj_list->obj_list[i]); + /* Make sure this object is not going to be saved statically */ + for (index = 0; index < objbufptr; index++) { + if ((objbuf[index].gpuaddr <= ib_objs->gpuaddr) && + ((objbuf[index].gpuaddr + + (objbuf[index].size)) >= + (ib_objs->gpuaddr + ib_objs->size)) && + (objbuf[index].entry->priv == process)) { + freeze = 0; + break; + } + } + + if (freeze) { + /* Save current IB2 statically */ + if (ib2base == ib_objs->gpuaddr) { + push_object(SNAPSHOT_OBJ_TYPE_IB, + process, ib_objs->gpuaddr, ib_objs->size >> 2); + } else { + temp_ret = kgsl_snapshot_get_object(snapshot, + process, ib_objs->gpuaddr, + ib_objs->size, + ib_objs->snapshot_obj_type); + if (temp_ret < 0) { + if (ret >= 0) + ret = temp_ret; + } else { + snapshot_frozen_objsize += temp_ret; + } + } + } + } + return ret; +} + +/* + * We want to store the last executed IB1 and IB2 in the static region to ensure + * that we get at least some information out of the snapshot even if we can't + * access the dynamic data from the sysfs file. 
Push all other IBs on the
+ * dynamic list
+ */
+static inline void parse_ib(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	uint64_t ib1base;
+	struct adreno_ib_object_list *ib_obj_list;
+
+	/*
+	 * Check the IB address - if it is the last executed IB1 then push it
+	 * into the static blob, otherwise put it in the dynamic list
+	 */
+
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+		ADRENO_REG_CP_IB1_BASE_HI, &ib1base);
+
+	if (gpuaddr == ib1base) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, process,
+			gpuaddr, dwords);
+		return;
+	}
+
+	if (kgsl_snapshot_have_object(snapshot, process,
+			gpuaddr, dwords << 2))
+		return;
+
+	if (-E2BIG == adreno_ib_create_object_list(device, process,
+			gpuaddr, dwords, &ib_obj_list))
+		ib_max_objs = 1;
+
+	if (ib_obj_list)
+		kgsl_snapshot_add_ib_obj_list(snapshot, ib_obj_list);
+
+}
+
+/**
+ * snapshot_rb_ibs() - Dump rb data and capture the IB's in the RB as well
+ * @rb: The RB to dump
+ * @data: Pointer to memory where the RB data is to be dumped
+ * @snapshot: Pointer to information about the current snapshot being taken
+ */
+static void snapshot_rb_ibs(struct adreno_ringbuffer *rb,
+		unsigned int *data,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int rptr, *rbptr;
+	uint64_t ibbase;
+	int index, i;
+	int parse_ibs = 0, ib_parse_start;
+
+	/* Get the current read pointers for the RB */
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+
+	/* Address of the last processed IB */
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+		ADRENO_REG_CP_IB1_BASE_HI, &ibbase);
+
+	/*
+	 * Figure out the window of ringbuffer data to dump. First we need to
+	 * find where the last processed IB was submitted. Start walking back
+	 * from the rptr
+	 */
+
+	index = rptr;
+	rbptr = rb->buffer_desc.hostptr;
+
+	do {
+		index--;
+
+		if (index < 0) {
+			index = KGSL_RB_DWORDS - 3;
+
+			/* We wrapped without finding what we wanted */
+			if (index < rb->wptr) {
+				index = rb->wptr;
+				break;
+			}
+		}
+
+		if (adreno_cmd_is_ib(adreno_dev, rbptr[index]) &&
+				rbptr[index + 1] == ibbase)
+			break;
+	} while (index != rb->wptr);
+
+	/*
+	 * index points at the last submitted IB. We can only trust that the
+	 * memory between the context switch and the hanging IB is valid, so
+	 * the next step is to find the context switch before the submission
+	 */
+
+	while (index != rb->wptr) {
+		index--;
+
+		if (index < 0) {
+			index = KGSL_RB_DWORDS - 2;
+
+			/*
+			 * Wrapped without finding the context switch. This is
+			 * harmless - we should still have enough data to dump a
+			 * valid state
+			 */
+
+			if (index < rb->wptr) {
+				index = rb->wptr;
+				break;
+			}
+		}
+
+		/* Break if the current packet is a context switch identifier */
+		if ((rbptr[index] == cp_packet(adreno_dev, CP_NOP, 1)) &&
+			(rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER))
+			break;
+	}
+
+	/*
+	 * Index represents the start of the window of interest.
We will try + * to dump all buffers between here and the rptr + */ + + ib_parse_start = index; + + /* + * Loop through the RB, copying the data and looking for indirect + * buffers and MMU pagetable changes + */ + + index = rb->wptr; + for (i = 0; i < KGSL_RB_DWORDS; i++) { + *data = rbptr[index]; + + /* + * Only parse IBs between the start and the rptr or the next + * context switch, whichever comes first + */ + + if (parse_ibs == 0 && index == ib_parse_start) + parse_ibs = 1; + else if (index == rptr || adreno_rb_ctxtswitch(adreno_dev, + &rbptr[index])) + parse_ibs = 0; + + if (parse_ibs && adreno_cmd_is_ib(adreno_dev, rbptr[index])) { + uint64_t ibaddr; + uint64_t ibsize; + + if (ADRENO_LEGACY_PM4(adreno_dev)) { + ibaddr = rbptr[index + 1]; + ibsize = rbptr[index + 2]; + } else { + ibaddr = rbptr[index + 2]; + ibaddr = ibaddr << 32 | rbptr[index + 1]; + ibsize = rbptr[index + 3]; + } + + /* + * Sometimes the kernel generates IBs in global + * memory. We dump the interesting global buffers, + * so there's no need to parse these IBs. + */ + if (!kgsl_search_global_pt_entries(ibaddr, ibsize)) + parse_ib(device, snapshot, snapshot->process, + ibaddr, ibsize); + } + + index = index + 1; + + if (index == KGSL_RB_DWORDS) + index = 0; + + data++; + } + +} + +/* Snapshot the ringbuffer memory */ +static size_t snapshot_rb(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct snapshot_rb_params *snap_rb_params = priv; + struct kgsl_snapshot *snapshot = snap_rb_params->snapshot; + struct adreno_ringbuffer *rb = snap_rb_params->rb; + + /* + * Dump the entire ringbuffer - the parser can choose how much of it to + * process + */ + + if (remain < KGSL_RB_SIZE + sizeof(*header)) { + KGSL_CORE_ERR("snapshot: Not enough memory for the rb section"); + return 0; + } + + /* Write the sub-header for the section */ + header->start = rb->wptr; + header->end = rb->wptr; + header->wptr = rb->wptr; + header->rptr = rb->rptr; + header->rbsize = KGSL_RB_DWORDS; + header->count = KGSL_RB_DWORDS; + adreno_rb_readtimestamp(device, rb, KGSL_TIMESTAMP_QUEUED, + &header->timestamp_queued); + adreno_rb_readtimestamp(device, rb, KGSL_TIMESTAMP_RETIRED, + &header->timestamp_retired); + header->gpuaddr = rb->buffer_desc.gpuaddr; + header->id = rb->id; + + if (rb == adreno_dev->cur_rb) { + snapshot_rb_ibs(rb, data, snapshot); + } else { + /* Just copy the ringbuffer, there are no active IBs */ + memcpy(data, rb->buffer_desc.hostptr, KGSL_RB_SIZE); + } + /* Return the size of the section */ + return KGSL_RB_SIZE + sizeof(*header); +} + +static int _count_mem_entries(int id, void *ptr, void *data) +{ + int *count = data; + *count = *count + 1; + return 0; +} + +struct mem_entry { + uint64_t gpuaddr; + uint64_t size; + unsigned int type; +} __packed; + +static int _save_mem_entries(int id, void *ptr, void *data) +{ + struct kgsl_mem_entry *entry = ptr; + struct mem_entry *m = (struct mem_entry *) data; + + m->gpuaddr = entry->memdesc.gpuaddr; + m->size = entry->memdesc.size; + m->type = kgsl_memdesc_get_memtype(&entry->memdesc); + + return 0; +} + +static size_t snapshot_capture_mem_list(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_mem_list_v2 *header = + (struct kgsl_snapshot_mem_list_v2 *)buf; + int num_mem = 0; + int ret = 0; + unsigned int *data = (unsigned int 
*)(buf + sizeof(*header));
+	struct kgsl_process_private *process = priv;
+
+	/* we need a process to search! */
+	if (process == NULL)
+		return 0;
+
+	spin_lock(&process->mem_lock);
+
+	/* We need to know the number of memory objects that the process has */
+	idr_for_each(&process->mem_idr, _count_mem_entries, &num_mem);
+
+	if (num_mem == 0)
+		goto out;
+
+	if (remain < ((num_mem * sizeof(struct mem_entry)) + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the mem list");
+		goto out;
+	}
+
+	header->num_entries = num_mem;
+	header->ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable);
+	/*
+	 * Walk through the memory list and store the
+	 * (gpuaddr, size, memtype) tuples in the snapshot
+	 */
+
+	idr_for_each(&process->mem_idr, _save_mem_entries, data);
+
+	ret = sizeof(*header) + (num_mem * sizeof(struct mem_entry));
+out:
+	spin_unlock(&process->mem_lock);
+	return ret;
+}
+
+struct snapshot_ib_meta {
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj;
+	uint64_t ib1base;
+	uint64_t ib1size;
+	uint64_t ib2base;
+	uint64_t ib2size;
+};
+
+/* Snapshot the memory for an indirect buffer */
+static size_t snapshot_ib(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_ib_v2 *header = (struct kgsl_snapshot_ib_v2 *)buf;
+	struct snapshot_ib_meta *meta = priv;
+	unsigned int *src;
+	unsigned int *dst = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_ib_object_list *ib_obj_list;
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj;
+
+	if (meta == NULL || meta->snapshot == NULL || meta->obj == NULL) {
+		KGSL_CORE_ERR("snapshot: bad metadata");
+		return 0;
+	}
+	snapshot = meta->snapshot;
+	obj = meta->obj;
+
+	if (remain < (obj->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the ib\n");
+		return 0;
+	}
+
+	src = kgsl_gpuaddr_to_vaddr(&obj->entry->memdesc, obj->gpuaddr);
+	if (src == NULL) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Unable to map GPU memory object 0x%016llX into the kernel\n",
+			obj->gpuaddr);
+		return 0;
+	}
+
+	if (remain < (obj->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the ib\n");
+		return 0;
+	}
+
+	/* only do this for IB1 because the IB2's are part of IB1 objects */
+	if (meta->ib1base == obj->gpuaddr) {
+		if (-E2BIG == adreno_ib_create_object_list(device,
+				obj->entry->priv,
+				obj->gpuaddr, obj->size >> 2,
+				&ib_obj_list))
+			ib_max_objs = 1;
+		if (ib_obj_list) {
+			/* freeze the IB objects in the IB */
+			snapshot_freeze_obj_list(snapshot,
+				obj->entry->priv,
+				ib_obj_list, meta->ib2base);
+			adreno_ib_destroy_obj_list(ib_obj_list);
+		}
+	}
+
+	/* Write the sub-header for the section */
+	header->gpuaddr = obj->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable);
+	header->size = obj->size >> 2;
+
+	/* Write the contents of the ib */
+	memcpy((void *)dst, (void *)src, (size_t) obj->size);
+
+	return obj->size + sizeof(*header);
+}
+
+/* Dump another item on the current pending list */
+static void dump_object(struct kgsl_device *device, int obj,
+		struct kgsl_snapshot *snapshot,
+		uint64_t ib1base, uint64_t ib1size,
+		uint64_t ib2base, uint64_t ib2size)
+{
+	struct snapshot_ib_meta meta;
+
+	switch (objbuf[obj].type) {
+	case SNAPSHOT_OBJ_TYPE_IB:
+		meta.snapshot = snapshot;
+		meta.obj = &objbuf[obj];
+		meta.ib1base = ib1base;
+		meta.ib1size = ib1size;
+		meta.ib2base = ib2base;
+		meta.ib2size = ib2size;
+
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_IB_V2,
+			snapshot, snapshot_ib, &meta);
+		if (objbuf[obj].entry) {
+			kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc));
+			kgsl_mem_entry_put(objbuf[obj].entry);
+		}
+		break;
+	default:
+		KGSL_CORE_ERR("snapshot: Invalid snapshot object type: %d\n",
+			objbuf[obj].type);
+		break;
+	}
+}
+
+/* setup_fault_process() - Find the kgsl_process_private struct that caused
+ * the fault
+ *
+ * Find the faulting process based on what the dispatcher thinks happened and
+ * what the hardware is using for the current pagetable. The process struct
+ * will be used to look up GPU addresses that are encountered while parsing
+ * the GPU state.
+ */
+static void setup_fault_process(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot,
+				struct kgsl_process_private *process)
+{
+	u64 hw_ptbase, proc_ptbase;
+
+	if (process != NULL && !kgsl_process_private_get(process))
+		process = NULL;
+
+	/* Get the physical address of the MMU pagetable */
+	hw_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu);
+
+	/* if we have an input process, make sure the ptbases match */
+	if (process) {
+		proc_ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable);
+		/* agreement! No need to check further */
+		if (hw_ptbase == proc_ptbase)
+			goto done;
+
+		kgsl_process_private_put(process);
+		process = NULL;
+		KGSL_CORE_ERR("snapshot: ptbase mismatch hw %llx sw %llx\n",
+			hw_ptbase, proc_ptbase);
+	}
+
+	/* try to find the right pagetable by walking the process list */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		struct kgsl_process_private *tmp;
+
+		mutex_lock(&kgsl_driver.process_mutex);
+		list_for_each_entry(tmp, &kgsl_driver.process_list, list) {
+			u64 pt_ttbr0;
+
+			pt_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(tmp->pagetable);
+			if ((pt_ttbr0 == hw_ptbase)
+				&& kgsl_process_private_get(tmp)) {
+				process = tmp;
+				break;
+			}
+		}
+		mutex_unlock(&kgsl_driver.process_mutex);
+	}
+done:
+	snapshot->process = process;
+}
+
+/* Snapshot a global memory buffer */
+static size_t snapshot_global(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (memdesc->size == 0)
+		return 0;
+
+	if (remain < (memdesc->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the memdesc\n");
+		return 0;
+	}
+
+	if (memdesc->hostptr == NULL) {
+		KGSL_CORE_ERR("snapshot: no kernel mapping for global object 0x%016llX\n",
+			memdesc->gpuaddr);
+		return 0;
+	}
+
+	header->size = memdesc->size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, memdesc->size);
+
+	return memdesc->size + sizeof(*header);
+}
+
+/* Snapshot a preemption record buffer */
+static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+	struct a5xx_cp_preemption_record record;
+	int size = sizeof(record);
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (size == 0)
+		return 0;
+
+	if (remain < (size + sizeof(*header))) {
+		KGSL_CORE_ERR(
+			"snapshot: Not enough memory for preemption record\n");
+		return 0;
+	}
+
+	if (memdesc->hostptr == NULL) {
+		KGSL_CORE_ERR(
+			"snapshot: no kernel mapping for preemption record 0x%016llX\n",
+
memdesc->gpuaddr); + return 0; + } + + header->size = size >> 2; + header->gpuaddr = memdesc->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable); + header->type = SNAPSHOT_GPU_OBJECT_GLOBAL; + + memcpy(ptr, memdesc->hostptr, size); + + return size + sizeof(*header); +} + +/* adreno_snapshot - Snapshot the Adreno GPU state + * @device - KGSL device to snapshot + * @snapshot - Pointer to the snapshot instance + * @context - context that caused the fault, if known by the driver + * This is a hook function called by kgsl_snapshot to snapshot the + * Adreno specific information for the GPU snapshot. In turn, this function + * calls the GPU specific snapshot function to get core specific information. + */ +void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot, + struct kgsl_context *context) +{ + unsigned int i; + uint64_t ib1base, ib2base; + unsigned int ib1size, ib2size; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); + struct adreno_ringbuffer *rb; + struct snapshot_rb_params snap_rb_params; + struct kgsl_iommu *iommu = device->mmu.priv; + + ib_max_objs = 0; + /* Reset the list of objects */ + objbufptr = 0; + + snapshot_frozen_objsize = 0; + + setup_fault_process(device, snapshot, + context ? context->proc_priv : NULL); + + /* Dump the current ringbuffer */ + snap_rb_params.snapshot = snapshot; + snap_rb_params.rb = adreno_dev->cur_rb; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, snapshot, + snapshot_rb, &snap_rb_params); + + /* Dump the prev ringbuffer */ + if (adreno_dev->prev_rb) { + snap_rb_params.rb = adreno_dev->prev_rb; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, snapshot_rb, &snap_rb_params); + } + + /* Dump next ringbuffer */ + if (adreno_dev->next_rb) { + snap_rb_params.rb = adreno_dev->next_rb; + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, + snapshot, snapshot_rb, &snap_rb_params); + } + + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE, + ADRENO_REG_CP_IB1_BASE_HI, &ib1base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1size); + adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE, + ADRENO_REG_CP_IB2_BASE_HI, &ib2base); + adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2size); + + /* Add GPU specific sections - registers mainly, but other stuff too */ + if (gpudev->snapshot) + gpudev->snapshot(adreno_dev, snapshot); + + /* Dump selected global buffers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, &adreno_dev->dev.memstore); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, + &adreno_dev->dev.mmu.setstate_memory); + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, + &adreno_dev->pwron_fixup); + + if (test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv)) { + FOR_EACH_RINGBUFFER(adreno_dev, rb, i) { + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_preemption_record, + &rb->preemption_desc); + } + + kgsl_snapshot_add_section(device, + KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2, + snapshot, snapshot_global, &iommu->smmu_info); + } + + /* + * Add a section that lists (gpuaddr, size, memtype) tuples of the + * hanging process + */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MEMLIST_V2, + snapshot, snapshot_capture_mem_list, 
snapshot->process);
+	/*
+	 * Make sure that the last IB1 that was being executed is dumped.
+	 * Since this was the last IB1 that was processed, we should have
+	 * already added it to the list during the ringbuffer parse but we
+	 * want to be double plus sure.
+	 * The problem is that IB size from the register is the unprocessed size
+	 * of the buffer not the original size, so if we didn't catch this
+	 * buffer being directly used in the RB, then we might not be able to
+	 * dump the whole thing. Print a warning message so we can try to
+	 * figure out how often this really happens.
+	 */
+
+	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ib1base,
+			snapshot->process) && ib1size) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->process,
+			ib1base, ib1size);
+		KGSL_CORE_ERR(
+			"CP_IB1_BASE not found in the ringbuffer. Dumping %x dwords of the buffer.\n",
+			ib1size);
+	}
+
+	/*
+	 * Add the last parsed IB2 to the list. The IB2 should be found as we
+	 * parse the objects below, but we try to add it to the list first, so
+	 * it too can be parsed. Don't print an error message in this case - if
+	 * the IB2 is found during parsing, the list will be updated with the
+	 * correct size.
+	 */
+
+	if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ib2base,
+			snapshot->process) && ib2size) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->process,
+			ib2base, ib2size);
+	}
+
+	/*
+	 * Go through the list of found objects and dump each one. As the IBs
+	 * are parsed, more objects might be found, and objbufptr will increase
+	 */
+	for (i = 0; i < objbufptr; i++)
+		dump_object(device, i, snapshot, ib1base, ib1size,
+			ib2base, ib2size);
+
+	if (ib_max_objs)
+		KGSL_CORE_ERR("Max objects found in IB\n");
+	if (snapshot_frozen_objsize)
+		KGSL_CORE_ERR("GPU snapshot froze %zdKb of GPU buffers\n",
+			snapshot_frozen_objsize / 1024);
+
+}
+
+/*
+ * adreno_snapshot_cp_merciu - Dump CP MERCIU data in snapshot
+ * @device: Device being snapshotted
+ * @remain: Bytes remaining in snapshot memory
+ * @priv: Size of MERCIU data in Dwords
+ */
+size_t adreno_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = *((int *)priv);
+
+	/* The MERCIU data is two dwords per entry */
+	size = size << 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MERCIU;
+	header->size = size;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_MERCIU_ADDR, 0x0);
+
+	for (i = 0; i < size; i++) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MERCIU_DATA,
+			&data[(i * 2)]);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MERCIU_DATA2,
+			&data[(i * 2) + 1]);
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_roq - Dump ROQ data in snapshot
+ * @device: Device being snapshotted
+ * @remain: Bytes remaining in snapshot memory
+ * @priv: Size of ROQ data in Dwords
+ */
+size_t adreno_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = *((int *)priv);
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_ROQ;
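+	/*
+	 * Like the other CP debug sections in this file, the ROQ contents are
+	 * read back indirectly: reset the read address to 0 and then stream
+	 * the data register out 'size' times into the snapshot buffer.
+	 */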
+	header->size = size;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ROQ_ADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ROQ_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_pm4_ram() - Dump PM4 data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Unused
+ */
+size_t adreno_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i;
+	size_t size = adreno_dev->pm4_fw_size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware. We do
+	 * use the cached version of the size, however, instead of trying to
+	 * maintain always changing hardcoded constants
+	 */
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_RADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_pfp_ram() - Dump the PFP data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Unused
+ */
+size_t adreno_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = adreno_dev->pfp_fw_size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware.
We do + * use the cached version of the size, however, instead of trying to + * maintain always changing hardcoded constants + */ + adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x0); + for (i = 0; i < size; i++) + adreno_readreg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, + &data[i]); + + return DEBUG_SECTION_SZ(size); +} + +/* + * adreno_snapshot_vpc_memory() - Save VPC data in snapshot + * @device: Device being snapshotted + * @buf: Snapshot memory + * @remain: Number of bytes left in snapshot memory + * @priv: Private data for VPC if any + */ +size_t adreno_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int vpc_mem_size = *((int *)priv); + size_t size = VPC_MEMORY_BANKS * vpc_mem_size; + int bank, addr, i = 0; + + if (remain < DEBUG_SECTION_SZ(size)) { + SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_VPC_MEMORY; + header->size = size; + + for (bank = 0; bank < VPC_MEMORY_BANKS; bank++) { + for (addr = 0; addr < vpc_mem_size; addr++) { + unsigned int val = bank | (addr << 4); + adreno_writereg(adreno_dev, + ADRENO_REG_VPC_DEBUG_RAM_SEL, val); + adreno_readreg(adreno_dev, + ADRENO_REG_VPC_DEBUG_RAM_READ, &data[i++]); + } + } + + return DEBUG_SECTION_SZ(size); +} + +/* + * adreno_snapshot_cp_meq() - Save CP MEQ data in snapshot + * @device: Device being snapshotted + * @buf: Snapshot memory + * @remain: Number of bytes left in snapshot memory + * @priv: Contains the size of MEQ data + */ +size_t adreno_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int i; + int cp_meq_sz = *((int *)priv); + + if (remain < DEBUG_SECTION_SZ(cp_meq_sz)) { + SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG"); + return 0; + } + + header->type = SNAPSHOT_DEBUG_CP_MEQ; + header->size = cp_meq_sz; + + adreno_writereg(adreno_dev, ADRENO_REG_CP_MEQ_ADDR, 0x0); + for (i = 0; i < cp_meq_sz; i++) + adreno_readreg(adreno_dev, ADRENO_REG_CP_MEQ_DATA, &data[i]); + + return DEBUG_SECTION_SZ(cp_meq_sz); +} + +static const struct adreno_vbif_snapshot_registers *vbif_registers( + struct adreno_device *adreno_dev, + const struct adreno_vbif_snapshot_registers *list, + unsigned int count) +{ + unsigned int version; + unsigned int i; + + adreno_readreg(adreno_dev, ADRENO_REG_VBIF_VERSION, &version); + + for (i = 0; i < count; i++) { + if (list[i].version == version) + return &list[i]; + } + + KGSL_CORE_ERR( + "snapshot: Registers for VBIF version %X register were not dumped\n", + version); + + return NULL; +} + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count) +{ + struct kgsl_snapshot_registers r; + + r.regs = regs; + r.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + kgsl_snapshot_dump_registers, &r); +} + +void adreno_snapshot_vbif_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const struct adreno_vbif_snapshot_registers *list, + unsigned int count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + struct kgsl_snapshot_registers 
regs; + const struct adreno_vbif_snapshot_registers *vbif; + + vbif = vbif_registers(adreno_dev, list, count); + + if (vbif != NULL) { + regs.regs = vbif->registers; + regs.count = vbif->count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, + snapshot, kgsl_snapshot_dump_registers, ®s); + } +} diff --git a/drivers/gpu/msm/adreno_snapshot.h b/drivers/gpu/msm/adreno_snapshot.h new file mode 100644 index 000000000000..6af050762402 --- /dev/null +++ b/drivers/gpu/msm/adreno_snapshot.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __ADRENO_SNAPSHOT_H +#define __ADRENO_SNAPSHOT_H + +#include "kgsl_snapshot.h" + +#define CP_CRASH_DUMPER_TIMEOUT 1000 + +#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_debug)) + +#define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \ + + sizeof(struct kgsl_snapshot_shader)) + +/* Section sizes for A320 */ +#define A320_SNAPSHOT_CP_STATE_SECTION_SIZE 0x2e +#define A320_SNAPSHOT_ROQ_SECTION_SIZE 512 +#define A320_SNAPSHOT_CP_MERCIU_SECTION_SIZE 32 + +/* Macro to make it super easy to dump registers */ +#define SNAPSHOT_REGISTERS(_d, _s, _r) \ + adreno_snapshot_registers((_d), (_s), \ + (unsigned int *) _r, ARRAY_SIZE(_r) / 2) + +size_t adreno_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_roq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_cp_meq(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); +size_t adreno_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void adreno_snapshot_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const unsigned int *regs, unsigned int count); + +void adreno_snapshot_vbif_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + const struct adreno_vbif_snapshot_registers *list, + unsigned int count); + +#endif /*__ADRENO_SNAPSHOT_H */ diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c new file mode 100644 index 000000000000..4c082fc2850e --- /dev/null +++ b/drivers/gpu/msm/adreno_sysfs.c @@ -0,0 +1,475 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/sysfs.h> +#include <linux/device.h> + +#include "kgsl_device.h" +#include "adreno.h" + +struct adreno_sysfs_attribute { + struct device_attribute attr; + unsigned int (*show)(struct adreno_device *adreno_dev); + int (*store)(struct adreno_device *adreno_dev, unsigned int val); +}; + +#define _ADRENO_SYSFS_ATTR(_name, __show, __store) \ +struct adreno_sysfs_attribute adreno_attr_##_name = { \ + .attr = __ATTR(_name, 0644, __show, __store), \ + .show = _ ## _name ## _show, \ + .store = _ ## _name ## _store, \ +} + +#define ADRENO_SYSFS_ATTR(_a) \ + container_of((_a), struct adreno_sysfs_attribute, attr) + +static struct adreno_device *_get_adreno_dev(struct device *dev) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + return device ? ADRENO_DEVICE(device) : NULL; +} + +static int _ft_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + adreno_dev->ft_policy = val & KGSL_FT_POLICY_MASK; + return 0; +} + +static unsigned int _ft_policy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ft_policy; +} + +static int _ft_pagefault_policy_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + int ret = 0; + + mutex_lock(&device->mutex); + val &= KGSL_FT_PAGEFAULT_MASK; + + if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) + ret = kgsl_mmu_set_pagefault_policy(&device->mmu, + (unsigned long) val); + + if (ret == 0) + adreno_dev->ft_pf_policy = val; + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _ft_pagefault_policy_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->ft_pf_policy; +} + +static int _ft_fast_hang_detect_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv)) + return 0; + + mutex_lock(&device->mutex); + + if (val) { + if (!kgsl_active_count_get(device)) { + adreno_fault_detect_start(adreno_dev); + kgsl_active_count_put(device); + } + } else + adreno_fault_detect_stop(adreno_dev); + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _ft_fast_hang_detect_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->fast_hang_detect; +} + +static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + adreno_dev->long_ib_detect = val; + return 0; +} + +static unsigned int _ft_long_ib_detect_show(struct adreno_device *adreno_dev) +{ + return adreno_dev->long_ib_detect; +} + +static int _ft_hang_intr_status_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + int ret = 0; + + if (val == test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv)) + return 0; + + mutex_lock(&device->mutex); + change_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + + if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + adreno_irqctrl(adreno_dev, 1); + } else if (device->state == KGSL_STATE_INIT) { + ret = -EACCES; + change_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); + } + + mutex_unlock(&device->mutex); + return ret; +} + +static unsigned int _ft_hang_intr_status_show(struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv); +} + +static int _pwrctrl_store(struct adreno_device *adreno_dev, + unsigned int val, unsigned int flag) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (val == 
test_bit(flag, &adreno_dev->pwrctrl_flag)) + return 0; + + mutex_lock(&device->mutex); + + /* Power down the GPU before changing the state */ + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + change_bit(flag, &adreno_dev->pwrctrl_flag); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + + return 0; +} + +static int _preemption_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + struct kgsl_device *device = &adreno_dev->dev; + + if (test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv) == val) + return 0; + + mutex_lock(&device->mutex); + + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv); + adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + + return 0; +} + +static unsigned int _preemption_show(struct adreno_device *adreno_dev) +{ + return adreno_is_preemption_enabled(adreno_dev); +} + +static int _sptp_pc_store(struct adreno_device *adreno_dev, + unsigned int val) +{ + return _pwrctrl_store(adreno_dev, val, ADRENO_SPTP_PC_CTRL); +} + +static unsigned int _sptp_pc_show(struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag); +} + +static int _lm_store(struct adreno_device *adreno_dev, unsigned int val) +{ + return _pwrctrl_store(adreno_dev, val, ADRENO_LM_CTRL); +} + +static unsigned int _lm_show(struct adreno_device *adreno_dev) +{ + return test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag); +} + +static ssize_t _sysfs_store_u32(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + int ret; + + if (adreno_dev == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + + if (!ret && _attr->store) + ret = _attr->store(adreno_dev, val); + + return (ssize_t) ret < 0 ? ret : count; +} + +static ssize_t _sysfs_show_u32(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + + if (adreno_dev == NULL) + return 0; + + if (_attr->show) + val = _attr->show(adreno_dev); + + return snprintf(buf, PAGE_SIZE, "0x%X\n", val); +} + +static ssize_t _sysfs_store_bool(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + int ret; + + if (adreno_dev == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + + if (!ret && _attr->store) + ret = _attr->store(adreno_dev, val ? 1 : 0); + + return (ssize_t) ret < 0 ? 
ret : count; +} + +static ssize_t _sysfs_show_bool(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct adreno_device *adreno_dev = _get_adreno_dev(dev); + struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr); + unsigned int val = 0; + + if (adreno_dev == NULL) + return 0; + + if (_attr->show) + val = _attr->show(adreno_dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", val); +} + +#define ADRENO_SYSFS_BOOL(_name) \ + _ADRENO_SYSFS_ATTR(_name, _sysfs_show_bool, _sysfs_store_bool) + +#define ADRENO_SYSFS_U32(_name) \ + _ADRENO_SYSFS_ATTR(_name, _sysfs_show_u32, _sysfs_store_u32) + +static ADRENO_SYSFS_U32(ft_policy); +static ADRENO_SYSFS_U32(ft_pagefault_policy); +static ADRENO_SYSFS_BOOL(ft_fast_hang_detect); +static ADRENO_SYSFS_BOOL(ft_long_ib_detect); +static ADRENO_SYSFS_BOOL(ft_hang_intr_status); + +static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice); +static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout); + +static ADRENO_SYSFS_BOOL(sptp_pc); +static ADRENO_SYSFS_BOOL(lm); +static ADRENO_SYSFS_BOOL(preemption); + +static const struct device_attribute *_attr_list[] = { + &adreno_attr_ft_policy.attr, + &adreno_attr_ft_pagefault_policy.attr, + &adreno_attr_ft_fast_hang_detect.attr, + &adreno_attr_ft_long_ib_detect.attr, + &adreno_attr_ft_hang_intr_status.attr, + &dev_attr_wake_nice.attr, + &dev_attr_wake_timeout.attr, + &adreno_attr_sptp_pc.attr, + &adreno_attr_lm.attr, + &adreno_attr_preemption.attr, + NULL, +}; + +/* Add a ppd directory for controlling different knobs from sysfs */ +struct adreno_ppd_attribute { + struct attribute attr; + ssize_t (*show)(struct kgsl_device *device, char *buf); + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +#define PPD_ATTR(_name, _mode, _show, _store) \ +struct adreno_ppd_attribute attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +#define to_ppd_attr(a) \ +container_of((a), struct adreno_ppd_attribute, attr) + +#define kobj_to_device(a) \ +container_of((a), struct kgsl_device, ppd_kobj) + +static ssize_t ppd_enable_store(struct kgsl_device *device, + const char *buf, size_t count) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + unsigned int ppd_on = 0; + int ret; + + if (!adreno_is_a430v2(adreno_dev) || + !ADRENO_FEATURE(adreno_dev, ADRENO_PPD)) + return count; + + ret = kgsl_sysfs_store(buf, &ppd_on); + if (ret < 0) + return ret; + + ppd_on = (ppd_on) ? 
1 : 0; + + if (ppd_on == test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag)) + return count; + + mutex_lock(&device->mutex); + + kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + change_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t ppd_enable_show(struct kgsl_device *device, + char *buf) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + return snprintf(buf, PAGE_SIZE, "%u\n", + test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag)); +} +/* Add individual ppd attributes here */ +static PPD_ATTR(enable, 0644, ppd_enable_show, ppd_enable_store); + +static ssize_t ppd_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct adreno_ppd_attribute *pattr = to_ppd_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret = -EIO; + + if (device != NULL && pattr->show != NULL) + ret = pattr->show(device, buf); + + return ret; +} + +static ssize_t ppd_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct adreno_ppd_attribute *pattr = to_ppd_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret = -EIO; + + if (device != NULL && pattr->store != NULL) + ret = pattr->store(device, buf, count); + + return ret; +} + +static const struct sysfs_ops ppd_sysfs_ops = { + .show = ppd_sysfs_show, + .store = ppd_sysfs_store, +}; + +static struct kobj_type ktype_ppd = { + .sysfs_ops = &ppd_sysfs_ops, +}; + +static void ppd_sysfs_close(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD)) + return; + + sysfs_remove_file(&device->ppd_kobj, &attr_enable.attr); + kobject_put(&device->ppd_kobj); +} + +static int ppd_sysfs_init(struct kgsl_device *device) +{ + int ret; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + + if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD)) + return -ENODEV; + + ret = kobject_init_and_add(&device->ppd_kobj, &ktype_ppd, + &device->dev->kobj, "ppd"); + + if (ret == 0) + ret = sysfs_create_file(&device->ppd_kobj, &attr_enable.attr); + + return ret; +} + +/** + * adreno_sysfs_close() - Take down the adreno sysfs files + * @device: Pointer to the KGSL device + * + * Take down the sysfs files on when the device goes away + */ +void adreno_sysfs_close(struct kgsl_device *device) +{ + ppd_sysfs_close(device); + kgsl_remove_device_sysfs_files(device->dev, _attr_list); +} + +/** + * adreno_sysfs_init() - Initialize adreno sysfs files + * @device: Pointer to the KGSL device + * + * Initialize many of the adreno specific sysfs files especially for fault + * tolerance and power control + */ +int adreno_sysfs_init(struct kgsl_device *device) +{ + int ret = kgsl_create_device_sysfs_files(device->dev, _attr_list); + if (ret != 0) + return ret; + + /* Add the PPD directory and files */ + ppd_sysfs_init(device); + + return 0; +} + diff --git a/drivers/gpu/msm/adreno_trace.c b/drivers/gpu/msm/adreno_trace.c new file mode 100644 index 000000000000..20a7210afc1b --- /dev/null +++ b/drivers/gpu/msm/adreno_trace.c @@ -0,0 +1,21 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "adreno.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "a3xx_reg.h" +#include "a4xx_reg.h" +#include "a5xx_reg.h" +#include "adreno_trace.h" diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h new file mode 100644 index 000000000000..c0926cbb6a85 --- /dev/null +++ b/drivers/gpu/msm/adreno_trace.h @@ -0,0 +1,687 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _ADRENO_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE adreno_trace + +#include <linux/tracepoint.h> + +TRACE_EVENT(adreno_cmdbatch_queued, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int queued), + TP_ARGS(cmdbatch, queued), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, queued) + __field(unsigned int, flags) + __field(unsigned int, prio) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->queued = queued; + __entry->flags = cmdbatch->flags; + __entry->prio = cmdbatch->context->priority; + ), + TP_printk( + "ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s", + __entry->id, __entry->prio, + __entry->timestamp, __entry->queued, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "none" + ) +); + +TRACE_EVENT(adreno_cmdbatch_submitted, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight, uint64_t ticks, + unsigned long secs, unsigned long usecs, + struct adreno_ringbuffer *rb), + TP_ARGS(cmdbatch, inflight, ticks, secs, usecs, rb), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, flags) + __field(uint64_t, ticks) + __field(unsigned long, secs) + __field(unsigned long, usecs) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->inflight = inflight; + __entry->flags = cmdbatch->flags; + __entry->ticks = ticks; + __entry->secs = secs; + __entry->usecs = usecs; + __entry->prio = cmdbatch->context->priority; + __entry->rb_id = rb->id; + __entry->rptr = rb->rptr; + __entry->wptr = rb->wptr; + __entry->q_inflight = rb->dispatch_q.inflight; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d flags=%s ticks=%lld time=%lu.%0lu rb_id=%d r/w=%x/%x, q_inflight=%d", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "none", + __entry->ticks, __entry->secs, __entry->usecs, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight + ) +); + +TRACE_EVENT(adreno_cmdbatch_retired, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight, + uint64_t start, uint64_t retire, + struct adreno_ringbuffer *rb), + TP_ARGS(cmdbatch, inflight, start, retire, rb), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(int, inflight) + __field(unsigned int, recovery) + __field(unsigned int, flags) + __field(uint64_t, start) + __field(uint64_t, retire) + __field(int, prio) + __field(int, rb_id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(int, q_inflight) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->inflight = inflight; + __entry->recovery = cmdbatch->fault_recovery; + __entry->flags = cmdbatch->flags; + __entry->start = start; + __entry->retire = retire; + __entry->prio = cmdbatch->context->priority; + __entry->rb_id = rb->id; + __entry->rptr = rb->rptr; + __entry->wptr = rb->wptr; + __entry->q_inflight = rb->dispatch_q.inflight; + ), + TP_printk( + "ctx=%u ctx_prio=%d ts=%u inflight=%d recovery=%s flags=%s start=%lld retire=%lld rb_id=%d, r/w=%x/%x, q_inflight=%d", + __entry->id, __entry->prio, __entry->timestamp, + __entry->inflight, + __entry->recovery ? + __print_flags(__entry->recovery, "|", + ADRENO_FT_TYPES) : "none", + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "none", + __entry->start, + __entry->retire, + __entry->rb_id, __entry->rptr, __entry->wptr, + __entry->q_inflight + ) +); + +TRACE_EVENT(adreno_cmdbatch_fault, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int fault), + TP_ARGS(cmdbatch, fault), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, fault) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->fault = fault; + ), + TP_printk( + "ctx=%u ts=%u type=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->fault, + { 0, "none" }, + { ADRENO_SOFT_FAULT, "soft" }, + { ADRENO_HARD_FAULT, "hard" }, + { ADRENO_TIMEOUT_FAULT, "timeout" }) + ) +); + +TRACE_EVENT(adreno_cmdbatch_recovery, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int action), + TP_ARGS(cmdbatch, action), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(unsigned int, action) + ), + TP_fast_assign( + __entry->id = cmdbatch->context->id; + __entry->timestamp = cmdbatch->timestamp; + __entry->action = action; + ), + TP_printk( + "ctx=%u ts=%u action=%s", + __entry->id, __entry->timestamp, + __print_symbolic(__entry->action, ADRENO_FT_TYPES) + ) +); + +DECLARE_EVENT_CLASS(adreno_drawctxt_template, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, priority) + ), + TP_fast_assign( + __entry->id = drawctxt->base.id; + __entry->priority = drawctxt->base.priority; + ), + TP_printk("ctx=%u priority=%u", __entry->id, __entry->priority) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + 
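+/*
+ * dispatch_queue_context and adreno_drawctxt_invalidate below reuse the
+ * drawctxt event class above, so they record the same ctx id and priority
+ * pair as the sleep/wake events.
+ */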
+DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate, + TP_PROTO(struct adreno_context *drawctxt), + TP_ARGS(drawctxt) +); + +TRACE_EVENT(adreno_drawctxt_wait_start, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, unsigned int ts), + TP_ARGS(rb_id, ctx_id, ts), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + ), + TP_printk( + "rb=%u ctx=%u ts=%u", + __entry->rb_id, __entry->ctx_id, __entry->ts + ) +); + +TRACE_EVENT(adreno_drawctxt_wait_done, + TP_PROTO(unsigned int rb_id, unsigned int ctx_id, + unsigned int ts, int status), + TP_ARGS(rb_id, ctx_id, ts, status), + TP_STRUCT__entry( + __field(unsigned int, rb_id) + __field(unsigned int, ctx_id) + __field(unsigned int, ts) + __field(int, status) + ), + TP_fast_assign( + __entry->rb_id = rb_id; + __entry->ctx_id = ctx_id; + __entry->ts = ts; + __entry->status = status; + ), + TP_printk( + "rb=%u ctx=%u ts=%u status=%d", + __entry->rb_id, __entry->ctx_id, __entry->ts, __entry->status + ) +); + +TRACE_EVENT(adreno_drawctxt_switch, + TP_PROTO(struct adreno_ringbuffer *rb, + struct adreno_context *newctx, + unsigned int flags), + TP_ARGS(rb, newctx, flags), + TP_STRUCT__entry( + __field(int, rb_level) + __field(unsigned int, oldctx) + __field(unsigned int, newctx) + __field(unsigned int, flags) + ), + TP_fast_assign( + __entry->rb_level = rb->id; + __entry->oldctx = rb->drawctxt_active ? + rb->drawctxt_active->base.id : 0; + __entry->newctx = newctx ? newctx->base.id : 0; + ), + TP_printk( + "rb level=%d oldctx=%u newctx=%u flags=%X", + __entry->rb_level, __entry->oldctx, __entry->newctx, flags + ) +); + +TRACE_EVENT(adreno_gpu_fault, + TP_PROTO(unsigned int ctx, unsigned int ts, + unsigned int status, unsigned int rptr, unsigned int wptr, + unsigned int ib1base, unsigned int ib1size, + unsigned int ib2base, unsigned int ib2size, int rb_id), + TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base, + ib2size, rb_id), + TP_STRUCT__entry( + __field(unsigned int, ctx) + __field(unsigned int, ts) + __field(unsigned int, status) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + __field(unsigned int, ib1base) + __field(unsigned int, ib1size) + __field(unsigned int, ib2base) + __field(unsigned int, ib2size) + __field(int, rb_id) + ), + TP_fast_assign( + __entry->ctx = ctx; + __entry->ts = ts; + __entry->status = status; + __entry->rptr = rptr; + __entry->wptr = wptr; + __entry->ib1base = ib1base; + __entry->ib1size = ib1size; + __entry->ib2base = ib2base; + __entry->ib2size = ib2size; + __entry->rb_id = rb_id; + ), + TP_printk("ctx=%d ts=%d rb_id=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X", + __entry->ctx, __entry->ts, __entry->rb_id, __entry->status, + __entry->wptr, __entry->rptr, __entry->ib1base, + __entry->ib1size, __entry->ib2base, __entry->ib2size) +); + +TRACE_EVENT(adreno_sp_tp, + + TP_PROTO(unsigned long ip), + + TP_ARGS(ip), + + TP_STRUCT__entry( + __field(unsigned long, ip) + ), + + TP_fast_assign( + __entry->ip = ip; + ), + + TP_printk( + "func=%pf", (void *) __entry->ip + ) +); + +/* + * Tracepoint for a3xx irq. 
Includes status info + */ +TRACE_EVENT(kgsl_a3xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? __print_flags(__entry->status, "|", + { 1 << A3XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" }, + { 1 << A3XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" }, + { 1 << A3XX_INT_RBBM_REG_TIMEOUT, "RBBM_REG_TIMEOUT" }, + { 1 << A3XX_INT_RBBM_ME_MS_TIMEOUT, + "RBBM_ME_MS_TIMEOUT" }, + { 1 << A3XX_INT_RBBM_PFP_MS_TIMEOUT, + "RBBM_PFP_MS_TIMEOUT" }, + { 1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW, + "RBBM_ATB_BUS_OVERFLOW" }, + { 1 << A3XX_INT_VFD_ERROR, "RBBM_VFD_ERROR" }, + { 1 << A3XX_INT_CP_SW_INT, "CP_SW" }, + { 1 << A3XX_INT_CP_T0_PACKET_IN_IB, + "CP_T0_PACKET_IN_IB" }, + { 1 << A3XX_INT_CP_OPCODE_ERROR, "CP_OPCODE_ERROR" }, + { 1 << A3XX_INT_CP_RESERVED_BIT_ERROR, + "CP_RESERVED_BIT_ERROR" }, + { 1 << A3XX_INT_CP_HW_FAULT, "CP_HW_FAULT" }, + { 1 << A3XX_INT_CP_DMA, "CP_DMA" }, + { 1 << A3XX_INT_CP_IB2_INT, "CP_IB2_INT" }, + { 1 << A3XX_INT_CP_IB1_INT, "CP_IB1_INT" }, + { 1 << A3XX_INT_CP_RB_INT, "CP_RB_INT" }, + { 1 << A3XX_INT_CP_REG_PROTECT_FAULT, + "CP_REG_PROTECT_FAULT" }, + { 1 << A3XX_INT_CP_RB_DONE_TS, "CP_RB_DONE_TS" }, + { 1 << A3XX_INT_CP_VS_DONE_TS, "CP_VS_DONE_TS" }, + { 1 << A3XX_INT_CP_PS_DONE_TS, "CP_PS_DONE_TS" }, + { 1 << A3XX_INT_CACHE_FLUSH_TS, "CACHE_FLUSH_TS" }, + { 1 << A3XX_INT_CP_AHB_ERROR_HALT, + "CP_AHB_ERROR_HALT" }, + { 1 << A3XX_INT_MISC_HANG_DETECT, "MISC_HANG_DETECT" }, + { 1 << A3XX_INT_UCHE_OOB_ACCESS, "UCHE_OOB_ACCESS" }) + : "None" + ) +); + +/* + * Tracepoint for a4xx irq. Includes status info + */ +TRACE_EVENT(kgsl_a4xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? 
__print_flags(__entry->status, "|", + { 1 << A4XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" }, + { 1 << A4XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" }, + { 1 << A4XX_INT_RBBM_REG_TIMEOUT, "RBBM_REG_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_ME_MS_TIMEOUT, + "RBBM_ME_MS_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_PFP_MS_TIMEOUT, + "RBBM_PFP_MS_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_ETS_MS_TIMEOUT, + "RBBM_ETS_MS_TIMEOUT" }, + { 1 << A4XX_INT_RBBM_ASYNC_OVERFLOW, + "RBBM_ASYNC_OVERFLOW" }, + { 1 << A4XX_INT_RBBM_GPC_ERR, + "RBBM_GPC_ERR" }, + { 1 << A4XX_INT_CP_SW, "CP_SW" }, + { 1 << A4XX_INT_CP_OPCODE_ERROR, "CP_OPCODE_ERROR" }, + { 1 << A4XX_INT_CP_RESERVED_BIT_ERROR, + "CP_RESERVED_BIT_ERROR" }, + { 1 << A4XX_INT_CP_HW_FAULT, "CP_HW_FAULT" }, + { 1 << A4XX_INT_CP_DMA, "CP_DMA" }, + { 1 << A4XX_INT_CP_IB2_INT, "CP_IB2_INT" }, + { 1 << A4XX_INT_CP_IB1_INT, "CP_IB1_INT" }, + { 1 << A4XX_INT_CP_RB_INT, "CP_RB_INT" }, + { 1 << A4XX_INT_CP_REG_PROTECT_FAULT, + "CP_REG_PROTECT_FAULT" }, + { 1 << A4XX_INT_CP_RB_DONE_TS, "CP_RB_DONE_TS" }, + { 1 << A4XX_INT_CP_VS_DONE_TS, "CP_VS_DONE_TS" }, + { 1 << A4XX_INT_CP_PS_DONE_TS, "CP_PS_DONE_TS" }, + { 1 << A4XX_INT_CACHE_FLUSH_TS, "CACHE_FLUSH_TS" }, + { 1 << A4XX_INT_CP_AHB_ERROR_HALT, + "CP_AHB_ERROR_HALT" }, + { 1 << A4XX_INT_RBBM_ATB_BUS_OVERFLOW, + "RBBM_ATB_BUS_OVERFLOW" }, + { 1 << A4XX_INT_MISC_HANG_DETECT, "MISC_HANG_DETECT" }, + { 1 << A4XX_INT_UCHE_OOB_ACCESS, "UCHE_OOB_ACCESS" }, + { 1 << A4XX_INT_RBBM_DPM_CALC_ERR, + "RBBM_DPM_CALC_ERR" }, + { 1 << A4XX_INT_RBBM_DPM_EPOCH_ERR, + "RBBM_DPM_CALC_ERR" }, + { 1 << A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR, + "RBBM_DPM_THERMAL_YELLOW_ERR" }, + { 1 << A4XX_INT_RBBM_DPM_THERMAL_RED_ERR, + "RBBM_DPM_THERMAL_RED_ERR" }) + : "None" + ) +); + +DECLARE_EVENT_CLASS(adreno_hw_preempt_template, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rb->rptr; + __entry->new_rptr = new_rb->rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc.gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_clear_to_trig, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb) +); + +DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp_int, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb) +); + +TRACE_EVENT(adreno_hw_preempt_comp_to_clear, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb), + TP_STRUCT__entry(__field(int, cur_level) + 
__field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, new_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rb->rptr; + __entry->new_rptr = new_rb->rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->new_wptr_end = new_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc.gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x prev_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr, __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr_end, __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_hw_preempt_token_submit, + TP_PROTO(struct adreno_ringbuffer *cur_rb, + struct adreno_ringbuffer *new_rb), + TP_ARGS(cur_rb, new_rb), + TP_STRUCT__entry(__field(int, cur_level) + __field(int, new_level) + __field(unsigned int, cur_rptr) + __field(unsigned int, new_rptr) + __field(unsigned int, cur_wptr) + __field(unsigned int, cur_wptr_end) + __field(unsigned int, new_wptr) + __field(unsigned int, cur_rbbase) + __field(unsigned int, new_rbbase) + ), + TP_fast_assign(__entry->cur_level = cur_rb->id; + __entry->new_level = new_rb->id; + __entry->cur_rptr = cur_rb->rptr; + __entry->new_rptr = new_rb->rptr; + __entry->cur_wptr = cur_rb->wptr; + __entry->cur_wptr_end = cur_rb->wptr_preempt_end; + __entry->new_wptr = new_rb->wptr; + __entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr; + __entry->new_rbbase = new_rb->buffer_desc.gpuaddr; + ), + TP_printk( + "cur_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x", + __entry->cur_level, __entry->cur_rptr, + __entry->cur_wptr_end, __entry->cur_wptr, + __entry->cur_rbbase, + __entry->new_level, __entry->new_rptr, + __entry->new_wptr, __entry->new_rbbase + ) +); + +TRACE_EVENT(adreno_rb_starve, + TP_PROTO(struct adreno_ringbuffer *rb), + TP_ARGS(rb), + TP_STRUCT__entry(__field(int, id) + __field(unsigned int, rptr) + __field(unsigned int, wptr) + ), + TP_fast_assign(__entry->id = rb->id; + __entry->rptr = rb->rptr; + __entry->wptr = rb->wptr; + ), + TP_printk( + "rb %d r/w %x/%x starved", __entry->id, __entry->rptr, + __entry->wptr + ) +); + +/* + * Tracepoint for a5xx irq. Includes status info + */ +TRACE_EVENT(kgsl_a5xx_irq_status, + + TP_PROTO(struct adreno_device *adreno_dev, unsigned int status), + + TP_ARGS(adreno_dev, status), + + TP_STRUCT__entry( + __string(device_name, adreno_dev->dev.name) + __field(unsigned int, status) + ), + + TP_fast_assign( + __assign_str(device_name, adreno_dev->dev.name); + __entry->status = status; + ), + + TP_printk( + "d_name=%s status=%s", + __get_str(device_name), + __entry->status ? 
__print_flags(__entry->status, "|", + { 1 << A5XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" }, + { 1 << A5XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" }, + { 1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT, + "RBBM_TRANSFER_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_ME_MS_TIMEOUT, + "RBBM_ME_MS_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT, + "RBBM_PFP_MS_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT, + "RBBM_ETS_MS_TIMEOUT" }, + { 1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW, + "RBBM_ATB_ASYNC_OVERFLOW" }, + { 1 << A5XX_INT_RBBM_GPC_ERROR, + "RBBM_GPC_ERR" }, + { 1 << A5XX_INT_CP_SW, "CP_SW" }, + { 1 << A5XX_INT_CP_HW_ERROR, "CP_OPCODE_ERROR" }, + { 1 << A5XX_INT_CP_CCU_FLUSH_DEPTH_TS, + "CP_CCU_FLUSH_DEPTH_TS" }, + { 1 << A5XX_INT_CP_CCU_FLUSH_COLOR_TS, + "CP_CCU_FLUSH_COLOR_TS" }, + { 1 << A5XX_INT_CP_CCU_RESOLVE_TS, + "CP_CCU_RESOLVE_TS" }, + { 1 << A5XX_INT_CP_IB2, "CP_IB2_INT" }, + { 1 << A5XX_INT_CP_IB1, "CP_IB1_INT" }, + { 1 << A5XX_INT_CP_RB, "CP_RB_INT" }, + { 1 << A5XX_INT_CP_UNUSED_1, "CP_UNUSED_1" }, + { 1 << A5XX_INT_CP_RB_DONE_TS, "CP_RB_DONE_TS" }, + { 1 << A5XX_INT_CP_WT_DONE_TS, "CP_WT_DONE_TS" }, + { 1 << A5XX_INT_UNKNOWN_1, "UNKNOWN_1" }, + { 1 << A5XX_INT_CP_CACHE_FLUSH_TS, + "CP_CACHE_FLUSH_TS" }, + { 1 << A5XX_INT_UNUSED_2, + "UNUSED_2" }, + { 1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW, + "RBBM_ATB_BUS_OVERFLOW" }, + { 1 << A5XX_INT_MISC_HANG_DETECT, + "MISC_HANG_DETECT" }, + { 1 << A5XX_INT_UCHE_OOB_ACCESS, + "UCHE_OOB_ACCESS" }, + { 1 << A5XX_INT_UCHE_TRAP_INTR, + "UCHE_TRAP_INTR" }, + { 1 << A5XX_INT_DEBBUS_INTR_0, + "DEBBUS_INTR_0" }, + { 1 << A5XX_INT_DEBBUS_INTR_1, + "DEBBUS_INTR_1" }, + { 1 << A5XX_INT_GPMU_VOLTAGE_DROOP, + "GPMU_VOLTAGE_DROOP" }, + { 1 << A5XX_INT_GPMU_FIRMWARE, + "GPMU_FIRMWARE" }, + { 1 << A5XX_INT_ISDB_CPU_IRQ, + "ISDB_CPU_IRQ" }, + { 1 << A5XX_INT_ISDB_UNDER_DEBUG, + "ISDB_UNDER_DEBUG" }) + : "None" + ) +); + +#endif /* _ADRENO_TRACE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c new file mode 100644 index 000000000000..2f28a6f604ba --- /dev/null +++ b/drivers/gpu/msm/kgsl.c @@ -0,0 +1,4113 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/module.h> +#include <linux/fb.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/fdtable.h> +#include <linux/list.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/dma-buf.h> +#include <linux/pm_runtime.h> +#include <linux/rbtree.h> +#include <linux/major.h> +#include <linux/io.h> +#include <linux/mman.h> +#include <linux/sort.h> +#include <linux/security.h> +#include <linux/compat.h> +#include <linux/ctype.h> + +#include "kgsl.h" +#include "kgsl_debugfs.h" +#include "kgsl_cffdump.h" +#include "kgsl_log.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cmdbatch.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include "kgsl_sync.h" +#include "kgsl_compat.h" + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "kgsl." 
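A note on the parameter plumbing above: because MODULE_PARAM_PREFIX is redefined to "kgsl.", the mmutype parameter declared just below is exposed under the prefixed name when the driver is built into the kernel. A minimal sketch of typical usage (not taken from this patch; the exact boot arguments depend on the target) is a kernel command-line entry such as:

    kgsl.mmutype=iommu    /* or kgsl.mmutype=nommu, per the parameter description below */

When KGSL is built as a module instead, the prefix is empty and the parameter is passed as plain mmutype= at load time.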
+ +#ifndef arch_mmap_check +#define arch_mmap_check(addr, len, flags) (0) +#endif + +#ifndef pgprot_writebackcache +#define pgprot_writebackcache(_prot) (_prot) +#endif + +#ifndef pgprot_writethroughcache +#define pgprot_writethroughcache(_prot) (_prot) +#endif + +#ifdef CONFIG_ARM_LPAE +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(64) +#else +#define KGSL_DMA_BIT_MASK DMA_BIT_MASK(32) +#endif + +static char *ksgl_mmu_type; +module_param_named(mmutype, ksgl_mmu_type, charp, 0); +MODULE_PARM_DESC(ksgl_mmu_type, +"Type of MMU to be used for graphics. Valid values are 'iommu' or 'nommu'"); + +/* Mutex used for the IOMMU sync quirk */ +DEFINE_MUTEX(kgsl_mmu_sync); +EXPORT_SYMBOL(kgsl_mmu_sync); + +struct kgsl_dma_buf_meta { + struct dma_buf_attachment *attach; + struct dma_buf *dmabuf; + struct sg_table *table; +}; + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry); + +static const struct file_operations kgsl_fops; + +/* + * The memfree list contains the last N blocks of memory that have been freed. + * On a GPU fault we walk the list to see if the faulting address had been + * recently freed and print out a message to that effect + */ + +#define MEMFREE_ENTRIES 512 + +static DEFINE_SPINLOCK(memfree_lock); + +struct memfree_entry { + pid_t ptname; + uint64_t gpuaddr; + uint64_t size; + pid_t pid; + uint64_t flags; +}; + +static struct { + struct memfree_entry *list; + int head; + int tail; +} memfree; + +static int kgsl_memfree_init(void) +{ + memfree.list = kzalloc(MEMFREE_ENTRIES * sizeof(struct memfree_entry), + GFP_KERNEL); + + return (memfree.list) ? 0 : -ENOMEM; +} + +static void kgsl_memfree_exit(void) +{ + kfree(memfree.list); + memset(&memfree, 0, sizeof(memfree)); +} + +static inline bool match_memfree_addr(struct memfree_entry *entry, + pid_t ptname, uint64_t gpuaddr) +{ + return ((entry->ptname == ptname) && + (entry->size > 0) && + (gpuaddr >= entry->gpuaddr && + gpuaddr < (entry->gpuaddr + entry->size))); +} +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid) +{ + int ptr; + + if (memfree.list == NULL) + return 0; + + spin_lock(&memfree_lock); + + ptr = memfree.head - 1; + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + + /* Walk backwards through the list looking for the last match */ + while (ptr != memfree.tail) { + struct memfree_entry *entry = &memfree.list[ptr]; + + if (match_memfree_addr(entry, ptname, *gpuaddr)) { + *gpuaddr = entry->gpuaddr; + *flags = entry->flags; + *size = entry->size; + *pid = entry->pid; + + spin_unlock(&memfree_lock); + return 1; + } + + ptr = ptr - 1; + + if (ptr < 0) + ptr = MEMFREE_ENTRIES - 1; + } + + spin_unlock(&memfree_lock); + return 0; +} + +static void kgsl_memfree_purge(pid_t ptname, uint64_t gpuaddr, + uint64_t size) +{ + int i; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + for (i = 0; i < MEMFREE_ENTRIES; i++) { + struct memfree_entry *entry = &memfree.list[i]; + + if (entry->ptname != ptname || entry->size == 0) + continue; + + if (gpuaddr > entry->gpuaddr && + gpuaddr < entry->gpuaddr + entry->size) { + /* truncate the end of the entry */ + entry->size = entry->gpuaddr - gpuaddr; + } else if (gpuaddr <= entry->gpuaddr && + gpuaddr + size < entry->gpuaddr + entry->size) + /* Truncate the beginning of the entry */ + entry->gpuaddr = gpuaddr + size; + else if (gpuaddr + size >= entry->gpuaddr + entry->size) { + /* Remove the entire entry */ + entry->size = 0; + } + } + spin_unlock(&memfree_lock); +} + +static void kgsl_memfree_add(pid_t 
pid, pid_t ptname, uint64_t gpuaddr, + uint64_t size, uint64_t flags) + +{ + struct memfree_entry *entry; + + if (memfree.list == NULL) + return; + + spin_lock(&memfree_lock); + + entry = &memfree.list[memfree.head]; + + entry->pid = pid; + entry->ptname = ptname; + entry->gpuaddr = gpuaddr; + entry->size = size; + entry->flags = flags; + + memfree.head = (memfree.head + 1) % MEMFREE_ENTRIES; + + if (memfree.head == memfree.tail) + memfree.tail = (memfree.tail + 1) % MEMFREE_ENTRIES; + + spin_unlock(&memfree_lock); +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp) +{ + return device->ftbl->readtimestamp(device, priv, type, timestamp); +} +EXPORT_SYMBOL(kgsl_readtimestamp); + +/* Scheduled by kgsl_mem_entry_put_deferred() */ +static void _deferred_put(struct work_struct *work) +{ + struct kgsl_mem_entry *entry = + container_of(work, struct kgsl_mem_entry, work); + + kgsl_mem_entry_put(entry); +} + +static inline struct kgsl_mem_entry * +kgsl_mem_entry_create(void) +{ + struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + + if (entry != NULL) { + kref_init(&entry->refcount); + INIT_WORK(&entry->work, _deferred_put); + } + + return entry; +} +#ifdef CONFIG_DMA_SHARED_BUFFER +static void kgsl_destroy_ion(struct kgsl_dma_buf_meta *meta) +{ + if (meta != NULL) { + dma_buf_unmap_attachment(meta->attach, meta->table, + DMA_FROM_DEVICE); + dma_buf_detach(meta->dmabuf, meta->attach); + dma_buf_put(meta->dmabuf); + kfree(meta); + } +} +#else +static void kgsl_destroy_ion(struct kgsl_dma_buf_meta *meta) +{ + +} +#endif + +void +kgsl_mem_entry_destroy(struct kref *kref) +{ + struct kgsl_mem_entry *entry = container_of(kref, + struct kgsl_mem_entry, + refcount); + unsigned int memtype; + + if (entry == NULL) + return; + + /* pull out the memtype before the flags get cleared */ + memtype = kgsl_memdesc_usermem_type(&entry->memdesc); + + /* Detach from process list */ + kgsl_mem_entry_detach_process(entry); + + if (memtype != KGSL_MEM_ENTRY_KERNEL) + atomic_long_sub(entry->memdesc.size, + &kgsl_driver.stats.mapped); + + /* + * Ion takes care of freeing the sg_table for us so + * clear the sg table before freeing the sharedmem + * so kgsl_sharedmem_free doesn't try to free it again + */ + if (memtype == KGSL_MEM_ENTRY_ION) + entry->memdesc.sgt = NULL; + + if ((memtype == KGSL_MEM_ENTRY_USER) + && !(entry->memdesc.flags & KGSL_MEMFLAGS_GPUREADONLY)) { + int i = 0, j; + struct scatterlist *sg; + struct page *page; + /* + * Mark all of pages in the scatterlist as dirty since they + * were writable by the GPU. + */ + for_each_sg(entry->memdesc.sgt->sgl, sg, + entry->memdesc.sgt->nents, i) { + page = sg_page(sg); + for (j = 0; j < (sg->length >> PAGE_SHIFT); j++) + set_page_dirty(nth_page(page, j)); + } + } + + kgsl_sharedmem_free(&entry->memdesc); + + switch (memtype) { + case KGSL_MEM_ENTRY_ION: + kgsl_destroy_ion(entry->priv_data); + break; + default: + break; + } + + kfree(entry); +} +EXPORT_SYMBOL(kgsl_mem_entry_destroy); + +/** + * kgsl_mem_entry_track_gpuaddr - Insert a mem_entry in the address tree and + * assign it with a gpu address space before insertion + * @process: the process that owns the memory + * @entry: the memory entry + * + * @returns - 0 on succcess else error code + * + * Insert the kgsl_mem_entry in to the rb_tree for searching by GPU address. 
+ * The assignment of gpu address and insertion into list needs to + * happen with the memory lock held to avoid race conditions between + * gpu address being selected and some other thread looking through the + * rb list in search of memory based on gpuaddr + * This function should be called with processes memory spinlock held + */ +static int +kgsl_mem_entry_track_gpuaddr(struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_pagetable *pagetable = process->pagetable; + + /* + * If cpu=gpu map is used then caller needs to set the + * gpu address + */ + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + if (!entry->memdesc.gpuaddr) + return 0; + } else if (entry->memdesc.gpuaddr) { + WARN_ONCE(1, "gpuaddr assigned w/o holding memory lock\n"); + return -EINVAL; + } + if (kgsl_memdesc_is_secured(&entry->memdesc)) + pagetable = pagetable->mmu->securepagetable; + + return kgsl_mmu_get_gpuaddr(pagetable, &entry->memdesc); +} + +/** + * kgsl_mem_entry_untrack_gpuaddr() - Untrack memory that is previously tracked + * process - Pointer to process private to which memory belongs + * entry - Memory entry to untrack + * + * Function just does the opposite of kgsl_mem_entry_track_gpuaddr. Needs to be + * called with processes spin lock held + */ +static void +kgsl_mem_entry_untrack_gpuaddr(struct kgsl_process_private *process, + struct kgsl_mem_entry *entry) +{ + struct kgsl_pagetable *pagetable = entry->memdesc.pagetable; + + if (entry->memdesc.gpuaddr) + kgsl_mmu_put_gpuaddr(pagetable, &entry->memdesc); +} + +/** + * kgsl_mem_entry_attach_process - Attach a mem_entry to its owner process + * @entry: the memory entry + * @process: the owner process + * + * Attach a newly created mem_entry to its owner process so that + * it can be found later. The mem_entry will be added to mem_idr and have + * its 'id' field assigned. + * + * @returns - 0 on success or error code on failure. + */ +int +kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, + struct kgsl_device_private *dev_priv) +{ + int id; + int ret; + struct kgsl_process_private *process = dev_priv->process_priv; + struct kgsl_pagetable *pagetable = NULL; + + ret = kgsl_process_private_get(process); + if (!ret) + return -EBADF; + idr_preload(GFP_KERNEL); + spin_lock(&process->mem_lock); + id = idr_alloc(&process->mem_idr, entry, 1, 0, GFP_NOWAIT); + spin_unlock(&process->mem_lock); + idr_preload_end(); + + if (id < 0) { + ret = id; + goto err_put_proc_priv; + } + + entry->id = id; + entry->priv = process; + + ret = kgsl_mem_entry_track_gpuaddr(process, entry); + if (ret) { + spin_lock(&process->mem_lock); + idr_remove(&process->mem_idr, entry->id); + spin_unlock(&process->mem_lock); + goto err_put_proc_priv; + } + + /* map the memory after unlocking if gpuaddr has been assigned */ + if (entry->memdesc.gpuaddr) { + /* if a secured buffer map it to secure global pagetable */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) + pagetable = process->pagetable->mmu->securepagetable; + else + pagetable = process->pagetable; + + entry->memdesc.pagetable = pagetable; + ret = kgsl_mmu_map(pagetable, &entry->memdesc); + if (ret) + kgsl_mem_entry_detach_process(entry); + } + + kgsl_memfree_purge(pagetable ? 
pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size); + + return ret; + +err_put_proc_priv: + kgsl_process_private_put(process); + return ret; +} + +/* Detach a memory entry from a process and unmap it from the MMU */ + +static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry) +{ + unsigned int type; + if (entry == NULL) + return; + + /* Unmap here so that below we can call kgsl_mmu_put_gpuaddr */ + kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc); + + kgsl_mem_entry_untrack_gpuaddr(entry->priv, entry); + + spin_lock(&entry->priv->mem_lock); + if (entry->id != 0) + idr_remove(&entry->priv->mem_idr, entry->id); + entry->id = 0; + + type = kgsl_memdesc_usermem_type(&entry->memdesc); + entry->priv->stats[type].cur -= entry->memdesc.size; + spin_unlock(&entry->priv->mem_lock); + kgsl_process_private_put(entry->priv); + + entry->priv = NULL; +} + +/** + * kgsl_context_dump() - dump information about a draw context + * @device: KGSL device that owns the context + * @context: KGSL context to dump information about + * + * Dump specific information about the context to the kernel log. Used for + * fence timeout callbacks + */ +void kgsl_context_dump(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (_kgsl_context_get(context) == 0) + return; + + device = context->device; + + if (kgsl_context_detached(context)) { + dev_err(device->dev, " context[%d]: context detached\n", + context->id); + } else if (device->ftbl->drawctxt_dump != NULL) + device->ftbl->drawctxt_dump(device, context); + + kgsl_context_put(context); +} +EXPORT_SYMBOL(kgsl_context_dump); + +/* Allocate a new context ID */ +static int _kgsl_get_context_id(struct kgsl_device *device, + struct kgsl_context *context) +{ + int id; + + idr_preload(GFP_KERNEL); + write_lock(&device->context_lock); + id = idr_alloc(&device->context_idr, context, 1, + KGSL_MEMSTORE_MAX, GFP_NOWAIT); + write_unlock(&device->context_lock); + idr_preload_end(); + + if (id > 0) + context->id = id; + + return id; +} + +/** + * kgsl_context_init() - helper to initialize kgsl_context members + * @dev_priv: the owner of the context + * @context: the newly created context struct, should be allocated by + * the device specific drawctxt_create function. + * + * This is a helper function for the device specific drawctxt_create + * function to initialize the common members of its context struct. + * If this function succeeds, reference counting is active in the context + * struct and the caller should kgsl_context_put() it on error. + * If it fails, the caller should just free the context structure + * it passed in. + */ +int kgsl_context_init(struct kgsl_device_private *dev_priv, + struct kgsl_context *context) +{ + struct kgsl_device *device = dev_priv->device; + char name[64]; + int ret = 0, id; + + id = _kgsl_get_context_id(device, context); + if (id == -ENOSPC) { + /* + * Before declaring that there are no contexts left try + * flushing the event workqueue just in case there are + * detached contexts waiting to finish + */ + + flush_workqueue(device->events_wq); + id = _kgsl_get_context_id(device, context); + } + + if (id < 0) { + if (id == -ENOSPC) + KGSL_DRV_INFO(device, + "cannot have more than %zu contexts due to memstore limitation\n", + KGSL_MEMSTORE_MAX); + + return id; + } + + kref_init(&context->refcount); + /* + * Get a refernce to the process private so its not destroyed, until + * the context is destroyed. 
This will also prevent the pagetable + * from being destroyed + */ + if (!kgsl_process_private_get(dev_priv->process_priv)) { + ret = -EBADF; + goto out; + } + context->device = dev_priv->device; + context->dev_priv = dev_priv; + context->proc_priv = dev_priv->process_priv; + context->tid = task_pid_nr(current); + + ret = kgsl_sync_timeline_create(context); + if (ret) + goto out; + + snprintf(name, sizeof(name), "context-%d", id); + kgsl_add_event_group(&context->events, context, name, + kgsl_readtimestamp, context); + +out: + if (ret) { + write_lock(&device->context_lock); + idr_remove(&dev_priv->device->context_idr, id); + write_unlock(&device->context_lock); + } + + return ret; +} +EXPORT_SYMBOL(kgsl_context_init); + +/** + * kgsl_context_detach() - Release the "master" context reference + * @context: The context that will be detached + * + * This is called when a context becomes unusable, because userspace + * has requested for it to be destroyed. The context itself may + * exist a bit longer until its reference count goes to zero. + * Other code referencing the context can detect that it has been + * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in + * context->priv. + */ +static void kgsl_context_detach(struct kgsl_context *context) +{ + struct kgsl_device *device; + + if (context == NULL) + return; + + /* + * Mark the context as detached to keep others from using + * the context before it gets fully removed, and to make sure + * we don't try to detach twice. + */ + if (test_and_set_bit(KGSL_CONTEXT_PRIV_DETACHED, &context->priv)) + return; + + device = context->device; + + trace_kgsl_context_detach(device, context); + + context->device->ftbl->drawctxt_detach(context); + + /* + * Cancel all pending events after the device-specific context is + * detached, to avoid possibly freeing memory while it is still + * in use by the GPU. 
+ */ + kgsl_cancel_events(device, &context->events); + + /* Remove the event group from the list */ + kgsl_del_event_group(&context->events); + + kgsl_context_put(context); +} + +void +kgsl_context_destroy(struct kref *kref) +{ + struct kgsl_context *context = container_of(kref, struct kgsl_context, + refcount); + struct kgsl_device *device = context->device; + + trace_kgsl_context_destroy(device, context); + + BUG_ON(!kgsl_context_detached(context)); + + write_lock(&device->context_lock); + if (context->id != KGSL_CONTEXT_INVALID) { + + /* Clear the timestamps in the memstore during destroy */ + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), 0); + kgsl_sharedmem_writel(device, &device->memstore, + KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), 0); + + /* clear device power constraint */ + if (context->id == device->pwrctrl.constraint.owner_id) { + trace_kgsl_constraint(device, + device->pwrctrl.constraint.type, + device->pwrctrl.active_pwrlevel, + 0); + device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE; + } + + idr_remove(&device->context_idr, context->id); + context->id = KGSL_CONTEXT_INVALID; + } + write_unlock(&device->context_lock); + kgsl_sync_timeline_destroy(context); + kgsl_process_private_put(context->proc_priv); + + device->ftbl->drawctxt_destroy(context); +} + +struct kgsl_device *kgsl_get_device(int dev_idx) +{ + int i; + struct kgsl_device *ret = NULL; + + mutex_lock(&kgsl_driver.devlock); + + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->id == dev_idx) { + ret = kgsl_driver.devp[i]; + break; + } + } + + mutex_unlock(&kgsl_driver.devlock); + return ret; +} +EXPORT_SYMBOL(kgsl_get_device); + +static struct kgsl_device *kgsl_get_minor(int minor) +{ + struct kgsl_device *ret = NULL; + + if (minor < 0 || minor >= KGSL_DEVICE_MAX) + return NULL; + + mutex_lock(&kgsl_driver.devlock); + ret = kgsl_driver.devp[minor]; + mutex_unlock(&kgsl_driver.devlock); + + return ret; +} + +/** + * kgsl_check_timestamp() - return true if the specified timestamp is retired + * @device: Pointer to the KGSL device to check + * @context: Pointer to the context for the timestamp + * @timestamp: The timestamp to compare + */ +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + unsigned int ts_processed; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &ts_processed); + + return (timestamp_cmp(ts_processed, timestamp) >= 0); +} +EXPORT_SYMBOL(kgsl_check_timestamp); + +static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state) +{ + int status = -EINVAL; + + if (!device) + return -EINVAL; + + KGSL_PWR_WARN(device, "suspend start\n"); + + mutex_lock(&device->mutex); + status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND); + mutex_unlock(&device->mutex); + + KGSL_PWR_WARN(device, "suspend end\n"); + return status; +} + +static int kgsl_resume_device(struct kgsl_device *device) +{ + if (!device) + return -EINVAL; + + KGSL_PWR_WARN(device, "resume start\n"); + mutex_lock(&device->mutex); + if (device->state == KGSL_STATE_SUSPEND) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + } else if (device->state != KGSL_STATE_INIT) { + /* + * This is an error situation,so wait for the device + * to idle and then put the device to SLUMBER state. + * This will put the device to the right state when + * we resume. 
+ */ + if (device->state == KGSL_STATE_ACTIVE) + device->ftbl->idle(device); + kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER); + KGSL_PWR_ERR(device, + "resume invoked without a suspend\n"); + } + + mutex_unlock(&device->mutex); + KGSL_PWR_WARN(device, "resume end\n"); + return 0; +} + +static int kgsl_suspend(struct device *dev) +{ + + pm_message_t arg = {0}; + struct kgsl_device *device = dev_get_drvdata(dev); + return kgsl_suspend_device(device, arg); +} + +static int kgsl_resume(struct device *dev) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + return kgsl_resume_device(device); +} + +static int kgsl_runtime_suspend(struct device *dev) +{ + return 0; +} + +static int kgsl_runtime_resume(struct device *dev) +{ + return 0; +} + +const struct dev_pm_ops kgsl_pm_ops = { + .suspend = kgsl_suspend, + .resume = kgsl_resume, + .runtime_suspend = kgsl_runtime_suspend, + .runtime_resume = kgsl_runtime_resume, +}; +EXPORT_SYMBOL(kgsl_pm_ops); + +int kgsl_suspend_driver(struct platform_device *pdev, + pm_message_t state) +{ + struct kgsl_device *device = dev_get_drvdata(&pdev->dev); + return kgsl_suspend_device(device, state); +} +EXPORT_SYMBOL(kgsl_suspend_driver); + +int kgsl_resume_driver(struct platform_device *pdev) +{ + struct kgsl_device *device = dev_get_drvdata(&pdev->dev); + return kgsl_resume_device(device); +} +EXPORT_SYMBOL(kgsl_resume_driver); + +/** + * kgsl_destroy_process_private() - Cleanup function to free process private + * @kref: - Pointer to object being destroyed's kref struct + * Free struct object and all other resources attached to it. + * Since the function can be used when not all resources inside process + * private have been allocated, there is a check to (before each resource + * cleanup) see if the struct member being cleaned is in fact allocated or not. + * If the value is not NULL, resource is freed. + */ +static void kgsl_destroy_process_private(struct kref *kref) +{ + struct kgsl_process_private *private = container_of(kref, + struct kgsl_process_private, refcount); + + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + + /* When using global pagetables, do not detach global pagetable */ + if (kgsl_mmu_enabled() && + private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_putpagetable(private->pagetable); + + kfree(private); + return; +} + +void +kgsl_process_private_put(struct kgsl_process_private *private) +{ + if (private) + kref_put(&private->refcount, kgsl_destroy_process_private); +} + +/** + * kgsl_process_private_find() - Find the process associated with the specified + * name + * @name: pid_t of the process to search for + * Return the process struct for the given ID. 
+ */ +struct kgsl_process_private *kgsl_process_private_find(pid_t pid) +{ + struct kgsl_process_private *p, *private = NULL; + + mutex_lock(&kgsl_driver.process_mutex); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + if (p->pid == pid) { + if (kgsl_process_private_get(p)) + private = p; + break; + } + } + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} + +static struct kgsl_process_private *kgsl_process_private_new( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + pid_t tgid = task_tgid_nr(current); + + /* Search in the process list */ + list_for_each_entry(private, &kgsl_driver.process_list, list) { + if (private->pid == tgid) { + if (!kgsl_process_private_get(private)) + private = ERR_PTR(-EINVAL); + return private; + } + } + + /* Create a new object */ + private = kzalloc(sizeof(struct kgsl_process_private), GFP_KERNEL); + if (private == NULL) + return ERR_PTR(-ENOMEM); + + kref_init(&private->refcount); + + private->pid = tgid; + get_task_comm(private->comm, current->group_leader); + + spin_lock_init(&private->mem_lock); + spin_lock_init(&private->syncsource_lock); + + idr_init(&private->mem_idr); + idr_init(&private->syncsource_idr); + + /* Allocate a pagetable for the new process object */ + if (kgsl_mmu_enabled()) { + private->pagetable = kgsl_mmu_getpagetable(&device->mmu, tgid); + if (IS_ERR(private->pagetable)) { + int err = PTR_ERR(private->pagetable); + + idr_destroy(&private->mem_idr); + idr_destroy(&private->syncsource_idr); + + kfree(private); + private = ERR_PTR(err); + } + } + + return private; +} + +static void process_release_memory(struct kgsl_process_private *private) +{ + struct kgsl_mem_entry *entry; + int next = 0; + + while (1) { + spin_lock(&private->mem_lock); + entry = idr_get_next(&private->mem_idr, &next); + if (entry == NULL) { + spin_unlock(&private->mem_lock); + break; + } + /* + * If the free pending flag is not set it means that user space + * did not free it's reference to this entry, in that case + * free a reference to this entry, other references are from + * within kgsl so they will be freed eventually by kgsl + */ + if (!entry->pending_free) { + entry->pending_free = 1; + spin_unlock(&private->mem_lock); + kgsl_mem_entry_put(entry); + } else { + spin_unlock(&private->mem_lock); + } + next = next + 1; + } +} + +static void process_release_sync_sources(struct kgsl_process_private *private) +{ + struct kgsl_syncsource *syncsource; + int next = 0; + + while (1) { + spin_lock(&private->syncsource_lock); + syncsource = idr_get_next(&private->syncsource_idr, &next); + spin_unlock(&private->syncsource_lock); + + if (syncsource == NULL) + break; + + kgsl_syncsource_put(syncsource); + next = next + 1; + } +} + +static void kgsl_process_private_close(struct kgsl_device_private *dev_priv, + struct kgsl_process_private *private) +{ + mutex_lock(&kgsl_driver.process_mutex); + + if (--private->fd_count > 0) { + mutex_unlock(&kgsl_driver.process_mutex); + kgsl_process_private_put(private); + return; + } + + /* + * If this is the last file on the process take down the debug + * directories and garbage collect any outstanding resources + */ + + kgsl_process_uninit_sysfs(private); + debugfs_remove_recursive(private->debug_root); + + process_release_sync_sources(private); + + /* When using global pagetables, do not detach global pagetable */ + if (kgsl_mmu_enabled() && + private->pagetable->name != KGSL_MMU_GLOBAL_PT) + kgsl_mmu_detach_pagetable(private->pagetable); + + /* Remove the process struct from the master 
list */ + list_del(&private->list); + + /* + * Unlock the mutex before releasing the memory - this prevents a + * deadlock with the IOMMU mutex if a page fault occurs + */ + mutex_unlock(&kgsl_driver.process_mutex); + + process_release_memory(private); + + kgsl_process_private_put(private); +} + + +static struct kgsl_process_private *kgsl_process_private_open( + struct kgsl_device *device) +{ + struct kgsl_process_private *private; + + mutex_lock(&kgsl_driver.process_mutex); + private = kgsl_process_private_new(device); + + if (IS_ERR(private)) + goto done; + + /* + * If this is a new process create the debug directories and add it to + * the process list + */ + + if (private->fd_count++ == 0) { + kgsl_process_init_sysfs(device, private); + kgsl_process_init_debugfs(private); + + list_add(&private->list, &kgsl_driver.process_list); + } + +done: + mutex_unlock(&kgsl_driver.process_mutex); + return private; +} + +static int kgsl_close_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + device->open_count--; + if (device->open_count == 0) { + + /* Wait for the active count to go to 0 */ + kgsl_active_count_wait(device, 0); + + /* Fail if the wait times out */ + BUG_ON(atomic_read(&device->active_cnt) > 0); + + result = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + } + mutex_unlock(&device->mutex); + return result; + +} + +static void device_release_contexts(struct kgsl_device_private *dev_priv) +{ + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + int next = 0; + + while (1) { + read_lock(&device->context_lock); + context = idr_get_next(&device->context_idr, &next); + read_unlock(&device->context_lock); + + if (context == NULL) + break; + + if (context->dev_priv == dev_priv) { + /* + * Hold a reference to the context in case somebody + * tries to put it while we are detaching + */ + + if (_kgsl_context_get(context)) { + kgsl_context_detach(context); + kgsl_context_put(context); + } + } + + next = next + 1; + } +} + +static int kgsl_release(struct inode *inodep, struct file *filep) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + int result; + + filep->private_data = NULL; + + /* Release the contexts for the file */ + device_release_contexts(dev_priv); + + /* Close down the process wide resources for the file */ + kgsl_process_private_close(dev_priv, dev_priv->process_priv); + + kfree(dev_priv); + + result = kgsl_close_device(device); + pm_runtime_put(&device->pdev->dev); + + return result; +} + +static int kgsl_open_device(struct kgsl_device *device) +{ + int result = 0; + + mutex_lock(&device->mutex); + if (device->open_count == 0) { + /* + * active_cnt special case: we are starting up for the first + * time, so use this sequence instead of the kgsl_pwrctrl_wake() + * which will be called by kgsl_active_count_get(). + */ + atomic_inc(&device->active_cnt); + kgsl_sharedmem_set(device, &device->memstore, 0, 0, + device->memstore.size); + + result = device->ftbl->init(device); + if (result) + goto err; + + result = device->ftbl->start(device, 0); + if (result) + goto err; + /* + * Make sure the gates are open, so they don't block until + * we start suspend or FT. 
+ */ + complete_all(&device->hwaccess_gate); + kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + kgsl_active_count_put(device); + } + device->open_count++; +err: + if (result) { + kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + atomic_dec(&device->active_cnt); + } + + mutex_unlock(&device->mutex); + return result; +} + +static int kgsl_open(struct inode *inodep, struct file *filep) +{ + int result; + struct kgsl_device_private *dev_priv; + struct kgsl_device *device; + unsigned int minor = iminor(inodep); + + device = kgsl_get_minor(minor); + BUG_ON(device == NULL); + + result = pm_runtime_get_sync(&device->pdev->dev); + if (result < 0) { + KGSL_DRV_ERR(device, + "Runtime PM: Unable to wake up the device, rc = %d\n", + result); + return result; + } + result = 0; + + dev_priv = kzalloc(sizeof(struct kgsl_device_private), GFP_KERNEL); + if (dev_priv == NULL) { + result = -ENOMEM; + goto err; + } + + dev_priv->device = device; + filep->private_data = dev_priv; + + result = kgsl_open_device(device); + if (result) + goto err; + + /* + * Get file (per process) private struct. This must be done + * after the first start so that the global pagetable mappings + * are set up before we create the per-process pagetable. + */ + dev_priv->process_priv = kgsl_process_private_open(device); + if (IS_ERR(dev_priv->process_priv)) { + result = PTR_ERR(dev_priv->process_priv); + kgsl_close_device(device); + goto err; + } + +err: + if (result) { + filep->private_data = NULL; + kfree(dev_priv); + pm_runtime_put(&device->pdev->dev); + } + return result; +} + +#define GPUADDR_IN_MEMDESC(_val, _memdesc) \ + (((_val) >= (_memdesc)->gpuaddr) && \ + ((_val) < ((_memdesc)->gpuaddr + (_memdesc)->size))) + +/** + * kgsl_sharedmem_find() - Find a gpu memory allocation + * + * @private: private data for the process to check. + * @gpuaddr: start address of the region + * + * Find a gpu allocation. Caller must kgsl_mem_entry_put() + * the returned entry when finished using it. + */ +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr) +{ + int ret = 0, id; + struct kgsl_mem_entry *entry = NULL; + + if (!private) + return NULL; + + if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr)) + return NULL; + + spin_lock(&private->mem_lock); + idr_for_each_entry(&private->mem_idr, entry, id) { + if (entry == NULL) + continue; + + if (GPUADDR_IN_MEMDESC(gpuaddr, &entry->memdesc)) { + ret = kgsl_mem_entry_get(entry); + break; + } + } + spin_unlock(&private->mem_lock); + + return (ret == 0) ? NULL : entry; +} +EXPORT_SYMBOL(kgsl_sharedmem_find); + +/** + * kgsl_sharedmem_find_id() - find a memory entry by id + * @process: the owning process + * @id: id to find + * + * @returns - the mem_entry or NULL + * + * Caller must kgsl_mem_entry_put() the returned entry, when finished using + * it. 
+ */ +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id) +{ + int result = 0; + struct kgsl_mem_entry *entry; + + drain_workqueue(kgsl_driver.mem_workqueue); + + spin_lock(&process->mem_lock); + entry = idr_find(&process->mem_idr, id); + if (entry) + result = kgsl_mem_entry_get(entry); + spin_unlock(&process->mem_lock); + + if (!result) + return NULL; + return entry; +} + +/** + * kgsl_mem_entry_unset_pend() - Unset the pending free flag of an entry + * @entry - The memory entry + */ +static inline void kgsl_mem_entry_unset_pend(struct kgsl_mem_entry *entry) +{ + if (entry == NULL) + return; + spin_lock(&entry->priv->mem_lock); + entry->pending_free = 0; + spin_unlock(&entry->priv->mem_lock); +} + +/** + * kgsl_mem_entry_set_pend() - Set the pending free flag of a memory entry + * @entry - The memory entry + * + * @returns - true if pending flag was 0 else false + * + * This function will set the pending free flag if it is previously unset. Used + * to prevent race condition between ioctls calling free/freememontimestamp + * on the same entry. Whichever thread set's the flag first will do the free. + */ +static inline bool kgsl_mem_entry_set_pend(struct kgsl_mem_entry *entry) +{ + bool ret = false; + + if (entry == NULL) + return false; + + spin_lock(&entry->priv->mem_lock); + if (!entry->pending_free) { + entry->pending_free = 1; + ret = true; + } + spin_unlock(&entry->priv->mem_lock); + return ret; +} + +/*call all ioctl sub functions with driver locked*/ +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_device_getproperty *param = data; + + switch (param->type) { + case KGSL_PROP_VERSION: + { + struct kgsl_version version; + if (param->sizebytes != sizeof(version)) { + result = -EINVAL; + break; + } + + version.drv_major = KGSL_VERSION_MAJOR; + version.drv_minor = KGSL_VERSION_MINOR; + version.dev_major = dev_priv->device->ver_major; + version.dev_minor = dev_priv->device->ver_minor; + + if (copy_to_user(param->value, &version, sizeof(version))) + result = -EFAULT; + + break; + } + case KGSL_PROP_GPU_RESET_STAT: + { + /* Return reset status of given context and clear it */ + uint32_t id; + struct kgsl_context *context; + + if (param->sizebytes != sizeof(unsigned int)) { + result = -EINVAL; + break; + } + /* We expect the value passed in to contain the context id */ + if (copy_from_user(&id, param->value, + sizeof(unsigned int))) { + result = -EFAULT; + break; + } + context = kgsl_context_get_owner(dev_priv, id); + if (!context) { + result = -EINVAL; + break; + } + /* + * Copy the reset status to value which also serves as + * the out parameter + */ + if (copy_to_user(param->value, &(context->reset_status), + sizeof(unsigned int))) + result = -EFAULT; + else { + /* Clear reset status once its been queried */ + context->reset_status = KGSL_CTX_STAT_NO_ERROR; + } + + kgsl_context_put(context); + break; + } + default: + if (is_compat_task()) + result = dev_priv->device->ftbl->getproperty_compat( + dev_priv->device, param->type, + param->value, param->sizebytes); + else + result = dev_priv->device->ftbl->getproperty( + dev_priv->device, param->type, + param->value, param->sizebytes); + } + + + return result; +} + +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + /* The getproperty struct is reused for setproperty too */ + struct kgsl_device_getproperty *param = 
data; + + /* Reroute to compat version if coming from compat_ioctl */ + if (is_compat_task()) + result = dev_priv->device->ftbl->setproperty_compat( + dev_priv, param->type, param->value, + param->sizebytes); + else if (dev_priv->device->ftbl->setproperty) + result = dev_priv->device->ftbl->setproperty( + dev_priv, param->type, param->value, + param->sizebytes); + + return result; +} + +long kgsl_ioctl_device_waittimestamp_ctxtid( + struct kgsl_device_private *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_device_waittimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + long result = -EINVAL; + unsigned int temp_cur_ts = 0; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return result; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + + trace_kgsl_waittimestamp_entry(device, context->id, temp_cur_ts, + param->timestamp, param->timeout); + + result = device->ftbl->waittimestamp(device, context, param->timestamp, + param->timeout); + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &temp_cur_ts); + trace_kgsl_waittimestamp_exit(device, temp_cur_ts, result); + + kgsl_context_put(context); + + return result; +} + +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_ringbuffer_issueibcmds *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch = NULL; + long result = -EINVAL; + + /* The legacy functions don't support synchronization commands */ + if ((param->flags & (KGSL_CMDBATCH_SYNC | KGSL_CMDBATCH_MARKER))) + return -EINVAL; + + /* Get the context */ + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + /* Create a command batch */ + cmdbatch = kgsl_cmdbatch_create(device, context, param->flags); + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + if (param->flags & KGSL_CMDBATCH_SUBMIT_IB_LIST) { + /* Sanity check the number of IBs */ + if (param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS) { + result = -EINVAL; + goto done; + } + result = kgsl_cmdbatch_add_ibdesc_list(device, cmdbatch, + (void __user *) param->ibdesc_addr, + param->numibs); + } else { + struct kgsl_ibdesc ibdesc; + /* Ultra legacy path */ + + ibdesc.gpuaddr = param->ibdesc_addr; + ibdesc.sizedwords = param->numibs; + ibdesc.ctrl = 0; + + result = kgsl_cmdbatch_add_ibdesc(device, cmdbatch, &ibdesc); + } + + if (result) + goto done; + + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_cmdbatch_destroy(cmdbatch); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_submit_commands *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch = NULL; + long result = -EINVAL; + + /* + * The SYNC bit is supposed to identify a dummy sync object so warn the + * user if they specified any IBs with it. A MARKER command can either + * have IBs or not but if the command has 0 IBs it is automatically + * assumed to be a marker. 
If none of the above make sure that the user + * specified a sane number of IBs + */ + + if ((param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds) + KGSL_DEV_ERR_ONCE(device, + "Commands specified with the SYNC flag. They will be ignored\n"); + else if (param->numcmds > KGSL_MAX_NUMIBS) + return -EINVAL; + else if (!(param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds == 0) + param->flags |= KGSL_CMDBATCH_MARKER; + + /* Make sure that we don't have too many syncpoints */ + if (param->numsyncs > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + /* Create a command batch */ + cmdbatch = kgsl_cmdbatch_create(device, context, param->flags); + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + result = kgsl_cmdbatch_add_ibdesc_list(device, cmdbatch, + param->cmdlist, param->numcmds); + if (result) + goto done; + + result = kgsl_cmdbatch_add_syncpoints(device, cmdbatch, + param->synclist, param->numsyncs); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdbatch->profiling_buf_entry == NULL) + cmdbatch->flags &= ~KGSL_CMDBATCH_PROFILING; + + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_cmdbatch_destroy(cmdbatch); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpu_command *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + struct kgsl_cmdbatch *cmdbatch = NULL; + + long result = -EINVAL; + + /* + * The SYNC bit is supposed to identify a dummy sync object so warn the + * user if they specified any IBs with it. A MARKER command can either + * have IBs or not but if the command has 0 IBs it is automatically + * assumed to be a marker. If none of the above make sure that the user + * specified a sane number of IBs + */ + if ((param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds) + KGSL_DEV_ERR_ONCE(device, + "Commands specified with the SYNC flag. 
They will be ignored\n"); + else if (!(param->flags & KGSL_CMDBATCH_SYNC) && param->numcmds == 0) + param->flags |= KGSL_CMDBATCH_MARKER; + + /* Make sure that the memobj and syncpoint count isn't too big */ + if (param->numcmds > KGSL_MAX_NUMIBS || + param->numobjs > KGSL_MAX_NUMIBS || + param->numsyncs > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + cmdbatch = kgsl_cmdbatch_create(device, context, param->flags); + if (IS_ERR(cmdbatch)) { + result = PTR_ERR(cmdbatch); + goto done; + } + + result = kgsl_cmdbatch_add_cmdlist(device, cmdbatch, + to_user_ptr(param->cmdlist), + param->cmdsize, param->numcmds); + if (result) + goto done; + + result = kgsl_cmdbatch_add_memlist(device, cmdbatch, + to_user_ptr(param->objlist), + param->objsize, param->numobjs); + if (result) + goto done; + + result = kgsl_cmdbatch_add_synclist(device, cmdbatch, + to_user_ptr(param->synclist), + param->syncsize, param->numsyncs); + if (result) + goto done; + + /* If no profiling buffer was specified, clear the flag */ + if (cmdbatch->profiling_buf_entry == NULL) + cmdbatch->flags &= ~KGSL_CMDBATCH_PROFILING; + + result = dev_priv->device->ftbl->issueibcmds(dev_priv, context, + cmdbatch, ¶m->timestamp); + +done: + /* + * -EPROTO is a "success" error - it just tells the user that the + * context had previously faulted + */ + if (result && result != -EPROTO) + kgsl_cmdbatch_destroy(cmdbatch); + + kgsl_context_put(context); + return result; +} + +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_readtimestamp_ctxtid *param = data; + struct kgsl_device *device = dev_priv->device; + struct kgsl_context *context; + long result = -EINVAL; + + mutex_lock(&device->mutex); + context = kgsl_context_get_owner(dev_priv, param->context_id); + + if (context) { + result = kgsl_readtimestamp(device, context, + param->type, ¶m->timestamp); + + trace_kgsl_readtimestamp(device, context->id, + param->type, param->timestamp); + } + + kgsl_context_put(context); + mutex_unlock(&device->mutex); + return result; +} + +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_drawctxt_create *param = data; + struct kgsl_context *context = NULL; + struct kgsl_device *device = dev_priv->device; + + context = device->ftbl->drawctxt_create(dev_priv, ¶m->flags); + if (IS_ERR(context)) { + result = PTR_ERR(context); + goto done; + } + trace_kgsl_context_create(dev_priv->device, context, param->flags); + param->drawctxt_id = context->id; +done: + return result; +} + +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_drawctxt_destroy *param = data; + struct kgsl_context *context; + + context = kgsl_context_get_owner(dev_priv, param->drawctxt_id); + if (context == NULL) + return -EINVAL; + + kgsl_context_detach(context); + kgsl_context_put(context); + + return 0; +} + +static long gpumem_free_entry(struct kgsl_mem_entry *entry) +{ + pid_t ptname = 0; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + trace_kgsl_mem_free(entry); + + if (entry->memdesc.pagetable != NULL) + ptname = entry->memdesc.pagetable->name; + + kgsl_memfree_add(entry->priv->pid, ptname, entry->memdesc.gpuaddr, + entry->memdesc.size, entry->memdesc.flags); + + kgsl_mem_entry_put(entry); + + return 0; +} + +static void 
gpumem_free_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int ret) +{ + struct kgsl_context *context = group->context; + struct kgsl_mem_entry *entry = priv; + unsigned int timestamp; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, ×tamp); + + /* Free the memory for all event types */ + trace_kgsl_mem_timestamp_free(device, entry, KGSL_CONTEXT_ID(context), + timestamp, 0); + kgsl_mem_entry_put(entry); +} + +static long gpumem_free_entry_on_timestamp(struct kgsl_device *device, + struct kgsl_mem_entry *entry, + struct kgsl_context *context, unsigned int timestamp) +{ + int ret; + unsigned int temp; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &temp); + trace_kgsl_mem_timestamp_queue(device, entry, context->id, temp, + timestamp); + ret = kgsl_add_event(device, &context->events, + timestamp, gpumem_free_func, entry); + + if (ret) + kgsl_mem_entry_unset_pend(entry); + + return ret; +} + +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "Invalid GPU address 0x%016llx\n", + (uint64_t) param->gpuaddr); + return -EINVAL; + } + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_free_id *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "Invalid GPU memory object ID %d\n", param->id); + return -EINVAL; + } + + ret = gpumem_free_entry(entry); + kgsl_mem_entry_put(entry); + + return ret; +} + +static long gpuobj_free_on_timestamp(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_gpu_event_timestamp event; + struct kgsl_context *context; + long ret; + + memset(&event, 0, sizeof(event)); + + ret = _copy_from_user(&event, to_user_ptr(param->priv), + sizeof(event), param->len); + if (ret) + return ret; + + if (event.context_id == 0) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, event.context_id); + if (context == NULL) + return -EINVAL; + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, context, + event.timestamp); + + kgsl_context_put(context); + return ret; +} + +static void gpuobj_free_fence_func(void *priv) +{ + kgsl_mem_entry_put_deferred((struct kgsl_mem_entry *) priv); +} + +static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv, + struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param) +{ + struct kgsl_sync_fence_waiter *handle; + struct kgsl_gpu_event_fence event; + long ret; + + if (!kgsl_mem_entry_set_pend(entry)) + return -EBUSY; + + memset(&event, 0, sizeof(event)); + + ret = _copy_from_user(&event, to_user_ptr(param->priv), + sizeof(event), param->len); + if (ret) { + kgsl_mem_entry_unset_pend(entry); + return ret; + } + + if (event.fd < 0) { + kgsl_mem_entry_unset_pend(entry); + return -EINVAL; + } + + handle = kgsl_sync_fence_async_wait(event.fd, + gpuobj_free_fence_func, 
entry); + + /* if handle is NULL the fence has already signaled */ + if (handle == NULL) + return gpumem_free_entry(entry); + + if (IS_ERR(handle)) { + kgsl_mem_entry_unset_pend(entry); + return PTR_ERR(handle); + } + + return 0; +} + +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + long ret; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) { + KGSL_MEM_ERR(dev_priv->device, + "Invalid GPU memory object ID %d\n", param->id); + return -EINVAL; + } + + /* If no event is specified then free immediately */ + if (!(param->flags & KGSL_GPUOBJ_FREE_ON_EVENT)) + ret = gpumem_free_entry(entry); + else if (param->type == KGSL_GPU_EVENT_TIMESTAMP) + ret = gpuobj_free_on_timestamp(dev_priv, entry, param); + else if (param->type == KGSL_GPU_EVENT_FENCE) + ret = gpuobj_free_on_fence(dev_priv, entry, param); + else + ret = -EINVAL; + + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data; + struct kgsl_context *context = NULL; + struct kgsl_mem_entry *entry; + long ret = -EINVAL; + + if (param->type != KGSL_TIMESTAMP_RETIRED) + return -EINVAL; + + context = kgsl_context_get_owner(dev_priv, param->context_id); + if (context == NULL) + return -EINVAL; + + entry = kgsl_sharedmem_find(dev_priv->process_priv, + (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_ERR(dev_priv->device, + "Invalid GPU address 0x%016llx\n", + (uint64_t) param->gpuaddr); + goto out; + } + + ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, + context, param->timestamp); + + kgsl_mem_entry_put(entry); +out: + kgsl_context_put(context); + + return ret; +} + +static inline int _check_region(unsigned long start, unsigned long size, + uint64_t len) +{ + uint64_t end = ((uint64_t) start) + size; + return (end > len); +} + +static int check_vma(struct vm_area_struct *vma, struct file *vmfile, + struct kgsl_memdesc *memdesc) +{ + if (vma == NULL || vma->vm_file != vmfile) + return -EINVAL; + + /* userspace may not know the size, in which case use the whole vma */ + if (memdesc->size == 0) + memdesc->size = vma->vm_end - vma->vm_start; + /* range checking */ + if (vma->vm_start != memdesc->useraddr || + (memdesc->useraddr + memdesc->size) != vma->vm_end) + return -EINVAL; + return 0; +} + +static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, struct file *vmfile) +{ + int ret = 0; + long npages = 0, i; + size_t sglen = (size_t) (memdesc->size / PAGE_SIZE); + struct page **pages = NULL; + int write = (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) != 0; + + if (sglen == 0 || sglen >= LONG_MAX) + return -EINVAL; + + pages = kgsl_malloc(sglen * sizeof(struct page *)); + if (pages == NULL) + return -ENOMEM; + + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) { + ret = -ENOMEM; + goto out; + } + + down_read(&current->mm->mmap_sem); + /* If we have vmfile, make sure we map the correct vma and map it all */ + if (vmfile != NULL) + ret = check_vma(find_vma(current->mm, memdesc->useraddr), + vmfile, memdesc); + + if (ret == 0) { + npages = get_user_pages(current, current->mm, memdesc->useraddr, + sglen, write, 0, pages, NULL); + ret = (npages < 0) ? 
(int)npages : 0; + } + up_read(&current->mm->mmap_sem); + + if (ret) + goto out; + + if ((unsigned long) npages != sglen) { + ret = -EINVAL; + goto out; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages, + 0, memdesc->size, GFP_KERNEL); +out: + if (ret) { + for (i = 0; i < npages; i++) + put_page(pages[i]); + + kfree(memdesc->sgt); + memdesc->sgt = NULL; + } + kgsl_free(pages); + return ret; +} + +static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr, + size_t offset, size_t size) +{ + /* Map an anonymous memory chunk */ + + if (size == 0 || offset != 0 || + !IS_ALIGNED(size, PAGE_SIZE)) + return -EINVAL; + + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = (uint64_t) size; + entry->memdesc.mmapsize = (uint64_t) size; + entry->memdesc.useraddr = hostptr; + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) + entry->memdesc.gpuaddr = (uint64_t) entry->memdesc.useraddr; + entry->memdesc.flags |= KGSL_MEMFLAGS_USERMEM_ADDR; + + return memdesc_sg_virt(&entry->memdesc, NULL); +} + +static int match_file(const void *p, struct file *file, unsigned int fd) +{ + /* + * We must return fd + 1 because iterate_fd stops searching on + * non-zero return, but 0 is a valid fd. + */ + return (p == file) ? (fd + 1) : 0; +} + +static void _setup_cache_mode(struct kgsl_mem_entry *entry, + struct vm_area_struct *vma) +{ + unsigned int mode; + pgprot_t pgprot = vma->vm_page_prot; + + if (pgprot == pgprot_noncached(pgprot)) + mode = KGSL_CACHEMODE_UNCACHED; + else if (pgprot == pgprot_writecombine(pgprot)) + mode = KGSL_CACHEMODE_WRITECOMBINE; + else + mode = KGSL_CACHEMODE_WRITEBACK; + + entry->memdesc.flags |= (mode << KGSL_CACHEMODE_SHIFT); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf); + +static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr) +{ + struct vm_area_struct *vma; + struct dma_buf *dmabuf = NULL; + int ret; + + /* + * Find the VMA containing this pointer and figure out if it + * is a dma-buf. 
+ */ + down_read(&current->mm->mmap_sem); + vma = find_vma(current->mm, hostptr); + + if (vma && vma->vm_file) { + int fd; + + /* + * Check to see that this isn't our own memory that we have + * already mapped + */ + if (vma->vm_file->f_op == &kgsl_fops) { + up_read(&current->mm->mmap_sem); + return -EFAULT; + } + + /* Look for the fd that matches the vma file */ + fd = iterate_fd(current->files, 0, match_file, vma->vm_file); + if (fd != 0) + dmabuf = dma_buf_get(fd - 1); + } + up_read(&current->mm->mmap_sem); + + if (dmabuf == NULL) + return -ENODEV; + + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) { + dma_buf_put(dmabuf); + return ret; + } + + /* Setup the user addr/cache mode for cache operations */ + entry->memdesc.useraddr = hostptr; + _setup_cache_mode(entry, vma); + + return 0; +} +#else +static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, unsigned long hostptr) +{ + return -ENODEV; +} +#endif + +static int kgsl_setup_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned long hostptr, size_t offset, size_t size) +{ + int ret; + + if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE)) + return -EINVAL; + + /* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */ + ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr); + if (ret != -ENODEV) + return ret; + + /* Okay - let's go legacy */ + return kgsl_setup_anon_useraddr(pagetable, entry, + hostptr, offset, size); +} + +static long _gpuobj_map_useraddr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param) +{ + struct kgsl_gpuobj_import_useraddr useraddr; + int ret; + + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMTYPE_MASK + | KGSL_MEMFLAGS_FORCE_32BIT; + + /* Specifying SECURE is an explicit error */ + if (param->flags & KGSL_MEMFLAGS_SECURE) + return -ENOTSUPP; + + ret = _copy_from_user(&useraddr, + to_user_ptr(param->priv), sizeof(useraddr), + param->priv_len); + if (ret) + return ret; + + /* Verify that the virtaddr and len are within bounds */ + if (useraddr.virtaddr > ULONG_MAX) + return -EINVAL; + + return kgsl_setup_useraddr(device, pagetable, entry, + (unsigned long) useraddr.virtaddr, 0, 0); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static long _gpuobj_map_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param, + int *fd) +{ + struct kgsl_gpuobj_import_dma_buf buf; + struct dma_buf *dmabuf; + int ret; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (!kgsl_mmu_is_secured(&device->mmu)) { + dev_WARN_ONCE(device->dev, 1, + "Secure buffer not supported"); + return -ENOTSUPP; + } + + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + } + + ret = _copy_from_user(&buf, to_user_ptr(param->priv), + sizeof(buf), param->priv_len); + if (ret) + return ret; + + if (buf.fd == 0) + return -EINVAL; + + *fd = buf.fd; + dmabuf = dma_buf_get(buf.fd); + + if (IS_ERR_OR_NULL(dmabuf)) + return (dmabuf == NULL) ? 
-EINVAL : PTR_ERR(dmabuf); + + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) + dma_buf_put(dmabuf); + + return ret; +} +#else +static long _gpuobj_map_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct kgsl_gpuobj_import *param, + int *fd) +{ + return -EINVAL; +} +#endif + +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_import *param = data; + struct kgsl_mem_entry *entry; + int ret, fd = -1; + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return -ENOMEM; + + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_FORCE_32BIT; + + entry->memdesc.flags = param->flags; + + if (param->type == KGSL_USER_MEM_TYPE_ADDR) + ret = _gpuobj_map_useraddr(dev_priv->device, private->pagetable, + entry, param); + else if (param->type == KGSL_USER_MEM_TYPE_DMABUF) + ret = _gpuobj_map_dma_buf(dev_priv->device, private->pagetable, + entry, param, &fd); + else + ret = -ENOTSUPP; + + if (ret) + goto out; + + if (entry->memdesc.size >= SZ_1M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M)); + else if (entry->memdesc.size >= SZ_64K) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K)); + + param->flags = entry->memdesc.flags; + + ret = kgsl_mem_entry_attach_process(entry, dev_priv); + if (ret) + goto unmap; + + param->id = entry->id; + + KGSL_STATS_ADD(entry->memdesc.size, &kgsl_driver.stats.mapped, + &kgsl_driver.stats.mapped_max); + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + + trace_kgsl_mem_map(entry, fd); + + return 0; + +unmap: + if (param->type == KGSL_USER_MEM_TYPE_DMABUF) { + kgsl_destroy_ion(entry->priv_data); + entry->memdesc.sgt = NULL; + } + + kgsl_sharedmem_free(&entry->memdesc); + +out: + kfree(entry); + return ret; +} + +static long _map_usermem_addr(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry, + unsigned long hostptr, size_t offset, size_t size) +{ + if (!kgsl_mmu_enabled()) { + KGSL_DRV_ERR(device, + "Cannot map paged memory with the MMU disabled\n"); + return -EINVAL; + } + + /* No CPU mapped buffer could ever be secure */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) + return -EINVAL; + + return kgsl_setup_useraddr(device, pagetable, entry, hostptr, + offset, size); +} + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int _map_usermem_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned int fd) +{ + int ret; + struct dma_buf *dmabuf; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + + if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) { + if (!kgsl_mmu_is_secured(&device->mmu)) { + dev_WARN_ONCE(device->dev, 1, + "Secure buffer not supported"); + return -EINVAL; + } + + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + } + + dmabuf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dmabuf)) { + ret = PTR_ERR(dmabuf); + return ret ? 
ret : -EINVAL; + } + ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); + if (ret) + dma_buf_put(dmabuf); + return ret; +} +#else +static int _map_usermem_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + unsigned int fd) +{ + return -EINVAL; +} +#endif + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kgsl_setup_dma_buf(struct kgsl_device *device, + struct kgsl_pagetable *pagetable, + struct kgsl_mem_entry *entry, + struct dma_buf *dmabuf) +{ + int ret = 0; + struct scatterlist *s; + struct sg_table *sg_table; + struct dma_buf_attachment *attach = NULL; + struct kgsl_dma_buf_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + attach = dma_buf_attach(dmabuf, device->dev); + if (IS_ERR_OR_NULL(attach)) { + ret = attach ? PTR_ERR(attach) : -EINVAL; + goto out; + } + + meta->dmabuf = dmabuf; + meta->attach = attach; + + entry->priv_data = meta; + entry->memdesc.pagetable = pagetable; + entry->memdesc.size = 0; + entry->memdesc.mmapsize = 0; + /* USE_CPU_MAP is not impemented for ION. */ + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + entry->memdesc.flags |= KGSL_MEMFLAGS_USERMEM_ION; + + sg_table = dma_buf_map_attachment(attach, DMA_TO_DEVICE); + + if (IS_ERR_OR_NULL(sg_table)) { + ret = PTR_ERR(sg_table); + goto out; + } + + meta->table = sg_table; + entry->priv_data = meta; + entry->memdesc.sgt = sg_table; + + /* Calculate the size of the memdesc from the sglist */ + for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s)) { + int priv = (entry->memdesc.priv & KGSL_MEMDESC_SECURE) ? 1 : 0; + + /* + * Check that each chunk of of the sg table matches the secure + * flag. + */ + + if (PagePrivate(sg_page(s)) != priv) { + ret = -EPERM; + goto out; + } + + entry->memdesc.size += (uint64_t) s->length; + } + + entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size); + entry->memdesc.mmapsize = PAGE_ALIGN(entry->memdesc.size); + +out: + if (ret) { + if (!IS_ERR_OR_NULL(attach)) + dma_buf_detach(dmabuf, attach); + + + kfree(meta); + } + + return ret; +} +#endif + +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = -EINVAL; + struct kgsl_map_user_mem *param = data; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mmu *mmu = &dev_priv->device->mmu; + unsigned int memtype; + + /* + * If content protection is not enabled and secure buffer + * is requested to be mapped return error. + */ + + if (param->flags & KGSL_MEMFLAGS_SECURE) { + /* Log message and return if context protection isn't enabled */ + if (!kgsl_mmu_is_secured(mmu)) { + dev_WARN_ONCE(dev_priv->device->dev, 1, + "Secure buffer not supported"); + return -EOPNOTSUPP; + } + + /* Can't use CPU map with secure buffers */ + if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP) + return -EINVAL; + } + + entry = kgsl_mem_entry_create(); + + if (entry == NULL) + return -ENOMEM; + + /* + * Convert from enum value to KGSL_MEM_ENTRY value, so that + * we can use the latter consistently everywhere. + */ + memtype = param->memtype + 1; + + /* + * Mask off unknown flags from userspace. This way the caller can + * check if a flag is supported by looking at the returned flags. + * Note: CACHEMODE is ignored for this call. Caching should be + * determined by type of allocation being mapped. 
+ */ + param->flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE; + entry->memdesc.flags = ((uint64_t) param->flags) + | KGSL_MEMFLAGS_FORCE_32BIT; + + if (!kgsl_mmu_use_cpu_map(&dev_priv->device->mmu)) + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE; + + if (param->flags & KGSL_MEMFLAGS_SECURE) + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + + switch (memtype) { + case KGSL_MEM_ENTRY_USER: + result = _map_usermem_addr(dev_priv->device, private->pagetable, + entry, param->hostptr, param->offset, param->len); + break; + case KGSL_MEM_ENTRY_ION: + if (param->offset != 0) + result = -EINVAL; + else + result = _map_usermem_dma_buf(dev_priv->device, + private->pagetable, entry, param->fd); + break; + default: + KGSL_CORE_ERR("Invalid memory type: %x\n", memtype); + result = -EOPNOTSUPP; + break; + } + + if (result) + goto error; + + if ((param->flags & KGSL_MEMFLAGS_SECURE) && + (entry->memdesc.size & mmu->secure_align_mask)) { + KGSL_DRV_ERR(dev_priv->device, + "Secure buffer size %lld not aligned to %x alignment", + entry->memdesc.size, + mmu->secure_align_mask + 1); + result = -EINVAL; + goto error_attach; + } + + if (entry->memdesc.size >= SZ_2M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_2M)); + else if (entry->memdesc.size >= SZ_1M) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M)); + else if (entry->memdesc.size >= SZ_64K) + kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K)); + + /* echo back flags */ + param->flags = (unsigned int) entry->memdesc.flags; + + result = kgsl_mem_entry_attach_process(entry, dev_priv); + if (result) + goto error_attach; + + /* Adjust the returned value for a non 4k aligned offset */ + param->gpuaddr = (unsigned long) + entry->memdesc.gpuaddr + (param->offset & PAGE_MASK); + + KGSL_STATS_ADD(param->len, &kgsl_driver.stats.mapped, + &kgsl_driver.stats.mapped_max); + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), param->len); + + trace_kgsl_mem_map(entry, param->fd); + + return result; + +error_attach: + switch (memtype) { + case KGSL_MEM_ENTRY_ION: + kgsl_destroy_ion(entry->priv_data); + entry->memdesc.sgt = NULL; + break; + default: + break; + } + kgsl_sharedmem_free(&entry->memdesc); +error: + /* Clear gpuaddr here so userspace doesn't get any wrong ideas */ + param->gpuaddr = 0; + + kfree(entry); + return result; +} + +static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, + uint64_t offset, uint64_t length, unsigned int op) +{ + int ret = 0; + int cacheop; + int mode; + + /* + * Flush is defined as (clean | invalidate). 
If both bits are set, then + * do a flush, otherwise check for the individual bits and clean or inv + * as requested + */ + + if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH) + cacheop = KGSL_CACHE_OP_FLUSH; + else if (op & KGSL_GPUMEM_CACHE_CLEAN) + cacheop = KGSL_CACHE_OP_CLEAN; + else if (op & KGSL_GPUMEM_CACHE_INV) + cacheop = KGSL_CACHE_OP_INV; + else { + ret = -EINVAL; + goto done; + } + + if (!(op & KGSL_GPUMEM_CACHE_RANGE)) { + offset = 0; + length = entry->memdesc.size; + } + + mode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if (mode != KGSL_CACHEMODE_UNCACHED + && mode != KGSL_CACHEMODE_WRITECOMBINE) { + trace_kgsl_mem_sync_cache(entry, offset, length, op); + ret = kgsl_cache_range_op(&entry->memdesc, offset, + length, cacheop); + } + +done: + return ret; +} + +/* New cache sync function - supports both directions (clean and invalidate) */ + +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + if (param->id != 0) { + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n", + param->id); + return -EINVAL; + } + } else if (param->gpuaddr != 0) { + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "can't find gpuaddr 0x%08lX\n", + param->gpuaddr); + return -EINVAL; + } + } else { + return -EINVAL; + } + + ret = _kgsl_gpumem_sync_cache(entry, (uint64_t) param->offset, + (uint64_t) param->length, param->op); + kgsl_mem_entry_put(entry); + return ret; +} + +static int mem_id_cmp(const void *_a, const void *_b) +{ + const unsigned int *a = _a, *b = _b; + if (*a == *b) + return 0; + return (*a > *b) ? 
1 : -1; +} + +#ifdef CONFIG_ARM64 +/* Do not support full flush on ARM64 targets */ +static inline bool check_full_flush(size_t size, int op) +{ + return false; +} +#else +/* Support full flush if the size is bigger than the threshold */ +static inline bool check_full_flush(size_t size, int op) +{ + /* If we exceed the breakeven point, flush the entire cache */ + return (kgsl_driver.full_cache_threshold != 0) && + (size >= kgsl_driver.full_cache_threshold) && + (op == KGSL_GPUMEM_CACHE_FLUSH); +} +#endif + +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int i; + struct kgsl_gpumem_sync_cache_bulk *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + unsigned int id, last_id = 0, *id_list = NULL, actual_count = 0; + struct kgsl_mem_entry **entries = NULL; + long ret = 0; + uint64_t op_size = 0; + bool full_flush = false; + + if (param->id_list == NULL || param->count == 0 + || param->count > (PAGE_SIZE / sizeof(unsigned int))) + return -EINVAL; + + id_list = kzalloc(param->count * sizeof(unsigned int), GFP_KERNEL); + if (id_list == NULL) + return -ENOMEM; + + entries = kzalloc(param->count * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto end; + } + + if (copy_from_user(id_list, param->id_list, + param->count * sizeof(unsigned int))) { + ret = -EFAULT; + goto end; + } + /* sort the ids so we can weed out duplicates */ + sort(id_list, param->count, sizeof(*id_list), mem_id_cmp, NULL); + + for (i = 0; i < param->count; i++) { + unsigned int cachemode; + struct kgsl_mem_entry *entry = NULL; + + id = id_list[i]; + /* skip 0 ids or duplicates */ + if (id == last_id) + continue; + + entry = kgsl_sharedmem_find_id(private, id); + if (entry == NULL) + continue; + + /* skip uncached memory */ + cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc); + if (cachemode != KGSL_CACHEMODE_WRITETHROUGH && + cachemode != KGSL_CACHEMODE_WRITEBACK) { + kgsl_mem_entry_put(entry); + continue; + } + + op_size += entry->memdesc.size; + entries[actual_count++] = entry; + + full_flush = check_full_flush(op_size, param->op); + if (full_flush) + break; + + last_id = id; + } + if (full_flush) { + trace_kgsl_mem_sync_full_cache(actual_count, op_size); + flush_cache_all(); + } + + param->op &= ~KGSL_GPUMEM_CACHE_RANGE; + + for (i = 0; i < actual_count; i++) { + if (!full_flush) + _kgsl_gpumem_sync_cache(entries[i], 0, + entries[i]->memdesc.size, + param->op); + kgsl_mem_entry_put(entries[i]); + } +end: + kfree(entries); + kfree(id_list); + return ret; +} + +/* Legacy cache function, does a flush (clean + invalidate) */ + +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + long ret; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) { + KGSL_MEM_INFO(dev_priv->device, + "can't find gpuaddr 0x%08lX\n", + param->gpuaddr); + return -EINVAL; + } + + ret = _kgsl_gpumem_sync_cache(entry, 0, entry->memdesc.size, + KGSL_GPUMEM_CACHE_FLUSH); + kgsl_mem_entry_put(entry); + return ret; +} + +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_sync *param = data; + struct kgsl_gpuobj_sync_obj *objs; + struct kgsl_mem_entry 
**entries; + long ret = 0; + bool full_flush = false; + uint64_t size = 0; + int i, count = 0; + void __user *ptr; + + if (param->count == 0 || param->count > 128) + return -EINVAL; + + objs = kzalloc(param->count * sizeof(*objs), GFP_KERNEL); + if (objs == NULL) + return -ENOMEM; + + entries = kzalloc(param->count * sizeof(*entries), GFP_KERNEL); + if (entries == NULL) { + ret = -ENOMEM; + goto out; + } + + ptr = to_user_ptr(param->objs); + + for (i = 0; i < param->count; i++) { + ret = _copy_from_user(&objs[i], ptr, sizeof(*objs), + param->obj_len); + if (ret) + goto out; + + entries[i] = kgsl_sharedmem_find_id(private, objs[i].id); + + /* Not finding the ID is not a fatal failure - just skip it */ + if (entries[i] == NULL) + continue; + + count++; + + if (!(objs[i].op & KGSL_GPUMEM_CACHE_RANGE)) + size += entries[i]->memdesc.size; + else if (objs[i].offset < entries[i]->memdesc.size) + size += (entries[i]->memdesc.size - objs[i].offset); + + full_flush = check_full_flush(size, objs[i].op); + if (full_flush) + break; + + ptr += sizeof(*objs); + } + + if (full_flush) { + trace_kgsl_mem_sync_full_cache(count, size); + flush_cache_all(); + } else { + for (i = 0; !ret && i < param->count; i++) + if (entries[i]) + ret = _kgsl_gpumem_sync_cache(entries[i], + objs[i].offset, objs[i].length, + objs[i].op); + } + + for (i = 0; i < param->count; i++) + if (entries[i]) + kgsl_mem_entry_put(entries[i]); + +out: + kfree(entries); + kfree(objs); + + return ret; +} + +#ifdef CONFIG_ARM64 +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + /* + * WRITETHROUGH is not supported in arm64, so we tell the user that we + * use WRITEBACK which is the default caching policy. + */ + if ((flags & KGSL_CACHEMODE_MASK) >> KGSL_CACHEMODE_SHIFT == + KGSL_CACHEMODE_WRITETHROUGH) { + flags &= ~((uint64_t) KGSL_CACHEMODE_MASK); + flags |= (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT) & + KGSL_CACHEMODE_MASK; + } + return flags; +} +#else +static uint64_t kgsl_filter_cachemode(uint64_t flags) +{ + return flags; +} +#endif + +/* The largest allowable alignment for a GPU object is 32MB */ +#define KGSL_MAX_ALIGN (32 * SZ_1M) + +static struct kgsl_mem_entry *gpumem_alloc_entry( + struct kgsl_device_private *dev_priv, + uint64_t size, uint64_t mmapsize, uint64_t flags) +{ + int ret; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry; + unsigned int align; + + flags &= KGSL_MEMFLAGS_GPUREADONLY + | KGSL_CACHEMODE_MASK + | KGSL_MEMTYPE_MASK + | KGSL_MEMALIGN_MASK + | KGSL_MEMFLAGS_USE_CPU_MAP + | KGSL_MEMFLAGS_SECURE + | KGSL_MEMFLAGS_FORCE_32BIT; + + /* Turn off SVM if the system doesn't support it */ + if (!kgsl_mmu_use_cpu_map(&dev_priv->device->mmu)) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Return not supported error if secure memory isn't enabled */ + if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) && + (flags & KGSL_MEMFLAGS_SECURE)) { + dev_WARN_ONCE(dev_priv->device->dev, 1, + "Secure memory not supported"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* Secure memory disables advanced addressing modes */ + if (flags & KGSL_MEMFLAGS_SECURE) + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + + /* Cap the alignment bits to the highest number we can handle */ + align = MEMFLAGS(flags, KGSL_MEMALIGN_MASK, KGSL_MEMALIGN_SHIFT); + if (align >= ilog2(KGSL_MAX_ALIGN)) { + KGSL_CORE_ERR("Alignment too large; restricting to %dK\n", + KGSL_MAX_ALIGN >> 10); + + flags &= ~((uint64_t) KGSL_MEMALIGN_MASK); + flags |= (ilog2(KGSL_MAX_ALIGN) << 
KGSL_MEMALIGN_SHIFT) & + KGSL_MEMALIGN_MASK; + } + + if (mmapsize < size) + mmapsize = size; + + /* For now only allow allocations up to 4G */ + if (size > UINT_MAX) + return ERR_PTR(-EINVAL); + + /* Only allow a mmap size that we can actually mmap */ + if (mmapsize > UINT_MAX) + return ERR_PTR(-EINVAL); + + flags = kgsl_filter_cachemode(flags); + + entry = kgsl_mem_entry_create(); + if (entry == NULL) + return ERR_PTR(-ENOMEM); + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) + entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE; + + if (flags & KGSL_MEMFLAGS_SECURE) + entry->memdesc.priv |= KGSL_MEMDESC_SECURE; + + ret = kgsl_allocate_user(dev_priv->device, &entry->memdesc, + private->pagetable, size, mmapsize, flags); + if (ret != 0) + goto err; + + ret = kgsl_mem_entry_attach_process(entry, dev_priv); + if (ret != 0) { + kgsl_sharedmem_free(&entry->memdesc); + goto err; + } + + kgsl_process_add_stats(private, + kgsl_memdesc_usermem_type(&entry->memdesc), + entry->memdesc.size); + trace_kgsl_mem_alloc(entry); + + return entry; +err: + kfree(entry); + return ERR_PTR(ret); +} + +static void copy_metadata(struct kgsl_mem_entry *entry, uint64_t metadata, + unsigned int len) +{ + unsigned int i, size; + + if (len == 0) + return; + + size = min_t(unsigned int, len, sizeof(entry->metadata) - 1); + + if (copy_from_user(entry->metadata, to_user_ptr(metadata), size)) { + memset(entry->metadata, 0, sizeof(entry->metadata)); + return; + } + + /* Clean up non printable characters in the string */ + for (i = 0; i < size && entry->metadata[i] != 0; i++) { + if (!isprint(entry->metadata[i])) + entry->metadata[i] = '?'; + } +} + +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpuobj_alloc *param = data; + struct kgsl_mem_entry *entry; + + entry = gpumem_alloc_entry(dev_priv, param->size, + param->va_len, param->flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + copy_metadata(entry, param->metadata, param->metadata_len); + + param->size = entry->memdesc.size; + param->flags = entry->memdesc.flags; + param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc); + param->id = entry->id; + + return 0; +} + +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_alloc *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + /* Legacy functions doesn't support these advanced features */ + flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP); + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, + (uint64_t) param->size, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->size = (size_t) entry->memdesc.size; + param->flags = (unsigned int) entry->memdesc.flags; + + return 0; +} + +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_alloc_id *param = data; + struct kgsl_mem_entry *entry; + uint64_t flags = param->flags; + + flags |= KGSL_MEMFLAGS_FORCE_32BIT; + + entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, + (uint64_t) param->mmapsize, flags); + + if (IS_ERR(entry)) + return PTR_ERR(entry); + + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) + kgsl_memdesc_mmapsize(&entry->memdesc); + param->gpuaddr = (unsigned long) 
entry->memdesc.gpuaddr; + + return 0; +} + +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpumem_get_info *param = data; + struct kgsl_mem_entry *entry = NULL; + int result = 0; + + if (param->id != 0) { + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + } else if (param->gpuaddr != 0) { + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + } else + return -EINVAL; + + /* + * If any of the 64 bit address / sizes would end up being + * truncated, return -ERANGE. That will signal the user that they + * should use a more modern API + */ + if (entry->memdesc.gpuaddr > ULONG_MAX) + result = -ERANGE; + + param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr; + param->id = entry->id; + param->flags = (unsigned int) entry->memdesc.flags; + param->size = (size_t) entry->memdesc.size; + param->mmapsize = (size_t) kgsl_memdesc_mmapsize(&entry->memdesc); + param->useraddr = entry->memdesc.useraddr; + + kgsl_mem_entry_put(entry); + return result; +} + +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_info *param = data; + struct kgsl_mem_entry *entry; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + param->id = entry->id; + param->gpuaddr = entry->memdesc.gpuaddr; + param->flags = entry->memdesc.flags; + param->size = entry->memdesc.size; + param->va_len = kgsl_memdesc_mmapsize(&entry->memdesc); + param->va_addr = (uint64_t) entry->memdesc.useraddr; + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_gpuobj_set_info *param = data; + struct kgsl_mem_entry *entry; + + if (param->id == 0) + return -EINVAL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + if (param->flags & KGSL_GPUOBJ_SET_INFO_METADATA) + copy_metadata(entry, param->metadata, param->metadata_len); + + if (param->flags & KGSL_GPUOBJ_SET_INFO_TYPE) { + entry->memdesc.flags &= ~((uint64_t) KGSL_MEMTYPE_MASK); + entry->memdesc.flags |= param->type << KGSL_MEMTYPE_SHIFT; + } + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cff_syncmem *param = data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + uint64_t offset, len; + + entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr); + if (entry == NULL) + return -EINVAL; + + /* + * Calculate the offset between the requested GPU address and the start + * of the object + */ + + offset = ((uint64_t) param->gpuaddr) - entry->memdesc.gpuaddr; + + if ((offset + param->len) > entry->memdesc.size) + len = entry->memdesc.size - offset; + else + len = param->len; + + kgsl_cffdump_syncmem(dev_priv->device, entry, offset, len, true); + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_cff_sync_gpuobj(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cff_sync_gpuobj *param = data; + struct 
kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + + entry = kgsl_sharedmem_find_id(private, param->id); + if (entry == NULL) + return -EINVAL; + + kgsl_cffdump_syncmem(dev_priv->device, entry, param->offset, + param->length, true); + + kgsl_mem_entry_put(entry); + return 0; +} + +long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_cff_user_event *param = data; + + kgsl_cffdump_user_event(dev_priv->device, param->cff_opcode, + param->op1, param->op2, + param->op3, param->op4, param->op5); + + return result; +} + +/** + * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace + * @dev_priv - pointer to the private device structure + * @cmd - the ioctl cmd passed from kgsl_ioctl + * @data - the user data buffer from kgsl_ioctl + * @returns 0 on success or error code on failure + */ + +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event *param = data; + int ret; + + switch (param->type) { + case KGSL_TIMESTAMP_EVENT_FENCE: + ret = kgsl_add_fence_event(dev_priv->device, + param->context_id, param->timestamp, param->priv, + param->len, dev_priv); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int +kgsl_mmap_memstore(struct kgsl_device *device, struct vm_area_struct *vma) +{ + struct kgsl_memdesc *memdesc = &device->memstore; + int result; + unsigned int vma_size = vma->vm_end - vma->vm_start; + + /* The memstore can only be mapped as read only */ + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + if (memdesc->size != vma_size) { + KGSL_MEM_ERR(device, "memstore bad size: %d should be %llu\n", + vma_size, memdesc->size); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + result = remap_pfn_range(vma, vma->vm_start, + device->memstore.physaddr >> PAGE_SHIFT, + vma_size, vma->vm_page_prot); + if (result != 0) + KGSL_MEM_ERR(device, "remap_pfn_range failed: %d\n", + result); + + return result; +} + +/* + * kgsl_gpumem_vm_open is called whenever a vma region is copied or split. 
+ * Increase the refcount to make sure that the accounting stays correct + */ + +static void kgsl_gpumem_vm_open(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + if (!kgsl_mem_entry_get(entry)) + vma->vm_private_data = NULL; +} + +static int +kgsl_gpumem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!entry) + return VM_FAULT_SIGBUS; + if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault) + return VM_FAULT_SIGBUS; + + return entry->memdesc.ops->vmfault(&entry->memdesc, vma, vmf); +} + +static void +kgsl_gpumem_vm_close(struct vm_area_struct *vma) +{ + struct kgsl_mem_entry *entry = vma->vm_private_data; + + if (!entry) + return; + + entry->memdesc.useraddr = 0; + kgsl_mem_entry_put(entry); +} + +static struct vm_operations_struct kgsl_gpumem_vm_ops = { + .open = kgsl_gpumem_vm_open, + .fault = kgsl_gpumem_vm_fault, + .close = kgsl_gpumem_vm_close, +}; + +static int +get_mmap_entry(struct kgsl_process_private *private, + struct kgsl_mem_entry **out_entry, unsigned long pgoff, + unsigned long len) +{ + int ret = 0; + struct kgsl_mem_entry *entry; + + entry = kgsl_sharedmem_find_id(private, pgoff); + if (entry == NULL) + entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT); + + if (!entry) + return -EINVAL; + + if (!entry->memdesc.ops || + !entry->memdesc.ops->vmflags || + !entry->memdesc.ops->vmfault) { + ret = -EINVAL; + goto err_put; + } + + if (entry->memdesc.useraddr != 0) { + ret = -EBUSY; + goto err_put; + } + + if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + if (len != kgsl_memdesc_mmapsize(&entry->memdesc)) { + ret = -ERANGE; + goto err_put; + } + } else if (len != kgsl_memdesc_mmapsize(&entry->memdesc) && + len != entry->memdesc.size) { + /* + * If cpu_map != gpumap then user can map either the + * mmapsize or the entry size + */ + ret = -ERANGE; + goto err_put; + } + + *out_entry = entry; + return 0; +err_put: + kgsl_mem_entry_put(entry); + return ret; +} + +static unsigned long _gpu_set_svm_region(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, unsigned long addr, + unsigned long size) +{ + int ret; + + ret = kgsl_mmu_set_svm_region(private->pagetable, (uint64_t) addr, + (uint64_t) size); + + if (ret != 0) + return ret; + + entry->memdesc.gpuaddr = (uint64_t) addr; + + ret = kgsl_mmu_map(private->pagetable, &entry->memdesc); + if (ret) { + kgsl_mmu_put_gpuaddr(private->pagetable, + &entry->memdesc); + return ret; + } + + kgsl_memfree_purge(private->pagetable ? 
private->pagetable->name : 0, + entry->memdesc.gpuaddr, entry->memdesc.size); + + return addr; +} + +static unsigned long _gpu_find_svm(struct kgsl_process_private *private, + unsigned long start, unsigned long end, unsigned long len, + unsigned int align) +{ + uint64_t addr = kgsl_mmu_find_svm_region(private->pagetable, + (uint64_t) start, (uint64_t)end, (uint64_t) len, align); + + BUG_ON(addr > ULONG_MAX); + + return (unsigned long) addr; +} + +/* Search top down in the CPU VM region for a free address */ +static unsigned long _cpu_get_unmapped_area(unsigned long bottom, + unsigned long top, unsigned long len, unsigned long align) +{ + struct vm_unmapped_area_info info; + unsigned long addr, err; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.low_limit = bottom; + info.high_limit = top; + info.length = len; + info.align_offset = 0; + info.align_mask = align - 1; + + addr = vm_unmapped_area(&info); + + if (IS_ERR_VALUE(addr)) + return addr; + + err = security_mmap_addr(addr); + return err ? err : addr; +} + +static unsigned long _search_range(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, + unsigned long start, unsigned long end, + unsigned long len, uint64_t align) +{ + unsigned long cpu, gpu = end, result = -ENOMEM; + + while (gpu > start) { + /* find a new empty spot on the CPU below the last one */ + cpu = _cpu_get_unmapped_area(start, gpu, len, + (unsigned long) align); + if (IS_ERR_VALUE(cpu)) { + result = cpu; + break; + } + /* try to map it on the GPU */ + result = _gpu_set_svm_region(private, entry, cpu, len); + if (!IS_ERR_VALUE(result)) + break; + + trace_kgsl_mem_unmapped_area_collision(entry, cpu, len); + + if (cpu <= start) { + result = -ENOMEM; + break; + } + + /* move downward to the next empty spot on the GPU */ + gpu = _gpu_find_svm(private, start, cpu, len, align); + if (IS_ERR_VALUE(gpu)) { + result = gpu; + break; + } + + /* Check that_gpu_find_svm doesn't put us in a loop */ + BUG_ON(gpu >= cpu); + + /* Break if the recommended GPU address is out of range */ + if (gpu < start) { + result = -ENOMEM; + break; + } + + /* + * Add the length of the chunk to the GPU address to yield the + * upper bound for the CPU search + */ + gpu += len; + } + return result; +} + +static unsigned long _get_svm_area(struct kgsl_process_private *private, + struct kgsl_mem_entry *entry, unsigned long hint, + unsigned long len, unsigned long flags) +{ + uint64_t start, end; + int align_shift = kgsl_memdesc_get_align(&entry->memdesc); + uint64_t align; + unsigned long result; + unsigned long addr; + + if (align_shift >= ilog2(SZ_2M)) + align = SZ_2M; + else if (align_shift >= ilog2(SZ_1M)) + align = SZ_1M; + else if (align_shift >= ilog2(SZ_64K)) + align = SZ_64K; + else + align = SZ_4K; + + /* get the GPU pagetable's SVM range */ + if (kgsl_mmu_svm_range(private->pagetable, &start, &end, + entry->memdesc.flags)) + return -ERANGE; + + /* now clamp the range based on the CPU's requirements */ + start = max_t(uint64_t, start, mmap_min_addr); + end = min_t(uint64_t, end, current->mm->mmap_base); + if (start >= end) + return -ERANGE; + + if (flags & MAP_FIXED) { + /* we must use addr 'hint' or fail */ + return _gpu_set_svm_region(private, entry, hint, len); + } else if (hint != 0) { + struct vm_area_struct *vma; + + /* + * See if the hint is usable, if not we will use + * it as the start point for searching. 
+ */ + addr = clamp_t(unsigned long, hint & ~(align - 1), + start, (end - len) & ~(align - 1)); + + vma = find_vma(current->mm, addr); + + if (vma == NULL || ((addr + len) <= vma->vm_start)) { + result = _gpu_set_svm_region(private, entry, addr, len); + + /* On failure drop down to keep searching */ + if (!IS_ERR_VALUE(result)) + return result; + } + } else { + /* no hint, start search at the top and work down */ + addr = end & ~(align - 1); + } + + /* + * Search downwards from the hint first. If that fails we + * must try to search above it. + */ + result = _search_range(private, entry, start, addr, len, align); + if (IS_ERR_VALUE(result) && hint != 0) + result = _search_range(private, entry, addr, end, len, align); + + return result; +} + +static unsigned long +kgsl_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + unsigned long val; + unsigned long vma_offset = pgoff << PAGE_SHIFT; + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_device *device = dev_priv->device; + struct kgsl_mem_entry *entry = NULL; + + if (vma_offset == (unsigned long) device->memstore.gpuaddr) + return get_unmapped_area(NULL, addr, len, pgoff, flags); + + val = get_mmap_entry(private, &entry, pgoff, len); + if (val) + return val; + + /* Do not allow CPU mappings for secure buffers */ + if (kgsl_memdesc_is_secured(&entry->memdesc)) { + val = -EPERM; + goto put; + } + + if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) { + val = get_unmapped_area(NULL, addr, len, 0, flags); + if (IS_ERR_VALUE(val)) + KGSL_MEM_ERR(device, + "get_unmapped_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n", + private->pid, addr, pgoff, len, (int) val); + } else { + val = _get_svm_area(private, entry, addr, len, flags); + if (IS_ERR_VALUE(val)) + KGSL_MEM_ERR(device, + "_get_svm_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n", + private->pid, addr, pgoff, len, (int) val); + } + +put: + kgsl_mem_entry_put(entry); + return val; +} + +static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned int ret, cache; + unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT; + struct kgsl_device_private *dev_priv = file->private_data; + struct kgsl_process_private *private = dev_priv->process_priv; + struct kgsl_mem_entry *entry = NULL; + struct kgsl_device *device = dev_priv->device; + + /* Handle leagacy behavior for memstore */ + + if (vma_offset == (unsigned long) device->memstore.gpuaddr) + return kgsl_mmap_memstore(device, vma); + + /* + * The reference count on the entry that we get from + * get_mmap_entry() will be held until kgsl_gpumem_vm_close(). 
+ */ + ret = get_mmap_entry(private, &entry, vma->vm_pgoff, + vma->vm_end - vma->vm_start); + if (ret) + return ret; + + vma->vm_flags |= entry->memdesc.ops->vmflags; + + vma->vm_private_data = entry; + + /* Determine user-side caching policy */ + + cache = kgsl_memdesc_get_cachemode(&entry->memdesc); + + switch (cache) { + case KGSL_CACHEMODE_UNCACHED: + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + break; + case KGSL_CACHEMODE_WRITETHROUGH: + vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot); + if (vma->vm_page_prot == + pgprot_writebackcache(vma->vm_page_prot)) + WARN_ONCE(1, "WRITETHROUGH is deprecated for arm64"); + break; + case KGSL_CACHEMODE_WRITEBACK: + vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot); + break; + case KGSL_CACHEMODE_WRITECOMBINE: + default: + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + break; + } + + vma->vm_ops = &kgsl_gpumem_vm_ops; + + if (cache == KGSL_CACHEMODE_WRITEBACK + || cache == KGSL_CACHEMODE_WRITETHROUGH) { + struct scatterlist *s; + int i; + unsigned long addr = vma->vm_start; + + for_each_sg(entry->memdesc.sgt->sgl, s, + entry->memdesc.sgt->nents, i) { + int j; + for (j = 0; j < (s->length >> PAGE_SHIFT); j++) { + struct page *page = sg_page(s); + page = nth_page(page, j); + vm_insert_page(vma, addr, page); + addr += PAGE_SIZE; + } + } + } + + vma->vm_file = file; + + entry->memdesc.useraddr = vma->vm_start; + + trace_kgsl_mem_mmap(entry); + return 0; +} + +static irqreturn_t kgsl_irq_handler(int irq, void *data) +{ + struct kgsl_device *device = data; + + return device->ftbl->irq_handler(device); + +} + +#define KGSL_READ_MESSAGE "OH HAI GPU\n" + +static ssize_t kgsl_read(struct file *filep, char __user *buf, size_t count, + loff_t *pos) +{ + return simple_read_from_buffer(buf, count, pos, + KGSL_READ_MESSAGE, strlen(KGSL_READ_MESSAGE) + 1); +} + +static const struct file_operations kgsl_fops = { + .owner = THIS_MODULE, + .release = kgsl_release, + .open = kgsl_open, + .mmap = kgsl_mmap, + .read = kgsl_read, + .get_unmapped_area = kgsl_get_unmapped_area, + .unlocked_ioctl = kgsl_ioctl, + .compat_ioctl = kgsl_compat_ioctl, +}; + +struct kgsl_driver kgsl_driver = { + .process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex), + .ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock), + .devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock), + /* + * Full cache flushes are faster than line by line on at least + * 8064 and 8974 once the region to be flushed is > 16mb. 
+ */ + .full_cache_threshold = SZ_16M, + + .stats.vmalloc = ATOMIC_LONG_INIT(0), + .stats.vmalloc_max = ATOMIC_LONG_INIT(0), + .stats.page_alloc = ATOMIC_LONG_INIT(0), + .stats.page_alloc_max = ATOMIC_LONG_INIT(0), + .stats.coherent = ATOMIC_LONG_INIT(0), + .stats.coherent_max = ATOMIC_LONG_INIT(0), + .stats.secure = ATOMIC_LONG_INIT(0), + .stats.secure_max = ATOMIC_LONG_INIT(0), + .stats.mapped = ATOMIC_LONG_INIT(0), + .stats.mapped_max = ATOMIC_LONG_INIT(0), +}; +EXPORT_SYMBOL(kgsl_driver); + +static void _unregister_device(struct kgsl_device *device) +{ + int minor; + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) { + if (device == kgsl_driver.devp[minor]) + break; + } + if (minor != KGSL_DEVICE_MAX) { + device_destroy(kgsl_driver.class, + MKDEV(MAJOR(kgsl_driver.major), minor)); + kgsl_driver.devp[minor] = NULL; + } + mutex_unlock(&kgsl_driver.devlock); +} + +static int _register_device(struct kgsl_device *device) +{ + int minor, ret; + dev_t dev; + + /* Find a minor for the device */ + + mutex_lock(&kgsl_driver.devlock); + for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) { + if (kgsl_driver.devp[minor] == NULL) { + kgsl_driver.devp[minor] = device; + break; + } + } + mutex_unlock(&kgsl_driver.devlock); + + if (minor == KGSL_DEVICE_MAX) { + KGSL_CORE_ERR("minor devices exhausted\n"); + return -ENODEV; + } + + /* Create the device */ + dev = MKDEV(MAJOR(kgsl_driver.major), minor); + device->dev = device_create(kgsl_driver.class, + &device->pdev->dev, + dev, device, + device->name); + + if (IS_ERR(device->dev)) { + mutex_lock(&kgsl_driver.devlock); + kgsl_driver.devp[minor] = NULL; + mutex_unlock(&kgsl_driver.devlock); + ret = PTR_ERR(device->dev); + KGSL_CORE_ERR("device_create(%s): %d\n", device->name, ret); + return ret; + } + + dev_set_drvdata(&device->pdev->dev, device); + return 0; +} + +int kgsl_device_platform_probe(struct kgsl_device *device) +{ + int status = -EINVAL; + struct resource *res; + + status = _register_device(device); + if (status) + return status; + + /* Initialize logging first, so that failures below actually print. 
*/ + kgsl_device_debugfs_init(device); + + status = kgsl_pwrctrl_init(device); + if (status) + goto error; + + /* Get starting physical address of device registers */ + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + device->iomemname); + if (res == NULL) { + KGSL_DRV_ERR(device, "platform_get_resource_byname failed\n"); + status = -EINVAL; + goto error_pwrctrl_close; + } + if (res->start == 0 || resource_size(res) == 0) { + KGSL_DRV_ERR(device, "dev %d invalid register region\n", + device->id); + status = -EINVAL; + goto error_pwrctrl_close; + } + + device->reg_phys = res->start; + device->reg_len = resource_size(res); + + /* + * Check if a shadermemname is defined, and then get shader memory + * details including shader memory starting physical address + * and shader memory length + */ + if (device->shadermemname != NULL) { + res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM, + device->shadermemname); + + if (res == NULL) { + KGSL_DRV_WARN(device, + "Shader memory: platform_get_resource_byname failed\n"); + } + + else { + device->shader_mem_phys = res->start; + device->shader_mem_len = resource_size(res); + } + + if (!devm_request_mem_region(device->dev, + device->shader_mem_phys, + device->shader_mem_len, + device->name)) { + KGSL_DRV_WARN(device, "request_mem_region_failed\n"); + } + } + + if (!devm_request_mem_region(device->dev, device->reg_phys, + device->reg_len, device->name)) { + KGSL_DRV_ERR(device, "request_mem_region failed\n"); + status = -ENODEV; + goto error_pwrctrl_close; + } + + device->reg_virt = devm_ioremap(device->dev, device->reg_phys, + device->reg_len); + + if (device->reg_virt == NULL) { + KGSL_DRV_ERR(device, "ioremap failed\n"); + status = -ENODEV; + goto error_pwrctrl_close; + } + /*acquire interrupt */ + device->pwrctrl.interrupt_num = + platform_get_irq_byname(device->pdev, device->pwrctrl.irq_name); + + if (device->pwrctrl.interrupt_num <= 0) { + KGSL_DRV_ERR(device, "platform_get_irq_byname failed: %d\n", + device->pwrctrl.interrupt_num); + status = -EINVAL; + goto error_pwrctrl_close; + } + + status = devm_request_irq(device->dev, device->pwrctrl.interrupt_num, + kgsl_irq_handler, IRQF_TRIGGER_HIGH, + device->name, device); + if (status) { + KGSL_DRV_ERR(device, "request_irq(%d) failed: %d\n", + device->pwrctrl.interrupt_num, status); + goto error_pwrctrl_close; + } + disable_irq(device->pwrctrl.interrupt_num); + + KGSL_DRV_INFO(device, + "dev_id %d regs phys 0x%08lx size 0x%08x virt %p\n", + device->id, device->reg_phys, device->reg_len, + device->reg_virt); + + rwlock_init(&device->context_lock); + + setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device); + + status = kgsl_mmu_init(device, ksgl_mmu_type); + if (status != 0) { + KGSL_DRV_ERR(device, "kgsl_mmu_init failed %d\n", status); + goto error_pwrctrl_close; + } + + /* Check to see if our device can perform DMA correctly */ + status = dma_set_coherent_mask(&device->pdev->dev, KGSL_DMA_BIT_MASK); + if (status) + goto error_close_mmu; + + status = kgsl_allocate_global(device, &device->memstore, + KGSL_MEMSTORE_SIZE, 0, 0); + + if (status != 0) { + KGSL_DRV_ERR(device, "kgsl_allocate_global failed %d\n", + status); + goto error_close_mmu; + } + + /* + * The default request type PM_QOS_REQ_ALL_CORES is + * applicable to all CPU cores that are online and + * would have a power impact when there are more + * number of CPUs. PM_QOS_REQ_AFFINE_IRQ request + * type shall update/apply the vote only to that CPU to + * which IRQ's affinity is set to. 
+ */ +#ifdef CONFIG_SMP + + device->pwrctrl.pm_qos_req_dma.type = PM_QOS_REQ_AFFINE_IRQ; + device->pwrctrl.pm_qos_req_dma.irq = device->pwrctrl.interrupt_num; + +#endif + pm_qos_add_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); + + + device->events_wq = create_workqueue("kgsl-events"); + + /* Initalize the snapshot engine */ + kgsl_device_snapshot_init(device); + + /* Initialize common sysfs entries */ + kgsl_pwrctrl_init_sysfs(device); + + dev_info(device->dev, "Initialized %s: mmu=%s\n", device->name, + kgsl_mmu_enabled() ? "on" : "off"); + + return 0; + +error_close_mmu: + kgsl_mmu_close(device); +error_pwrctrl_close: + kgsl_pwrctrl_close(device); +error: + _unregister_device(device); + return status; +} +EXPORT_SYMBOL(kgsl_device_platform_probe); + +void kgsl_device_platform_remove(struct kgsl_device *device) +{ + destroy_workqueue(device->events_wq); + + kgsl_device_snapshot_close(device); + + kgsl_pwrctrl_uninit_sysfs(device); + + pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma); + + idr_destroy(&device->context_idr); + + kgsl_free_global(&device->memstore); + + kgsl_mmu_close(device); + + kgsl_pwrctrl_close(device); + + _unregister_device(device); +} +EXPORT_SYMBOL(kgsl_device_platform_remove); + +static void kgsl_core_exit(void) +{ + kgsl_events_exit(); + kgsl_cffdump_destroy(); + kgsl_core_debugfs_close(); + + /* + * We call kgsl_sharedmem_uninit_sysfs() and device_unregister() + * only if kgsl_driver.virtdev has been populated. + * We check at least one member of kgsl_driver.virtdev to + * see if it is not NULL (and thus, has been populated). + */ + if (kgsl_driver.virtdev.class) { + kgsl_sharedmem_uninit_sysfs(); + device_unregister(&kgsl_driver.virtdev); + } + + if (kgsl_driver.class) { + class_destroy(kgsl_driver.class); + kgsl_driver.class = NULL; + } + + kgsl_cmdbatch_exit(); + + kgsl_memfree_exit(); + unregister_chrdev_region(kgsl_driver.major, KGSL_DEVICE_MAX); +} + +static int __init kgsl_core_init(void) +{ + int result = 0; + /* alloc major and minor device numbers */ + result = alloc_chrdev_region(&kgsl_driver.major, 0, KGSL_DEVICE_MAX, + "kgsl"); + + if (result < 0) { + + KGSL_CORE_ERR("alloc_chrdev_region failed err = %d\n", result); + goto err; + } + + cdev_init(&kgsl_driver.cdev, &kgsl_fops); + kgsl_driver.cdev.owner = THIS_MODULE; + kgsl_driver.cdev.ops = &kgsl_fops; + result = cdev_add(&kgsl_driver.cdev, MKDEV(MAJOR(kgsl_driver.major), 0), + KGSL_DEVICE_MAX); + + if (result) { + KGSL_CORE_ERR("kgsl: cdev_add() failed, dev_num= %d," + " result= %d\n", kgsl_driver.major, result); + goto err; + } + + kgsl_driver.class = class_create(THIS_MODULE, "kgsl"); + + if (IS_ERR(kgsl_driver.class)) { + result = PTR_ERR(kgsl_driver.class); + KGSL_CORE_ERR("failed to create class for kgsl"); + goto err; + } + + /* Make a virtual device for managing core related things + in sysfs */ + kgsl_driver.virtdev.class = kgsl_driver.class; + dev_set_name(&kgsl_driver.virtdev, "kgsl"); + result = device_register(&kgsl_driver.virtdev); + if (result) { + KGSL_CORE_ERR("driver_register failed\n"); + goto err; + } + + /* Make kobjects in the virtual device for storing statistics */ + + kgsl_driver.ptkobj = + kobject_create_and_add("pagetables", + &kgsl_driver.virtdev.kobj); + + kgsl_driver.prockobj = + kobject_create_and_add("proc", + &kgsl_driver.virtdev.kobj); + + kgsl_core_debugfs_init(); + + kgsl_sharedmem_init_sysfs(); + kgsl_cffdump_init(); + + INIT_LIST_HEAD(&kgsl_driver.process_list); + + INIT_LIST_HEAD(&kgsl_driver.pagetable_list); + 
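As a quick illustration of the character device interface registered above (this sketch is not part of the patch): a minimal userspace program that opens the KGSL node and calls read(), which kgsl_read() answers with the fixed KGSL_READ_MESSAGE string via simple_read_from_buffer(). The /dev/kgsl-3d0 node name is an assumption based on the usual Adreno device name; it is not defined in this hunk.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[32] = {0};
	int fd = open("/dev/kgsl-3d0", O_RDWR);	/* assumed node name */

	if (fd < 0) {
		perror("open kgsl");
		return 1;
	}

	/* kgsl_read() copies out KGSL_READ_MESSAGE ("OH HAI GPU\n") */
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("%s", buf);

	close(fd);
	return 0;
}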
+ kgsl_driver.workqueue = create_singlethread_workqueue("kgsl-workqueue"); + kgsl_driver.mem_workqueue = + create_singlethread_workqueue("kgsl-mementry"); + + kgsl_events_init(); + + result = kgsl_cmdbatch_init(); + if (result) + goto err; + + kgsl_memfree_init(); + + return 0; + +err: + kgsl_core_exit(); + return result; +} + +module_init(kgsl_core_init); +module_exit(kgsl_core_exit); + +MODULE_AUTHOR("Qualcomm Innovation Center, Inc."); +MODULE_DESCRIPTION("MSM GPU driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h new file mode 100644 index 000000000000..757c07e6da86 --- /dev/null +++ b/drivers/gpu/msm/kgsl.h @@ -0,0 +1,528 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_H +#define __KGSL_H + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/msm_kgsl.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/cdev.h> +#include <linux/regulator/consumer.h> +#include <linux/mm.h> +#include <linux/dma-attrs.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> + +/* The number of memstore arrays limits the number of contexts allowed. + * If more contexts are needed, update multiple for MEMSTORE_SIZE + */ +#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 2)) +#define KGSL_MEMSTORE_GLOBAL (0) +#define KGSL_PRIORITY_MAX_RB_LEVELS 4 +#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \ + sizeof(struct kgsl_devmemstore) - 1 - KGSL_PRIORITY_MAX_RB_LEVELS) + +/* Timestamp window used to detect rollovers (half of integer range) */ +#define KGSL_TIMESTAMP_WINDOW 0x80000000 + +/* A macro for memory statistics - add the new size to the stat and if + the statisic is greater then _max, set _max +*/ + +static inline void KGSL_STATS_ADD(uint64_t size, atomic_long_t *stat, + atomic_long_t *max) +{ + uint64_t ret = atomic_long_add_return(size, stat); + + if (ret > atomic_long_read(max)) + atomic_long_set(max, ret); +} + +#define KGSL_MAX_NUMIBS 100000 +#define KGSL_MAX_SYNCPOINTS 32 + +struct kgsl_device; +struct kgsl_context; + +/** + * struct kgsl_driver - main container for global KGSL things + * @cdev: Character device struct + * @major: Major ID for the KGSL device + * @class: Pointer to the class struct for the core KGSL sysfs entries + * @virtdev: Virtual device for managing the core + * @ptkobj: kobject for storing the pagetable statistics + * @prockobj: kobject for storing the process statistics + * @devp: Array of pointers to the individual KGSL device structs + * @process_list: List of open processes + * @pagetable_list: LIst of open pagetables + * @ptlock: Lock for accessing the pagetable list + * @process_mutex: Mutex for accessing the process list + * @devlock: Mutex protecting the device list + * @stats: Struct containing atomic memory statistics + * @full_cache_threshold: the threshold that triggers a full cache flush + * @workqueue: Pointer to a single threaded workqueue + * @mem_workqueue: Pointer to a workqueue for 
deferring memory entries + */ +struct kgsl_driver { + struct cdev cdev; + dev_t major; + struct class *class; + struct device virtdev; + struct kobject *ptkobj; + struct kobject *prockobj; + struct kgsl_device *devp[KGSL_DEVICE_MAX]; + struct list_head process_list; + struct list_head pagetable_list; + spinlock_t ptlock; + struct mutex process_mutex; + struct mutex devlock; + struct { + atomic_long_t vmalloc; + atomic_long_t vmalloc_max; + atomic_long_t page_alloc; + atomic_long_t page_alloc_max; + atomic_long_t coherent; + atomic_long_t coherent_max; + atomic_long_t secure; + atomic_long_t secure_max; + atomic_long_t mapped; + atomic_long_t mapped_max; + } stats; + unsigned int full_cache_threshold; + struct workqueue_struct *workqueue; + struct workqueue_struct *mem_workqueue; +}; + +extern struct kgsl_driver kgsl_driver; +extern struct mutex kgsl_mmu_sync; + +struct kgsl_pagetable; +struct kgsl_memdesc; + +struct kgsl_memdesc_ops { + unsigned int vmflags; + int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *, + struct vm_fault *); + void (*free)(struct kgsl_memdesc *memdesc); + int (*map_kernel)(struct kgsl_memdesc *); + void (*unmap_kernel)(struct kgsl_memdesc *); +}; + +/* Internal definitions for memdesc->priv */ +#define KGSL_MEMDESC_GUARD_PAGE BIT(0) +/* Set if the memdesc is mapped into all pagetables */ +#define KGSL_MEMDESC_GLOBAL BIT(1) +/* The memdesc is frozen during a snapshot */ +#define KGSL_MEMDESC_FROZEN BIT(2) +/* The memdesc is mapped into a pagetable */ +#define KGSL_MEMDESC_MAPPED BIT(3) +/* The memdesc is secured for content protection */ +#define KGSL_MEMDESC_SECURE BIT(4) +/* Memory is accessible in privileged mode */ +#define KGSL_MEMDESC_PRIVILEGED BIT(6) +/* The memdesc is TZ locked content protection */ +#define KGSL_MEMDESC_TZ_LOCKED BIT(7) + +/** + * struct kgsl_memdesc - GPU memory object descriptor + * @pagetable: Pointer to the pagetable that the object is mapped in + * @hostptr: Kernel virtual address + * @hostptr_count: Number of threads using hostptr + * @useraddr: User virtual address (if applicable) + * @gpuaddr: GPU virtual address + * @physaddr: Physical address of the memory object + * @size: Size of the memory object + * @mmapsize: Total size of the object in VM (including guard) + * @priv: Internal flags and settings + * @sgt: Scatter gather table for allocated pages + * @ops: Function hooks for the memdesc memory type + * @flags: Flags set from userspace + * @dev: Pointer to the struct device that owns this memory + * @memmap: bitmap of pages for mmapsize + * @memmap_len: Number of bits for memmap + */ +struct kgsl_memdesc { + struct kgsl_pagetable *pagetable; + void *hostptr; + unsigned int hostptr_count; + unsigned long useraddr; + uint64_t gpuaddr; + phys_addr_t physaddr; + uint64_t size; + uint64_t mmapsize; + unsigned int priv; + struct sg_table *sgt; + struct kgsl_memdesc_ops *ops; + uint64_t flags; + struct device *dev; + struct dma_attrs attrs; +}; + +/* + * List of different memory entry types. The usermem enum + * starts at 0, which we use for allocated memory, so 1 is + * added to the enum values. 
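+ * For example, KGSL_MEM_ENTRY_KERNEL stays 0 while the usermem and ION
+ * entries map to their KGSL_USER_MEM_TYPE_* value plus one.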
+ */ +#define KGSL_MEM_ENTRY_KERNEL 0 +#define KGSL_MEM_ENTRY_USER (KGSL_USER_MEM_TYPE_ADDR + 1) +#define KGSL_MEM_ENTRY_ION (KGSL_USER_MEM_TYPE_ION + 1) +#define KGSL_MEM_ENTRY_MAX (KGSL_USER_MEM_TYPE_MAX + 1) + +/* symbolic table for trace and debugfs */ +#define KGSL_MEM_TYPES \ + { KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \ + { KGSL_MEM_ENTRY_USER, "usermem" }, \ + { KGSL_MEM_ENTRY_ION, "ion" } + +/* + * struct kgsl_mem_entry - a userspace memory allocation + * @refcount: reference count. Currently userspace can only + * hold a single reference count, but the kernel may hold more. + * @memdesc: description of the memory + * @priv_data: type-specific data, such as the dma-buf attachment pointer. + * @node: rb_node for the gpu address lookup rb tree + * @id: idr index for this entry, can be used to find memory that does not have + * a valid GPU address. + * @priv: back pointer to the process that owns this memory + * @pending_free: if !0, userspace requested that his memory be freed, but there + * are still references to it. + * @dev_priv: back pointer to the device file that created this entry. + * @metadata: String containing user specified metadata for the entry + * @work: Work struct used to schedule a kgsl_mem_entry_put in atomic contexts + */ +struct kgsl_mem_entry { + struct kref refcount; + struct kgsl_memdesc memdesc; + void *priv_data; + struct rb_node node; + unsigned int id; + struct kgsl_process_private *priv; + int pending_free; + char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1]; + struct work_struct work; +}; + +struct kgsl_device_private; +struct kgsl_event_group; + +typedef void (*kgsl_event_func)(struct kgsl_device *, struct kgsl_event_group *, + void *, int); + +/** + * struct kgsl_event - KGSL GPU timestamp event + * @device: Pointer to the KGSL device that owns the event + * @context: Pointer to the context that owns the event + * @timestamp: Timestamp for the event to expire + * @func: Callback function for for the event when it expires + * @priv: Private data passed to the callback function + * @node: List node for the kgsl_event_group list + * @created: Jiffies when the event was created + * @work: Work struct for dispatching the callback + * @result: KGSL event result type to pass to the callback + * group: The event group this event belongs to + */ +struct kgsl_event { + struct kgsl_device *device; + struct kgsl_context *context; + unsigned int timestamp; + kgsl_event_func func; + void *priv; + struct list_head node; + unsigned int created; + struct work_struct work; + int result; + struct kgsl_event_group *group; +}; + +typedef int (*readtimestamp_func)(struct kgsl_device *, void *, + enum kgsl_timestamp_type, unsigned int *); + +/** + * struct event_group - A list of GPU events + * @context: Pointer to the active context for the events + * @lock: Spinlock for protecting the list + * @events: List of active GPU events + * @group: Node for the master group list + * @processed: Last processed timestamp + * @name: String name for the group (for the debugfs file) + * @readtimestamp: Function pointer to read a timestamp + * @priv: Priv member to pass to the readtimestamp function + */ +struct kgsl_event_group { + struct kgsl_context *context; + spinlock_t lock; + struct list_head events; + struct list_head group; + unsigned int processed; + char name[64]; + readtimestamp_func readtimestamp; + void *priv; +}; + +/** + * struct kgsl_protected_registers - Protected register range + * @base: Offset of the range to be protected + * @range: Range (# of registers = 2 ** range) 
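+ * For example, a range value of 4 protects 16 registers starting
+ * at the given base offset.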
+ */ +struct kgsl_protected_registers { + unsigned int base; + int range; +}; + +long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data); +long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data); +long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cff_user_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_cff_sync_gpuobj(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_mem_entry_destroy(struct kref *kref); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr); + +struct kgsl_mem_entry * __must_check +kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id); + +void kgsl_get_memory_usage(char *str, size_t len, uint64_t memflags); + +extern const struct dev_pm_ops kgsl_pm_ops; + +int 
kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state); +int kgsl_resume_driver(struct platform_device *pdev); + +static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc, + uint64_t gpuaddr, uint64_t size) +{ + /* set a minimum size to search for */ + if (!size) + size = 1; + + /* don't overflow */ + if (size > U64_MAX - gpuaddr) + return 0; + + if (gpuaddr >= memdesc->gpuaddr && + ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) { + return 1; + } + return 0; +} + +static inline void *kgsl_memdesc_map(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->map_kernel) + memdesc->ops->map_kernel(memdesc); + + return memdesc->hostptr; +} + +static inline void kgsl_memdesc_unmap(struct kgsl_memdesc *memdesc) +{ + if (memdesc->ops && memdesc->ops->unmap_kernel) + memdesc->ops->unmap_kernel(memdesc); +} + +static inline void *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc, + uint64_t gpuaddr) +{ + void *hostptr = NULL; + + if ((gpuaddr >= memdesc->gpuaddr) && + (gpuaddr < (memdesc->gpuaddr + memdesc->size))) + hostptr = kgsl_memdesc_map(memdesc); + + return hostptr != NULL ? hostptr + (gpuaddr - memdesc->gpuaddr) : NULL; +} + +static inline int timestamp_cmp(unsigned int a, unsigned int b) +{ + /* check for equal */ + if (a == b) + return 0; + + /* check for greater-than for non-rollover case */ + if ((a > b) && (a - b < KGSL_TIMESTAMP_WINDOW)) + return 1; + + /* check for greater-than for rollover case + * note that <= is required to ensure that consistent + * results are returned for values whose difference is + * equal to the window size + */ + a += KGSL_TIMESTAMP_WINDOW; + b += KGSL_TIMESTAMP_WINDOW; + return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 1 : -1; +} + +/** + * kgsl_schedule_work() - Schedule a work item on the KGSL workqueue + * @work: work item to schedule + */ +static inline void kgsl_schedule_work(struct work_struct *work) +{ + queue_work(kgsl_driver.workqueue, work); +} + +static inline int +kgsl_mem_entry_get(struct kgsl_mem_entry *entry) +{ + return kref_get_unless_zero(&entry->refcount); +} + +static inline void +kgsl_mem_entry_put(struct kgsl_mem_entry *entry) +{ + kref_put(&entry->refcount, kgsl_mem_entry_destroy); +} + +/** + * kgsl_mem_entry_put_deferred() - Schedule a task to put the memory entry + * @entry: Mem entry to put + * + * This function is for atomic contexts where a normal kgsl_mem_entry_put() + * would result in the memory entry getting destroyed and possibly taking + * mutexes along the way. Schedule the work to happen outside of the atomic + * context. 
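+ * The deferred work is queued on kgsl_driver.mem_workqueue.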
+ */ +static inline void kgsl_mem_entry_put_deferred(struct kgsl_mem_entry *entry) +{ + if (entry != NULL) + queue_work(kgsl_driver.mem_workqueue, &entry->work); +} + +/* + * kgsl_addr_range_overlap() - Checks if 2 ranges overlap + * @gpuaddr1: Start of first address range + * @size1: Size of first address range + * @gpuaddr2: Start of second address range + * @size2: Size of second address range + * + * Function returns true if the 2 given address ranges overlap + * else false + */ +static inline bool kgsl_addr_range_overlap(uint64_t gpuaddr1, + uint64_t size1, uint64_t gpuaddr2, uint64_t size2) +{ + if ((size1 > (U64_MAX - gpuaddr1)) || (size2 > (U64_MAX - gpuaddr2))) + return false; + return !(((gpuaddr1 + size1) <= gpuaddr2) || + (gpuaddr1 >= (gpuaddr2 + size2))); +} + +/** + * kgsl_malloc() - Use either kzalloc or vmalloc to allocate memory + * @size: Size of the desired allocation + * + * Allocate a block of memory for the driver - if it is small try to allocate it + * from kmalloc (fast!) otherwise we need to go with vmalloc (safe!) + */ +static inline void *kgsl_malloc(size_t size) +{ + if (size <= PAGE_SIZE) + return kzalloc(size, GFP_KERNEL); + + return vmalloc(size); +} + +/** + * kgsl_free() - Free memory allocated by kgsl_malloc() + * @ptr: Pointer to the memory to free + * + * Free the memory be it in vmalloc or kmalloc space + */ +static inline void kgsl_free(void *ptr) +{ + if (ptr != NULL && is_vmalloc_addr(ptr)) + return vfree(ptr); + + kfree(ptr); +} + +static inline int _copy_from_user(void *dest, void __user *src, + unsigned int ksize, unsigned int usize) +{ + unsigned int copy = ksize < usize ? ksize : usize; + + if (copy == 0) + return -EINVAL; + + return copy_from_user(dest, src, copy) ? -EFAULT : 0; +} + +static inline void __user *to_user_ptr(uint64_t address) +{ + return (void __user *)(uintptr_t)address; +} + +#endif /* __KGSL_H */ diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c new file mode 100644 index 000000000000..1f10a333adf7 --- /dev/null +++ b/drivers/gpu/msm/kgsl_cffdump.c @@ -0,0 +1,747 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +/* #define DEBUG */ +#define ALIGN_CPU + +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/relay.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/sched.h> + +#include "kgsl.h" +#include "kgsl_cffdump.h" +#include "kgsl_debugfs.h" +#include "kgsl_log.h" +#include "kgsl_sharedmem.h" +#include "adreno_pm4types.h" +#include "adreno.h" +#include "adreno_cp_parser.h" + +static struct rchan *chan; +static struct dentry *dir; +static int suspended; +static size_t dropped; +static size_t subbuf_size = 256*1024; +static size_t n_subbufs = 64; + +/* forward declarations */ +static void destroy_channel(void); +static struct rchan *create_channel(unsigned subbuf_size, unsigned n_subbufs); + +static spinlock_t cffdump_lock; +static ulong serial_nr; +static ulong total_bytes; +static ulong total_syncmem; +static long last_sec; + +/* Some simulators have start address of gmem at this offset */ +#define KGSL_CFF_GMEM_OFFSET 0x100000 + +#define MEMBUF_SIZE 64 + +#define CFF_OP_WRITE_REG 0x00000002 +struct cff_op_write_reg { + unsigned char op; + uint addr; + uint value; +} __packed; + +#define CFF_OP_POLL_REG 0x00000004 +struct cff_op_poll_reg { + unsigned char op; + uint addr; + uint value; + uint mask; +} __packed; + +#define CFF_OP_WAIT_IRQ 0x00000005 +struct cff_op_wait_irq { + unsigned char op; +} __packed; + +#define CFF_OP_RMW 0x0000000a + +struct cff_op_write_mem { + unsigned char op; + uint addr; + uint value; +} __packed; + +#define CFF_OP_WRITE_MEMBUF 0x0000000c +struct cff_op_write_membuf { + unsigned char op; + uint addr; + ushort count; + uint buffer[MEMBUF_SIZE]; +} __packed; + +#define CFF_OP_MEMORY_BASE 0x0000000d +struct cff_op_memory_base { + unsigned char op; + uint base; + uint size; + uint gmemsize; +} __packed; + +#define CFF_OP_HANG 0x0000000e +struct cff_op_hang { + unsigned char op; +} __packed; + +#define CFF_OP_EOF 0xffffffff +struct cff_op_eof { + unsigned char op; +} __packed; + +#define CFF_OP_VERIFY_MEM_FILE 0x00000007 +#define CFF_OP_WRITE_SURFACE_PARAMS 0x00000011 +struct cff_op_user_event { + unsigned char op; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; +} __packed; + + +static void b64_encodeblock(unsigned char in[3], unsigned char out[4], int len) +{ + static const char tob64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno" + "pqrstuvwxyz0123456789+/"; + + out[0] = tob64[in[0] >> 2]; + out[1] = tob64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)]; + out[2] = (unsigned char) (len > 1 ? tob64[((in[1] & 0x0f) << 2) + | ((in[2] & 0xc0) >> 6)] : '='); + out[3] = (unsigned char) (len > 2 ? tob64[in[2] & 0x3f] : '='); +} + +static void b64_encode(const unsigned char *in_buf, int in_size, + unsigned char *out_buf, int out_bufsize, int *out_size) +{ + unsigned char in[3], out[4]; + int i, len; + + *out_size = 0; + while (in_size > 0) { + len = 0; + for (i = 0; i < 3; ++i) { + if (in_size-- > 0) { + in[i] = *in_buf++; + ++len; + } else + in[i] = 0; + } + if (len) { + b64_encodeblock(in, out, len); + if (out_bufsize < 4) { + pr_warn("kgsl: cffdump: %s: out of buffer\n", + __func__); + return; + } + for (i = 0; i < 4; ++i) + *out_buf++ = out[i]; + *out_size += 4; + out_bufsize -= 4; + } + } +} + +#define KLOG_TMPBUF_SIZE (1024) +static void klog_printk(const char *fmt, ...) 
+{ + /* per-cpu klog formatting temporary buffer */ + static char klog_buf[NR_CPUS][KLOG_TMPBUF_SIZE]; + + va_list args; + int len; + char *cbuf; + unsigned long flags; + + local_irq_save(flags); + cbuf = klog_buf[smp_processor_id()]; + va_start(args, fmt); + len = vsnprintf(cbuf, KLOG_TMPBUF_SIZE, fmt, args); + total_bytes += len; + va_end(args); + relay_write(chan, cbuf, len); + local_irq_restore(flags); +} + +static struct cff_op_write_membuf cff_op_write_membuf; +static void cffdump_membuf(int id, unsigned char *out_buf, int out_bufsize) +{ + void *data; + int len, out_size; + struct cff_op_write_mem cff_op_write_mem; + + uint addr = cff_op_write_membuf.addr + - sizeof(uint)*cff_op_write_membuf.count; + + if (!cff_op_write_membuf.count) { + pr_warn("kgsl: cffdump: membuf: count == 0, skipping"); + return; + } + + if (cff_op_write_membuf.count != 1) { + cff_op_write_membuf.op = CFF_OP_WRITE_MEMBUF; + cff_op_write_membuf.addr = addr; + len = sizeof(cff_op_write_membuf) - + sizeof(uint)*(MEMBUF_SIZE - cff_op_write_membuf.count); + data = &cff_op_write_membuf; + } else { + cff_op_write_mem.op = CFF_OP_WRITE_MEM; + cff_op_write_mem.addr = addr; + cff_op_write_mem.value = cff_op_write_membuf.buffer[0]; + data = &cff_op_write_mem; + len = sizeof(cff_op_write_mem); + } + b64_encode(data, len, out_buf, out_bufsize, &out_size); + out_buf[out_size] = 0; + klog_printk("%ld:%d;%s\n", ++serial_nr, id, out_buf); + cff_op_write_membuf.count = 0; + cff_op_write_membuf.addr = 0; +} + +void kgsl_cffdump_printline(int id, uint opcode, uint op1, uint op2, + uint op3, uint op4, uint op5) +{ + struct cff_op_write_reg cff_op_write_reg; + struct cff_op_poll_reg cff_op_poll_reg; + struct cff_op_wait_irq cff_op_wait_irq; + struct cff_op_memory_base cff_op_memory_base; + struct cff_op_hang cff_op_hang; + struct cff_op_eof cff_op_eof; + struct cff_op_user_event cff_op_user_event; + unsigned char out_buf[sizeof(cff_op_write_membuf)/3*4 + 16]; + void *data; + int len = 0, out_size; + long cur_secs; + + spin_lock(&cffdump_lock); + if (opcode == CFF_OP_WRITE_MEM) { + if ((cff_op_write_membuf.addr != op1 && + cff_op_write_membuf.count) + || (cff_op_write_membuf.count == MEMBUF_SIZE)) + cffdump_membuf(id, out_buf, sizeof(out_buf)); + + cff_op_write_membuf.buffer[cff_op_write_membuf.count++] = op2; + cff_op_write_membuf.addr = op1 + sizeof(uint); + spin_unlock(&cffdump_lock); + return; + } else if (cff_op_write_membuf.count) + cffdump_membuf(id, out_buf, sizeof(out_buf)); + spin_unlock(&cffdump_lock); + + switch (opcode) { + case CFF_OP_WRITE_REG: + cff_op_write_reg.op = opcode; + cff_op_write_reg.addr = op1; + cff_op_write_reg.value = op2; + data = &cff_op_write_reg; + len = sizeof(cff_op_write_reg); + break; + + case CFF_OP_POLL_REG: + cff_op_poll_reg.op = opcode; + cff_op_poll_reg.addr = op1; + cff_op_poll_reg.value = op2; + cff_op_poll_reg.mask = op3; + data = &cff_op_poll_reg; + len = sizeof(cff_op_poll_reg); + break; + + case CFF_OP_WAIT_IRQ: + cff_op_wait_irq.op = opcode; + data = &cff_op_wait_irq; + len = sizeof(cff_op_wait_irq); + break; + + case CFF_OP_MEMORY_BASE: + cff_op_memory_base.op = opcode; + cff_op_memory_base.base = op1; + cff_op_memory_base.size = op2; + cff_op_memory_base.gmemsize = op3; + data = &cff_op_memory_base; + len = sizeof(cff_op_memory_base); + break; + + case CFF_OP_HANG: + cff_op_hang.op = opcode; + data = &cff_op_hang; + len = sizeof(cff_op_hang); + break; + + case CFF_OP_EOF: + cff_op_eof.op = opcode; + data = &cff_op_eof; + len = sizeof(cff_op_eof); + break; + + case 
CFF_OP_WRITE_SURFACE_PARAMS: + case CFF_OP_VERIFY_MEM_FILE: + cff_op_user_event.op = opcode; + cff_op_user_event.op1 = op1; + cff_op_user_event.op2 = op2; + cff_op_user_event.op3 = op3; + cff_op_user_event.op4 = op4; + cff_op_user_event.op5 = op5; + data = &cff_op_user_event; + len = sizeof(cff_op_user_event); + break; + } + + if (len) { + b64_encode(data, len, out_buf, sizeof(out_buf), &out_size); + out_buf[out_size] = 0; + klog_printk("%ld:%d;%s\n", ++serial_nr, id, out_buf); + } else + pr_warn("kgsl: cffdump: unhandled opcode: %d\n", opcode); + + cur_secs = get_seconds(); + if ((cur_secs - last_sec) > 10 || (last_sec - cur_secs) > 10) { + pr_info("kgsl: cffdump: total [bytes:%lu kB, syncmem:%lu kB], " + "seq#: %lu\n", total_bytes/1024, total_syncmem/1024, + serial_nr); + last_sec = cur_secs; + } +} +EXPORT_SYMBOL(kgsl_cffdump_printline); + +void kgsl_cffdump_init() +{ + struct dentry *debugfs_dir = kgsl_get_debugfs_dir(); + +#ifdef ALIGN_CPU + cpumask_t mask; + + cpumask_clear(&mask); + cpumask_set_cpu(0, &mask); + sched_setaffinity(0, &mask); +#endif + if (!debugfs_dir || IS_ERR(debugfs_dir)) { + KGSL_CORE_ERR("Debugfs directory is bad\n"); + return; + } + + spin_lock_init(&cffdump_lock); + + dir = debugfs_create_dir("cff", debugfs_dir); + if (!dir) { + KGSL_CORE_ERR("debugfs_create_dir failed\n"); + return; + } + + chan = create_channel(subbuf_size, n_subbufs); +} + +void kgsl_cffdump_destroy() +{ + if (chan) + relay_flush(chan); + destroy_channel(); + if (dir) + debugfs_remove(dir); +} + +void kgsl_cffdump_open(struct kgsl_device *device) +{ + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); + if (!device->cff_dump_enable) + return; + + /* Set the maximum possible address range */ + kgsl_cffdump_memory_base(device, + adreno_dev->gmem_size + KGSL_CFF_GMEM_OFFSET, + 0xFFFFFFFF - + (adreno_dev->gmem_size + KGSL_CFF_GMEM_OFFSET), + adreno_dev->gmem_size); +} + +void kgsl_cffdump_memory_base(struct kgsl_device *device, unsigned int base, + unsigned int range, unsigned gmemsize) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(device->id, CFF_OP_MEMORY_BASE, base, + range, gmemsize, 0, 0); +} + +void kgsl_cffdump_hang(struct kgsl_device *device) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(device->id, CFF_OP_HANG, 0, 0, 0, 0, 0); +} + +void kgsl_cffdump_close(struct kgsl_device *device) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(device->id, CFF_OP_EOF, 0, 0, 0, 0, 0); +} + +void kgsl_cffdump_user_event(struct kgsl_device *device, + unsigned int cff_opcode, unsigned int op1, + unsigned int op2, unsigned int op3, + unsigned int op4, unsigned int op5) +{ + if (!device->cff_dump_enable) + return; + kgsl_cffdump_printline(-1, cff_opcode, op1, op2, op3, op4, op5); +} + + + +void kgsl_cffdump_memcpy(struct kgsl_device *device, + uint64_t gpuaddr, unsigned int *ptr, uint64_t sizebytes) +{ + int i; + + if (!device || !device->cff_dump_enable) + return; + + for (i = 0; i < ALIGN(sizebytes, 4) / 4; gpuaddr += 4, ptr++, i++) + kgsl_cffdump_write(device, gpuaddr, *ptr); +} + +void kgsl_cffdump_syncmem(struct kgsl_device *device, + struct kgsl_mem_entry *entry, uint64_t offset, + uint64_t sizebytes, bool clean_cache) +{ + void *src; + + if (!device || device->cff_dump_enable || !entry) + return; + + if (sizebytes == 0) + return; + + if ((offset >= entry->memdesc.size) || + (entry->memdesc.size - len) > offset) + return; + + total_syncmem += sizebytes; + + src = kgsl_memdesc_map(&entry->memdesc); + if (src == NULL) { + 
KGSL_CORE_ERR( + "cffdump: no kernel mapping for GPU address 0x%llX\n", + gpuaddr); + return; + } + + if (clean_cache) { + /* Makes sure that the region is freshly fetched */ + mb(); + + kgsl_cache_range_op(entry->memdesc, + offset, sizebytes, KGSL_CACHE_OP_INV); + } + + kgsl_cffdump_memcpy(device, entry->memdesc.gpuaddr + offset, + src + offset, sizebytes); + + kgsl_memdesc_unmap(&entry->memdesc); +} + +void kgsl_cffdump_memset(struct kgsl_device *device, + uint64_t gpuaddr, unsigned char ch, uint64_t sizebytes) +{ + int i; + + if (!device || !device->cff_dump_enable) + return; + + /* Expand the input char into a dword and output it */ + for (i = 0; i < ALIGN(sizebytes, 4) / 4; gpuaddr += 4, i++) + kgsl_cffdump_write(device, gpuaddr, + (ch << 24) | (ch << 16) | (ch << 8) | ch); +} + +void kgsl_cffdump_regwrite(struct kgsl_device *device, uint addr, + uint value) +{ + if (!device->cff_dump_enable) + return; + + kgsl_cffdump_printline(device->id, CFF_OP_WRITE_REG, addr, value, + 0, 0, 0); +} + +void kgsl_cffdump_regpoll(struct kgsl_device *device, uint addr, + uint value, uint mask) +{ + if (!device->cff_dump_enable) + return; + + kgsl_cffdump_printline(device->id, CFF_OP_POLL_REG, addr, value, + mask, 0, 0); +} + +void kgsl_cffdump_slavewrite(struct kgsl_device *device, uint addr, uint value) +{ + if (!device->cff_dump_enable) + return; + + kgsl_cffdump_printline(-1, CFF_OP_WRITE_REG, addr, value, 0, 0, 0); +} + +int kgsl_cffdump_waitirq(struct kgsl_device *device) +{ + if (!device->cff_dump_enable) + return 0; + + kgsl_cffdump_printline(-1, CFF_OP_WAIT_IRQ, 0, 0, 0, 0, 0); + + return 1; +} +EXPORT_SYMBOL(kgsl_cffdump_waitirq); + +static int subbuf_start_handler(struct rchan_buf *buf, + void *subbuf, void *prev_subbuf, size_t prev_padding) +{ + pr_debug("kgsl: cffdump: subbuf_start_handler(subbuf=%p, prev_subbuf" + "=%p, prev_padding=%08zx)\n", subbuf, prev_subbuf, + prev_padding); + + if (relay_buf_full(buf)) { + if (!suspended) { + suspended = 1; + pr_warn("kgsl: cffdump: relay: cpu %d buffer full!!!\n", + smp_processor_id()); + } + dropped++; + return 0; + } else if (suspended) { + suspended = 0; + pr_warn("kgsl: cffdump: relay: cpu %d buffer no longer full.\n", + smp_processor_id()); + } + + subbuf_start_reserve(buf, 0); + return 1; +} + +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, unsigned short mode, struct rchan_buf *buf, + int *is_global) +{ + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); +} + +/* + * file_remove() default callback. Removes relay file in debugfs. 
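+ * Invoked by the relay core for each per-cpu buffer when the channel is
+ * torn down in destroy_channel().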
+ */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + pr_info("kgsl: cffdump: %s()\n", __func__); + debugfs_remove(dentry); + return 0; +} + +/* + * relay callbacks + */ +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_handler, + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +/** + * create_channel - creates channel /debug/klog/cpuXXX + * + * Creates channel along with associated produced/consumed control files + * + * Returns channel on success, NULL otherwise + */ +static struct rchan *create_channel(unsigned subbuf_size, unsigned n_subbufs) +{ + struct rchan *chan; + + pr_info("kgsl: cffdump: relay: create_channel: subbuf_size %u, " + "n_subbufs %u, dir 0x%p\n", subbuf_size, n_subbufs, dir); + + chan = relay_open("cpu", dir, subbuf_size, + n_subbufs, &relay_callbacks, NULL); + if (!chan) { + KGSL_CORE_ERR("relay_open failed\n"); + return NULL; + } + + suspended = 0; + dropped = 0; + + return chan; +} + +/** + * destroy_channel - destroys channel /debug/kgsl/cff/cpuXXX + * + * Destroys channel along with associated produced/consumed control files + */ +static void destroy_channel(void) +{ + pr_info("kgsl: cffdump: relay: destroy_channel\n"); + if (chan) { + relay_close(chan); + chan = NULL; + } +} + +int kgsl_cff_dump_enable_set(void *data, u64 val) +{ + int ret = 0; + struct kgsl_device *device = (struct kgsl_device *)data; + int i; + + mutex_lock(&kgsl_driver.devlock); + if (val) { + /* Check if CFF is on for some other device already */ + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i]) { + struct kgsl_device *device_temp = + kgsl_driver.devp[i]; + if (device_temp->cff_dump_enable && + device != device_temp) { + KGSL_CORE_ERR( + "CFF is on for another device %d\n", + device_temp->id); + ret = -EINVAL; + goto done; + } + } + } + if (!device->cff_dump_enable) { + device->cff_dump_enable = 1; + /* + * force device to slumber so that we ensure that the + * start opcode in CFF is present + */ + mutex_lock(&device->mutex); + ret = kgsl_pwrctrl_change_state(device, + KGSL_STATE_SUSPEND); + ret |= kgsl_pwrctrl_change_state(device, + KGSL_STATE_SLUMBER); + if (ret) + device->cff_dump_enable = 0; + mutex_unlock(&device->mutex); + } + } else if (device->cff_dump_enable && !val) { + device->cff_dump_enable = 0; + } +done: + mutex_unlock(&kgsl_driver.devlock); + return ret; +} +EXPORT_SYMBOL(kgsl_cff_dump_enable_set); + +int kgsl_cff_dump_enable_get(void *data, u64 *val) +{ + struct kgsl_device *device = (struct kgsl_device *)data; + *val = device->cff_dump_enable; + return 0; +} +EXPORT_SYMBOL(kgsl_cff_dump_enable_get); + +/* + * kgsl_cffdump_capture_adreno_ib_cff() - Capture CFF for an IB + * @device: Device for which CFF is to be captured + * @ptbase: The pagetable in which the IB is mapped + * @gpuaddr: Address of IB + * @dwords: Size of the IB + * + * Dumps the CFF format of the IB including all objects in it like, IB2, + * shaders, etc. 
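+ * Each object found by the adreno IB parser is written out with
+ * kgsl_cffdump_syncmem().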
+ * + * Returns 0 on success else error code + */ +static int kgsl_cffdump_capture_adreno_ib_cff(struct kgsl_device *device, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t dwords) +{ + int ret; + struct adreno_ib_object_list *ib_obj_list; + struct adreno_ib_object *ib_obj; + int i; + + if (!device->cff_dump_enable) + return 0; + + ret = adreno_ib_create_object_list(device, process, gpuaddr, dwords, + &ib_obj_list); + + if (ret) { + KGSL_DRV_ERR(device, + "Fail to create object list for IB 0x%016llX, size(dwords) 0x%llX\n", + gpuaddr, dwords); + return ret; + } + + for (i = 0; i < ib_obj_list->num_objs; i++) { + ib_obj = &(ib_obj_list->obj_list[i]); + kgsl_cffdump_syncmem(device, ib_obj->entry, 0, ib_obj->size, + false); + } + adreno_ib_destroy_obj_list(ib_obj_list); + return 0; +} + +/* + * kgsl_cffdump_capture_ib_desc() - Capture CFF for a list of IB's + * @device: Device for which CFF is to be captured + * @context: The context under which the IB list executes on device + * @ibdesc: The IB list + * @numibs: Number of IB's in ibdesc + * + * Returns 0 on success else error code + */ +int kgsl_cffdump_capture_ib_desc(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch) +{ + int ret = 0; + struct kgsl_memobj_node *ib; + + if (!device->cff_dump_enable) + return 0; + /* Dump CFF for IB and all objects in it */ + list_for_each_entry(ib, &cmdbatch->cmdlist, node) { + ret = kgsl_cffdump_capture_adreno_ib_cff( + device, context->proc_priv, ib->gpuaddr, + ib->size >> 2); + if (ret) { + KGSL_DRV_ERR(device, + "Fail cff capture, IB 0x%016llX, size 0x%llX\n", + ib->gpuaddr, ib->size); + break; + } + } + return ret; +} +EXPORT_SYMBOL(kgsl_cffdump_capture_ib_desc); + +DEFINE_SIMPLE_ATTRIBUTE(kgsl_cff_dump_enable_fops, kgsl_cff_dump_enable_get, + kgsl_cff_dump_enable_set, "%llu\n"); + +void kgsl_cffdump_debugfs_create(struct kgsl_device *device) +{ + debugfs_create_file("cff_dump", 0644, device->d_debugfs, device, + &kgsl_cff_dump_enable_fops); +} diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h new file mode 100644 index 000000000000..5eb04e7ea500 --- /dev/null +++ b/drivers/gpu/msm/kgsl_cffdump.h @@ -0,0 +1,183 @@ +/* Copyright (c) 2010-2011,2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __KGSL_CFFDUMP_H +#define __KGSL_CFFDUMP_H + +#include <linux/types.h> +#include "kgsl_device.h" + +extern unsigned int kgsl_cff_dump_enable; + +static inline bool kgsl_cffdump_flags_no_memzero(void) { return true; } + +struct kgsl_device_private; + +#ifdef CONFIG_MSM_KGSL_CFF_DUMP + +#define CFF_OP_WRITE_MEM 0x0000000b + +void kgsl_cffdump_init(void); +void kgsl_cffdump_destroy(void); +void kgsl_cffdump_open(struct kgsl_device *device); +void kgsl_cffdump_close(struct kgsl_device *device); +void kgsl_cffdump_memcpy(struct kgsl_device *device, uint64_t gpuaddr, + unsigned int *ptr, uint64_t sizebytes); +void kgsl_cffdump_syncmem(struct kgsl_device *, struct kgsl_mem_entry *, + uint64_t offset, uint64_t sizebytes, bool clean_cache); +void kgsl_cffdump_memset(struct kgsl_device *device, uint64_t addr, + unsigned char value, size_t sizebytes); +void kgsl_cffdump_regwrite(struct kgsl_device *device, uint addr, + uint value); +void kgsl_cffdump_regpoll(struct kgsl_device *device, uint addr, + uint value, uint mask); +bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, + const struct kgsl_memdesc *memdesc, uint64_t gpuaddr, + uint64_t sizedwords, bool check_only); +void kgsl_cffdump_user_event(struct kgsl_device *device, + unsigned int cff_opcode, unsigned int op1, + unsigned int op2, unsigned int op3, + unsigned int op4, unsigned int op5); + +void kgsl_cffdump_memory_base(struct kgsl_device *device, unsigned int base, + unsigned int range, unsigned int gmemsize); + +void kgsl_cffdump_hang(struct kgsl_device *device); +void kgsl_cffdump_debugfs_create(struct kgsl_device *device); +int kgsl_cff_dump_enable_set(void *data, u64 val); +int kgsl_cff_dump_enable_get(void *data, u64 *val); +int kgsl_cffdump_capture_ib_desc(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cffdump_printline(int id, uint opcode, uint op1, uint op2, + uint op3, uint op4, uint op5); + +static inline void kgsl_cffdump_write(struct kgsl_device *device, + uint64_t gpuaddr, unsigned int value) +{ + if (!device || !device->cff_dump_enable) + return; + + kgsl_cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, value, 0, 0, 0); +} + +#else + +static inline void kgsl_cffdump_init(void) +{ + return; +} + +static inline void kgsl_cffdump_destroy(void) +{ + return; +} + +static inline void kgsl_cffdump_open(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_close(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_write(struct kgsl_device *device, + uint64_t gpuaddr, unsigned int value) +{ + return; +} + +static inline void kgsl_cffdump_memcpy(struct kgsl_device *device, + uint64_t gupaddr, unsigned int *ptr, uint64_t sizebytes) +{ + return; +} + +static inline void kgsl_cffdump_syncmem(struct kgsl_device *device, + struct kgsl_mem_entry *entry, uint64_t offset, + uint64_t sizebytes, bool clean_cache) +{ + return; +} + +static inline void kgsl_cffdump_memset(struct kgsl_device *device, + uint64_t addr, unsigned char ch, size_t sizebytes) +{ + return; +} + +static inline void kgsl_cffdump_regwrite(struct kgsl_device *device, uint addr, + uint value) +{ + return; +} + +static inline void kgsl_cffdump_regpoll(struct kgsl_device *device, uint addr, + uint value, uint mask) +{ + return; +} + +static inline bool kgsl_cffdump_parse_ibs(struct kgsl_device_private *dev_priv, + const struct kgsl_memdesc *memdesc, uint64_t gpuaddr, + uint64_t sizedwords, bool check_only) +{ + return false; +} + +static 
inline void kgsl_cffdump_memory_base(struct kgsl_device *device, + unsigned int base, unsigned int range, unsigned int gmemsize) +{ + return; +} + +static inline void kgsl_cffdump_hang(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_debugfs_create(struct kgsl_device *device) +{ + return; +} + +static inline void kgsl_cffdump_user_event(struct kgsl_device *device, + unsigned int cff_opcode, unsigned int op1, + unsigned int op2, unsigned int op3, + unsigned int op4, unsigned int op5) +{ + return; +} + +static inline int kgsl_cffdump_capture_ib_desc(struct kgsl_device *device, + struct kgsl_context *context, + struct kgsl_cmdbatch *cmdbatch) +{ + return 0; +} + +static inline int kgsl_cff_dump_enable_set(void *data, u64 val) +{ + return -ENODEV; +} + +static inline int kgsl_cff_dump_enable_get(void *data, u64 *val) +{ + return -ENODEV; +} + +#endif /* CONFIG_MSM_KGSL_CFF_DUMP */ +#endif /* __KGSL_CFFDUMP_H */ diff --git a/drivers/gpu/msm/kgsl_cmdbatch.c b/drivers/gpu/msm/kgsl_cmdbatch.c new file mode 100644 index 000000000000..2aac458f05eb --- /dev/null +++ b/drivers/gpu/msm/kgsl_cmdbatch.c @@ -0,0 +1,948 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * KGSL command batch management + * A command batch is a single submission from userland. The cmdbatch + * encapsulates everything about the submission : command buffers, flags and + * sync points. + * + * Sync points are events that need to expire before the + * cmdbatch can be queued to the hardware. All synpoints are contained in an + * array of kgsl_cmdbatch_sync_event structs in the command batch. There can be + * multiple types of events both internal ones (GPU events) and external + * triggers. As the events expire bits are cleared in a pending bitmap stored + * in the command batch. The GPU will submit the command as soon as the bitmap + * goes to zero indicating no more pending events. 
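+ *
+ * For example, a cmdbatch with one timestamp syncpoint and one fence
+ * syncpoint starts with bits 0 and 1 set in 'pending'; each expiring
+ * event clears its bit and the last one to clear schedules the context
+ * on the dispatcher.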
+ */ + +#include <linux/uaccess.h> +#include <linux/list.h> +#include <linux/compat.h> + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_cmdbatch.h" +#include "kgsl_sync.h" +#include "kgsl_trace.h" +#include "kgsl_compat.h" + +/* + * Define an kmem cache for the memobj structures since we allocate and free + * them so frequently + */ +static struct kmem_cache *memobjs_cache; + +/** + * kgsl_cmdbatch_put() - Decrement the refcount for a command batch object + * @cmdbatch: Pointer to the command batch object + */ +static inline void kgsl_cmdbatch_put(struct kgsl_cmdbatch *cmdbatch) +{ + if (cmdbatch) + kref_put(&cmdbatch->refcount, kgsl_cmdbatch_destroy_object); +} + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch) +{ + struct kgsl_cmdbatch_sync_event *event; + unsigned int i; + + for (i = 0; i < cmdbatch->numsyncs; i++) { + event = &cmdbatch->synclist[i]; + + if (!kgsl_cmdbatch_event_pending(cmdbatch, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: { + unsigned int retired; + + kgsl_readtimestamp(event->device, + event->context, KGSL_TIMESTAMP_RETIRED, + &retired); + + dev_err(device->dev, + " [timestamp] context %d timestamp %d (retired %d)\n", + event->context->id, event->timestamp, + retired); + break; + } + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + if (event->handle) + dev_err(device->dev, " fence: [%p] %s\n", + event->handle->fence, + event->handle->name); + else + dev_err(device->dev, " fence: invalid\n"); + break; + } + } +} + +static void _kgsl_cmdbatch_timer(unsigned long data) +{ + struct kgsl_device *device; + struct kgsl_cmdbatch *cmdbatch = (struct kgsl_cmdbatch *) data; + struct kgsl_cmdbatch_sync_event *event; + unsigned int i; + + if (cmdbatch == NULL || cmdbatch->context == NULL) + return; + + device = cmdbatch->context->device; + + dev_err(device->dev, + "kgsl: possible gpu syncpoint deadlock for context %d timestamp %d\n", + cmdbatch->context->id, cmdbatch->timestamp); + + set_bit(CMDBATCH_FLAG_FENCE_LOG, &cmdbatch->priv); + kgsl_context_dump(cmdbatch->context); + clear_bit(CMDBATCH_FLAG_FENCE_LOG, &cmdbatch->priv); + + dev_err(device->dev, " pending events:\n"); + + for (i = 0; i < cmdbatch->numsyncs; i++) { + event = &cmdbatch->synclist[i]; + + if (!kgsl_cmdbatch_event_pending(cmdbatch, i)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + dev_err(device->dev, " [%d] TIMESTAMP %d:%d\n", + i, event->context->id, event->timestamp); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + if (event->handle != NULL) { + dev_err(device->dev, " [%d] FENCE %s\n", + i, event->handle->fence ? + event->handle->fence->name : "NULL"); + kgsl_sync_fence_log(event->handle->fence); + } + break; + } + } + + dev_err(device->dev, "--gpu syncpoint deadlock print end--\n"); +} + +/** + * kgsl_cmdbatch_destroy_object() - Destroy a cmdbatch object + * @kref: Pointer to the kref structure for this object + * + * Actually destroy a command batch object. 
Called from kgsl_cmdbatch_put + */ +void kgsl_cmdbatch_destroy_object(struct kref *kref) +{ + struct kgsl_cmdbatch *cmdbatch = container_of(kref, + struct kgsl_cmdbatch, refcount); + + kgsl_context_put(cmdbatch->context); + + kfree(cmdbatch->synclist); + kfree(cmdbatch); +} +EXPORT_SYMBOL(kgsl_cmdbatch_destroy_object); + +/* + * a generic function to retire a pending sync event and (possibly) + * kick the dispatcher + */ +static void kgsl_cmdbatch_sync_expire(struct kgsl_device *device, + struct kgsl_cmdbatch_sync_event *event) +{ + /* + * Clear the event from the pending mask - if it is already clear, then + * leave without doing anything useful + */ + if (!test_and_clear_bit(event->id, &event->cmdbatch->pending)) + return; + + /* + * If no more pending events, delete the timer and schedule the command + * for dispatch + */ + if (!kgsl_cmdbatch_events_pending(event->cmdbatch)) { + del_timer_sync(&event->cmdbatch->timer); + + if (device->ftbl->drawctxt_sched) + device->ftbl->drawctxt_sched(device, + event->cmdbatch->context); + } +} + +/* + * This function is called by the GPU event when the sync event timestamp + * expires + */ +static void kgsl_cmdbatch_sync_func(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_cmdbatch_sync_event *event = priv; + + trace_syncpoint_timestamp_expire(event->cmdbatch, + event->context, event->timestamp); + + kgsl_cmdbatch_sync_expire(device, event); + kgsl_context_put(event->context); + kgsl_cmdbatch_put(event->cmdbatch); +} + +static inline void _free_memobj_list(struct list_head *list) +{ + struct kgsl_memobj_node *mem, *tmpmem; + + /* Free the cmd mem here */ + list_for_each_entry_safe(mem, tmpmem, list, node) { + list_del_init(&mem->node); + kmem_cache_free(memobjs_cache, mem); + } +} + +/** + * kgsl_cmdbatch_destroy() - Destroy a cmdbatch structure + * @cmdbatch: Pointer to the command batch object to destroy + * + * Start the process of destroying a command batch. Cancel any pending events + * and decrement the refcount. Asynchronous events can still signal after + * kgsl_cmdbatch_destroy has returned. 
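+ * The pending bitmap is copied and cleared up front so that any late
+ * callbacks become harmless no-ops.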
+ */ +void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch) +{ + unsigned int i; + unsigned long pending; + + if (IS_ERR_OR_NULL(cmdbatch)) + return; + + /* Zap the canary timer */ + del_timer_sync(&cmdbatch->timer); + + /* + * Copy off the pending list and clear all pending events - this will + * render any subsequent asynchronous callback harmless + */ + bitmap_copy(&pending, &cmdbatch->pending, KGSL_MAX_SYNCPOINTS); + bitmap_zero(&cmdbatch->pending, KGSL_MAX_SYNCPOINTS); + + /* + * Clear all pending events - this will render any subsequent async + * callbacks harmless + */ + + for (i = 0; i < cmdbatch->numsyncs; i++) { + struct kgsl_cmdbatch_sync_event *event = &cmdbatch->synclist[i]; + + /* Don't do anything if the event has already expired */ + if (!test_bit(i, &pending)) + continue; + + switch (event->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + kgsl_cancel_event(cmdbatch->device, + &event->context->events, event->timestamp, + kgsl_cmdbatch_sync_func, event); + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + if (kgsl_sync_fence_async_cancel(event->handle)) + kgsl_cmdbatch_put(cmdbatch); + break; + } + } + + /* + * Release the the refcount on the mem entry associated with the + * cmdbatch profiling buffer + */ + if (cmdbatch->flags & KGSL_CMDBATCH_PROFILING) + kgsl_mem_entry_put(cmdbatch->profiling_buf_entry); + + /* Destroy the cmdlist we created */ + _free_memobj_list(&cmdbatch->cmdlist); + + /* Destroy the memlist we created */ + _free_memobj_list(&cmdbatch->memlist); + + /* + * If we cancelled an event, there's a good chance that the context is + * on a dispatcher queue, so schedule to get it removed. + */ + if (!bitmap_empty(&pending, KGSL_MAX_SYNCPOINTS) && + cmdbatch->device->ftbl->drawctxt_sched) + cmdbatch->device->ftbl->drawctxt_sched(cmdbatch->device, + cmdbatch->context); + + kgsl_cmdbatch_put(cmdbatch); +} +EXPORT_SYMBOL(kgsl_cmdbatch_destroy); + +/* + * A callback that gets registered with kgsl_sync_fence_async_wait and is fired + * when a fence is expired + */ +static void kgsl_cmdbatch_sync_fence_func(void *priv) +{ + struct kgsl_cmdbatch_sync_event *event = priv; + + trace_syncpoint_fence_expire(event->cmdbatch, + event->handle ? event->handle->name : "unknown"); + + kgsl_cmdbatch_sync_expire(event->device, event); + + kgsl_cmdbatch_put(event->cmdbatch); +} + +/* kgsl_cmdbatch_add_sync_fence() - Add a new sync fence syncpoint + * @device: KGSL device + * @cmdbatch: KGSL cmdbatch to add the sync point to + * @priv: Private sructure passed by the user + * + * Add a new fence sync syncpoint to the cmdbatch. 
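+ * A reference on the cmdbatch is taken before the fence waiter is
+ * registered and dropped again if registration fails or when the fence
+ * callback fires.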
+ */ +static int kgsl_cmdbatch_add_sync_fence(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void *priv) +{ + struct kgsl_cmd_syncpoint_fence *sync = priv; + struct kgsl_cmdbatch_sync_event *event; + unsigned int id; + + kref_get(&cmdbatch->refcount); + + id = cmdbatch->numsyncs++; + + event = &cmdbatch->synclist[id]; + + event->id = id; + event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE; + event->cmdbatch = cmdbatch; + event->device = device; + event->context = NULL; + + set_bit(event->id, &cmdbatch->pending); + + event->handle = kgsl_sync_fence_async_wait(sync->fd, + kgsl_cmdbatch_sync_fence_func, event); + + if (IS_ERR_OR_NULL(event->handle)) { + int ret = PTR_ERR(event->handle); + + clear_bit(event->id, &cmdbatch->pending); + event->handle = NULL; + + kgsl_cmdbatch_put(cmdbatch); + + /* + * If ret == 0 the fence was already signaled - print a trace + * message so we can track that + */ + if (ret == 0) + trace_syncpoint_fence_expire(cmdbatch, "signaled"); + + return ret; + } + + trace_syncpoint_fence(cmdbatch, event->handle->name); + + return 0; +} + +/* kgsl_cmdbatch_add_sync_timestamp() - Add a new sync point for a cmdbatch + * @device: KGSL device + * @cmdbatch: KGSL cmdbatch to add the sync point to + * @priv: Private sructure passed by the user + * + * Add a new sync point timestamp event to the cmdbatch. + */ +static int kgsl_cmdbatch_add_sync_timestamp(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void *priv) +{ + struct kgsl_cmd_syncpoint_timestamp *sync = priv; + struct kgsl_context *context = kgsl_context_get(cmdbatch->device, + sync->context_id); + struct kgsl_cmdbatch_sync_event *event; + int ret = -EINVAL; + unsigned int id; + + if (context == NULL) + return -EINVAL; + + /* + * We allow somebody to create a sync point on their own context. + * This has the effect of delaying a command from submitting until the + * dependent command has cleared. That said we obviously can't let them + * create a sync point on a future timestamp. 
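+ * For example, if the context has queued up to timestamp 100, a
+ * syncpoint on timestamp 101 of the same context is rejected.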
+ */ + + if (context == cmdbatch->context) { + unsigned int queued; + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(sync->timestamp, queued) > 0) { + KGSL_DRV_ERR(device, + "Cannot create syncpoint for future timestamp %d (current %d)\n", + sync->timestamp, queued); + goto done; + } + } + + kref_get(&cmdbatch->refcount); + + id = cmdbatch->numsyncs++; + + event = &cmdbatch->synclist[id]; + event->id = id; + + event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP; + event->cmdbatch = cmdbatch; + event->context = context; + event->timestamp = sync->timestamp; + event->device = device; + + set_bit(event->id, &cmdbatch->pending); + + ret = kgsl_add_event(device, &context->events, sync->timestamp, + kgsl_cmdbatch_sync_func, event); + + if (ret) { + clear_bit(event->id, &cmdbatch->pending); + kgsl_cmdbatch_put(cmdbatch); + } else { + trace_syncpoint_timestamp(cmdbatch, context, sync->timestamp); + } + +done: + if (ret) + kgsl_context_put(context); + + return ret; +} + +/** + * kgsl_cmdbatch_add_sync() - Add a sync point to a command batch + * @device: Pointer to the KGSL device struct for the GPU + * @cmdbatch: Pointer to the cmdbatch + * @sync: Pointer to the user-specified struct defining the syncpoint + * + * Create a new sync point in the cmdbatch based on the user specified + * parameters + */ +int kgsl_cmdbatch_add_sync(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmd_syncpoint *sync) +{ + void *priv; + int ret, psize; + int (*func)(struct kgsl_device *device, struct kgsl_cmdbatch *cmdbatch, + void *priv); + + switch (sync->type) { + case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: + psize = sizeof(struct kgsl_cmd_syncpoint_timestamp); + func = kgsl_cmdbatch_add_sync_timestamp; + break; + case KGSL_CMD_SYNCPOINT_TYPE_FENCE: + psize = sizeof(struct kgsl_cmd_syncpoint_fence); + func = kgsl_cmdbatch_add_sync_fence; + break; + default: + KGSL_DRV_ERR(device, + "bad syncpoint type ctxt %d type 0x%x size %zu\n", + cmdbatch->context->id, sync->type, sync->size); + return -EINVAL; + } + + if (sync->size != psize) { + KGSL_DRV_ERR(device, + "bad syncpoint size ctxt %d type 0x%x size %zu\n", + cmdbatch->context->id, sync->type, sync->size); + return -EINVAL; + } + + priv = kzalloc(sync->size, GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + if (copy_from_user(priv, sync->priv, sync->size)) { + kfree(priv); + return -EFAULT; + } + + ret = func(device, cmdbatch, priv); + kfree(priv); + + return ret; +} + +static void add_profiling_buffer(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, uint64_t gpuaddr, uint64_t size, + unsigned int id, uint64_t offset) +{ + struct kgsl_mem_entry *entry; + + if (!(cmdbatch->flags & KGSL_CMDBATCH_PROFILING)) + return; + + /* Only the first buffer entry counts - ignore the rest */ + if (cmdbatch->profiling_buf_entry != NULL) + return; + + if (id != 0) + entry = kgsl_sharedmem_find_id(cmdbatch->context->proc_priv, + id); + else + entry = kgsl_sharedmem_find(cmdbatch->context->proc_priv, + gpuaddr); + + if (entry != NULL) { + if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) { + kgsl_mem_entry_put(entry); + entry = NULL; + } + } + + if (entry == NULL) { + KGSL_DRV_ERR(device, + "ignore bad profile buffer ctxt %d id %d offset %lld gpuaddr %llx size %lld\n", + cmdbatch->context->id, id, offset, gpuaddr, size); + return; + } + + cmdbatch->profiling_buf_entry = entry; + + if (id != 0) + cmdbatch->profiling_buffer_gpuaddr = + entry->memdesc.gpuaddr + offset; + else + 
cmdbatch->profiling_buffer_gpuaddr = gpuaddr; +} + +/** + * kgsl_cmdbatch_add_ibdesc() - Add a legacy ibdesc to a command batch + * @cmdbatch: Pointer to the cmdbatch + * @ibdesc: Pointer to the user-specified struct defining the memory or IB + * + * Create a new memory entry in the cmdbatch based on the user specified + * parameters + */ +int kgsl_cmdbatch_add_ibdesc(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, struct kgsl_ibdesc *ibdesc) +{ + struct kgsl_memobj_node *mem; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = (uint64_t) ibdesc->gpuaddr; + mem->size = (uint64_t) ibdesc->sizedwords << 2; + mem->priv = 0; + mem->id = 0; + mem->offset = 0; + mem->flags = 0; + + /* sanitize the ibdesc ctrl flags */ + ibdesc->ctrl &= KGSL_IBDESC_MEMLIST | KGSL_IBDESC_PROFILING_BUFFER; + + if (cmdbatch->flags & KGSL_CMDBATCH_MEMLIST && + ibdesc->ctrl & KGSL_IBDESC_MEMLIST) { + if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) { + add_profiling_buffer(device, cmdbatch, mem->gpuaddr, + mem->size, 0, 0); + return 0; + } + + /* add to the memlist */ + list_add_tail(&mem->node, &cmdbatch->memlist); + + if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) + add_profiling_buffer(device, cmdbatch, mem->gpuaddr, + mem->size, 0, 0); + } else { + /* Ignore if SYNC or MARKER is specified */ + if (cmdbatch->flags & + (KGSL_CMDBATCH_SYNC | KGSL_CMDBATCH_MARKER)) + return 0; + + /* set the preamble flag if directed to */ + if (cmdbatch->context->flags & KGSL_CONTEXT_PREAMBLE && + list_empty(&cmdbatch->cmdlist)) + mem->flags = KGSL_CMDLIST_CTXTSWITCH_PREAMBLE; + + /* add to the cmd list */ + list_add_tail(&mem->node, &cmdbatch->cmdlist); + } + + return 0; +} + +/** + * kgsl_cmdbatch_create() - Create a new cmdbatch structure + * @device: Pointer to a KGSL device struct + * @context: Pointer to a KGSL context struct + * @flags: Flags for the cmdbatch + * + * Allocate an new cmdbatch structure + */ +struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags) +{ + struct kgsl_cmdbatch *cmdbatch = kzalloc(sizeof(*cmdbatch), GFP_KERNEL); + if (cmdbatch == NULL) + return ERR_PTR(-ENOMEM); + + /* + * Increase the reference count on the context so it doesn't disappear + * during the lifetime of this command batch + */ + + if (!_kgsl_context_get(context)) { + kfree(cmdbatch); + return ERR_PTR(-ENOENT); + } + + kref_init(&cmdbatch->refcount); + INIT_LIST_HEAD(&cmdbatch->cmdlist); + INIT_LIST_HEAD(&cmdbatch->memlist); + + cmdbatch->device = device; + cmdbatch->context = context; + /* sanitize our flags for cmdbatches */ + cmdbatch->flags = flags & (KGSL_CMDBATCH_CTX_SWITCH + | KGSL_CMDBATCH_MARKER + | KGSL_CMDBATCH_END_OF_FRAME + | KGSL_CMDBATCH_SYNC + | KGSL_CMDBATCH_PWR_CONSTRAINT + | KGSL_CMDBATCH_MEMLIST + | KGSL_CMDBATCH_PROFILING); + + /* Add a timer to help debug sync deadlocks */ + setup_timer(&cmdbatch->timer, _kgsl_cmdbatch_timer, + (unsigned long) cmdbatch); + + return cmdbatch; +} + +#ifdef CONFIG_COMPAT +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + int i, ret = 0; + struct kgsl_ibdesc_compat ibdesc32; + struct kgsl_ibdesc ibdesc; + + for (i = 0; i < count; i++) { + memset(&ibdesc32, 0, sizeof(ibdesc32)); + + if (copy_from_user(&ibdesc32, ptr, sizeof(ibdesc32))) { + ret = -EFAULT; + break; + } + + ibdesc.gpuaddr = (unsigned long) ibdesc32.gpuaddr; + ibdesc.sizedwords = (size_t) 
ibdesc32.sizedwords; + ibdesc.ctrl = (unsigned int) ibdesc32.ctrl; + + ret = kgsl_cmdbatch_add_ibdesc(device, cmdbatch, &ibdesc); + if (ret) + break; + + ptr += sizeof(ibdesc32); + } + + return ret; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint_compat sync32; + struct kgsl_cmd_syncpoint sync; + int i, ret = 0; + + for (i = 0; i < count; i++) { + memset(&sync32, 0, sizeof(sync32)); + + if (copy_from_user(&sync32, ptr, sizeof(sync32))) { + ret = -EFAULT; + break; + } + + sync.type = sync32.type; + sync.priv = compat_ptr(sync32.priv); + sync.size = (size_t) sync32.size; + + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); + if (ret) + break; + + ptr += sizeof(sync32); + } + + return ret; +} +#else +static int add_ibdesc_list_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + return -EINVAL; +} + +static int add_syncpoints_compat(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + return -EINVAL; +} +#endif + +int kgsl_cmdbatch_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + struct kgsl_ibdesc ibdesc; + int i, ret; + + if (is_compat_task()) + return add_ibdesc_list_compat(device, cmdbatch, ptr, count); + + for (i = 0; i < count; i++) { + memset(&ibdesc, 0, sizeof(ibdesc)); + + if (copy_from_user(&ibdesc, ptr, sizeof(ibdesc))) + return -EFAULT; + + ret = kgsl_cmdbatch_add_ibdesc(device, cmdbatch, &ibdesc); + if (ret) + return ret; + + ptr += sizeof(ibdesc); + } + + return 0; +} + +int kgsl_cmdbatch_add_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count) +{ + struct kgsl_cmd_syncpoint sync; + int i, ret; + + if (count == 0) + return 0; + + if (count > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + cmdbatch->synclist = kcalloc(count, + sizeof(struct kgsl_cmdbatch_sync_event), GFP_KERNEL); + + if (cmdbatch->synclist == NULL) + return -ENOMEM; + + if (is_compat_task()) + return add_syncpoints_compat(device, cmdbatch, ptr, count); + + for (i = 0; i < count; i++) { + memset(&sync, 0, sizeof(sync)); + + if (copy_from_user(&sync, ptr, sizeof(sync))) + return -EFAULT; + + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); + if (ret) + return ret; + + ptr += sizeof(sync); + } + + return 0; +} + +static int kgsl_cmdbatch_add_object(struct list_head *head, + struct kgsl_command_object *obj) +{ + struct kgsl_memobj_node *mem; + + mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL); + if (mem == NULL) + return -ENOMEM; + + mem->gpuaddr = obj->gpuaddr; + mem->size = obj->size; + mem->id = obj->id; + mem->offset = obj->offset; + mem->flags = obj->flags; + mem->priv = 0; + + list_add_tail(&mem->node, head); + return 0; +} + +#define CMDLIST_FLAGS \ + (KGSL_CMDLIST_IB | \ + KGSL_CMDLIST_CTXTSWITCH_PREAMBLE | \ + KGSL_CMDLIST_IB_PREAMBLE) + +int kgsl_cmdbatch_add_cmdlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + int i, ret = 0; + + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + /* Ignore all if SYNC or MARKER is specified */ + if (cmdbatch->flags & (KGSL_CMDBATCH_SYNC | KGSL_CMDBATCH_MARKER)) + return 0; + + for (i = 0; i < 
count; i++) { + memset(&obj, 0, sizeof(obj)); + + ret = _copy_from_user(&obj, ptr, sizeof(obj), size); + if (ret) + return ret; + + /* Sanity check the flags */ + if (!(obj.flags & CMDLIST_FLAGS)) { + KGSL_DRV_ERR(device, + "invalid cmdobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n", + cmdbatch->context->id, obj.flags, obj.id, + obj.offset, obj.gpuaddr, obj.size); + return -EINVAL; + } + + ret = kgsl_cmdbatch_add_object(&cmdbatch->cmdlist, &obj); + if (ret) + return ret; + + ptr += sizeof(obj); + } + + return 0; +} + +int kgsl_cmdbatch_add_memlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_object obj; + int i, ret = 0; + + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + for (i = 0; i < count; i++) { + memset(&obj, 0, sizeof(obj)); + + ret = _copy_from_user(&obj, ptr, sizeof(obj), size); + if (ret) + return ret; + + if (!(obj.flags & KGSL_OBJLIST_MEMOBJ)) { + KGSL_DRV_ERR(device, + "invalid memobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n", + cmdbatch->context->id, obj.flags, obj.id, + obj.offset, obj.gpuaddr, obj.size); + return -EINVAL; + } + + if (obj.flags & KGSL_OBJLIST_PROFILE) + add_profiling_buffer(device, cmdbatch, obj.gpuaddr, + obj.size, obj.id, obj.offset); + else { + ret = kgsl_cmdbatch_add_object(&cmdbatch->memlist, + &obj); + if (ret) + return ret; + } + + ptr += sizeof(obj); + } + + return 0; +} + +int kgsl_cmdbatch_add_synclist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count) +{ + struct kgsl_command_syncpoint syncpoint; + struct kgsl_cmd_syncpoint sync; + int i, ret = 0; + + /* Return early if nothing going on */ + if (count == 0 && ptr == NULL && size == 0) + return 0; + + /* Sanity check inputs */ + if (count == 0 || ptr == NULL || size == 0) + return -EINVAL; + + if (count > KGSL_MAX_SYNCPOINTS) + return -EINVAL; + + cmdbatch->synclist = kcalloc(count, + sizeof(struct kgsl_cmdbatch_sync_event), GFP_KERNEL); + + if (cmdbatch->synclist == NULL) + return -ENOMEM; + + for (i = 0; i < count; i++) { + memset(&syncpoint, 0, sizeof(syncpoint)); + + ret = _copy_from_user(&syncpoint, ptr, sizeof(syncpoint), size); + if (ret) + return ret; + + sync.type = syncpoint.type; + sync.priv = to_user_ptr(syncpoint.priv); + sync.size = syncpoint.size; + + ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync); + if (ret) + return ret; + + ptr += sizeof(syncpoint); + } + + return 0; +} + +void kgsl_cmdbatch_exit(void) +{ + if (memobjs_cache != NULL) + kmem_cache_destroy(memobjs_cache); +} + +int kgsl_cmdbatch_init(void) +{ + memobjs_cache = KMEM_CACHE(kgsl_memobj_node, 0); + if (memobjs_cache == NULL) { + KGSL_CORE_ERR("failed to create memobjs_cache"); + return -ENOMEM; + } + + return 0; +} diff --git a/drivers/gpu/msm/kgsl_cmdbatch.h b/drivers/gpu/msm/kgsl_cmdbatch.h new file mode 100644 index 000000000000..1547ac02fdbf --- /dev/null +++ b/drivers/gpu/msm/kgsl_cmdbatch.h @@ -0,0 +1,170 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __KGSL_CMDBATCH_H +#define __KGSL_CMDBATCH_H + +#define KGSL_CMDBATCH_FLAGS \ + { KGSL_CMDBATCH_MARKER, "MARKER" }, \ + { KGSL_CMDBATCH_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_CMDBATCH_SYNC, "SYNC" }, \ + { KGSL_CMDBATCH_END_OF_FRAME, "EOF" }, \ + { KGSL_CMDBATCH_PWR_CONSTRAINT, "PWR_CONSTRAINT" }, \ + { KGSL_CMDBATCH_SUBMIT_IB_LIST, "IB_LIST" } + +/** + * struct kgsl_cmdbatch - KGSl command descriptor + * @device: KGSL GPU device that the command was created for + * @context: KGSL context that created the command + * @timestamp: Timestamp assigned to the command + * @flags: flags + * @priv: Internal flags + * @fault_policy: Internal policy describing how to handle this command in case + * of a fault + * @fault_recovery: recovery actions actually tried for this batch + * @expires: Point in time when the cmdbatch is considered to be hung + * @refcount: kref structure to maintain the reference count + * @cmdlist: List of IBs to issue + * @memlist: List of all memory used in this command batch + * @synclist: Array of context/timestamp tuples to wait for before issuing + * @numsyncs: Number of sync entries in the array + * @pending: Bitmask of sync events that are active + * @timer: a timer used to track possible sync timeouts for this cmdbatch + * @marker_timestamp: For markers, the timestamp of the last "real" command that + * was queued + * @profiling_buf_entry: Mem entry containing the profiling buffer + * @profiling_buffer_gpuaddr: GPU virt address of the profile buffer added here + * for easy access + * @profile_index: Index to store the start/stop ticks in the kernel profiling + * buffer + * @submit_ticks: Variable to hold ticks at the time of cmdbatch submit. + * @global_ts: The ringbuffer timestamp corresponding to this cmdbatch + * @timeout_jiffies: For a syncpoint cmdbatch the jiffies at which the + * timer will expire + * This structure defines an atomic batch of command buffers issued from + * userspace. 
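+ *
+ * In rough terms: a cmdbatch is allocated with kgsl_cmdbatch_create(),
+ * filled in through the kgsl_cmdbatch_add_*() helpers (IBs on @cmdlist,
+ * memory objects on @memlist, sync points in @synclist) and then passed
+ * to the device-specific submission path; @refcount keeps it alive until
+ * its sync points and the submission itself have retired.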
+ */ +struct kgsl_cmdbatch { + struct kgsl_device *device; + struct kgsl_context *context; + uint32_t timestamp; + uint32_t flags; + unsigned long priv; + unsigned long fault_policy; + unsigned long fault_recovery; + unsigned long expires; + struct kref refcount; + struct list_head cmdlist; + struct list_head memlist; + struct kgsl_cmdbatch_sync_event *synclist; + unsigned int numsyncs; + unsigned long pending; + struct timer_list timer; + unsigned int marker_timestamp; + struct kgsl_mem_entry *profiling_buf_entry; + uint64_t profiling_buffer_gpuaddr; + unsigned int profile_index; + uint64_t submit_ticks; + unsigned int global_ts; + unsigned long timeout_jiffies; +}; + +/** + * struct kgsl_cmdbatch_sync_event + * @id: identifer (positiion within the pending bitmap) + * @type: Syncpoint type + * @cmdbatch: Pointer to the cmdbatch that owns the sync event + * @context: Pointer to the KGSL context that owns the cmdbatch + * @timestamp: Pending timestamp for the event + * @handle: Pointer to a sync fence handle + * @device: Pointer to the KGSL device + */ +struct kgsl_cmdbatch_sync_event { + unsigned int id; + int type; + struct kgsl_cmdbatch *cmdbatch; + struct kgsl_context *context; + unsigned int timestamp; + struct kgsl_sync_fence_waiter *handle; + struct kgsl_device *device; +}; + +/** + * enum kgsl_cmdbatch_priv - Internal cmdbatch flags + * @CMDBATCH_FLAG_SKIP - skip the entire command batch + * @CMDBATCH_FLAG_FORCE_PREAMBLE - Force the preamble on for the cmdbatch + * @CMDBATCH_FLAG_WFI - Force wait-for-idle for the submission + * @CMDBATCH_FLAG_PROFILE - store the start / retire ticks for the command batch + * in the profiling buffer + * @CMDBATCH_FLAG_FENCE_LOG - Set if the cmdbatch is dumping fence logs via the + * cmdbatch timer - this is used to avoid recursion + */ + +enum kgsl_cmdbatch_priv { + CMDBATCH_FLAG_SKIP = 0, + CMDBATCH_FLAG_FORCE_PREAMBLE, + CMDBATCH_FLAG_WFI, + CMDBATCH_FLAG_PROFILE, + CMDBATCH_FLAG_FENCE_LOG, +}; + + +int kgsl_cmdbatch_add_memobj(struct kgsl_cmdbatch *cmdbatch, + struct kgsl_ibdesc *ibdesc); + +int kgsl_cmdbatch_add_sync(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, + struct kgsl_cmd_syncpoint *sync); + +struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device, + struct kgsl_context *context, unsigned int flags); +int kgsl_cmdbatch_add_ibdesc(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, struct kgsl_ibdesc *ibdesc); +int kgsl_cmdbatch_add_ibdesc_list(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count); +int kgsl_cmdbatch_add_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, int count); +int kgsl_cmdbatch_add_cmdlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_cmdbatch_add_memlist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count); +int kgsl_cmdbatch_add_synclist(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch, void __user *ptr, + unsigned int size, unsigned int count); + +int kgsl_cmdbatch_init(void); +void kgsl_cmdbatch_exit(void); + +void kgsl_dump_syncpoints(struct kgsl_device *device, + struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch); + +void kgsl_cmdbatch_destroy_object(struct kref *kref); + +static inline bool kgsl_cmdbatch_events_pending(struct kgsl_cmdbatch *cmdbatch) +{ + return 
!bitmap_empty(&cmdbatch->pending, KGSL_MAX_SYNCPOINTS); +} + +static inline bool kgsl_cmdbatch_event_pending(struct kgsl_cmdbatch *cmdbatch, + unsigned int bit) +{ + if (bit >= KGSL_MAX_SYNCPOINTS) + return false; + + return test_bit(bit, &cmdbatch->pending); +} + +#endif /* __KGSL_CMDBATCH_H */ diff --git a/drivers/gpu/msm/kgsl_compat.c b/drivers/gpu/msm/kgsl_compat.c new file mode 100644 index 000000000000..248c78b7e5c4 --- /dev/null +++ b/drivers/gpu/msm/kgsl_compat.c @@ -0,0 +1,398 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/uaccess.h> +#include <asm/ioctl.h> + +#include "kgsl.h" +#include "kgsl_compat.h" +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static long +kgsl_ioctl_device_getproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_getproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_device_setproperty_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_device_getproperty_compat *param32 = data; + struct kgsl_device_getproperty param; + + param.type = param32->type; + param.value = compat_ptr(param32->value); + param.sizebytes = (size_t)param32->sizebytes; + + return kgsl_ioctl_device_setproperty(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_submit_commands_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_submit_commands_compat *param32 = data; + struct kgsl_submit_commands param; + + param.context_id = param32->context_id; + param.flags = param32->flags; + param.cmdlist = compat_ptr(param32->cmdlist); + param.numcmds = param32->numcmds; + param.synclist = compat_ptr(param32->synclist); + param.numsyncs = param32->numsyncs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_submit_commands(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long +kgsl_ioctl_rb_issueibcmds_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result; + struct kgsl_ringbuffer_issueibcmds_compat *param32 = data; + struct kgsl_ringbuffer_issueibcmds param; + + param.drawctxt_id = param32->drawctxt_id; + param.flags = param32->flags; + param.ibdesc_addr = (unsigned long)param32->ibdesc_addr; + param.numibs = param32->numibs; + param.timestamp = param32->timestamp; + + result = kgsl_ioctl_rb_issueibcmds(dev_priv, cmd, ¶m); + + param32->timestamp = param.timestamp; + + return result; +} + +static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat( + struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat *param32 = data; + struct kgsl_cmdstream_freememontimestamp_ctxtid param; 
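+
+	/*
+	 * Like the other compat thunks in this file: unpack the 32-bit
+	 * layout into the native structure and hand it to the regular
+	 * ioctl handler; thunks for ioctls that return data also copy
+	 * the output fields back to the 32-bit structure afterwards.
+	 */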
+ + param.context_id = param32->context_id; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.type = param32->type; + param.timestamp = param32->timestamp; + + return kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(dev_priv, cmd, + ¶m); +} + +static long kgsl_ioctl_sharedmem_free_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_free(dev_priv, cmd, ¶m); +} + +static long kgsl_ioctl_map_user_mem_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, + void *data) +{ + int result = 0; + struct kgsl_map_user_mem_compat *param32 = data; + struct kgsl_map_user_mem param; + + param.fd = param32->fd; + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.len = (size_t)param32->len; + param.offset = (size_t)param32->offset; + param.hostptr = (unsigned long)param32->hostptr; + param.memtype = param32->memtype; + param.flags = param32->flags; + + result = kgsl_ioctl_map_user_mem(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->flags = param.flags; + return result; +} + +static long +kgsl_ioctl_gpumem_sync_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_compat *param32 = data; + struct kgsl_gpumem_sync_cache param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.op = param32->op; + param.offset = (size_t)param32->offset; + param.length = (size_t)param32->length; + + return kgsl_ioctl_gpumem_sync_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_sync_cache_bulk_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_gpumem_sync_cache_bulk_compat *param32 = data; + struct kgsl_gpumem_sync_cache_bulk param; + + param.id_list = to_user_ptr(param32->id_list); + param.count = param32->count; + param.op = param32->op; + + return kgsl_ioctl_gpumem_sync_cache_bulk(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_sharedmem_flush_cache_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_sharedmem_free_compat *param32 = data; + struct kgsl_sharedmem_free param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + + return kgsl_ioctl_sharedmem_flush_cache(dev_priv, cmd, ¶m); +} + +static long +kgsl_ioctl_gpumem_alloc_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_compat *param32 = data; + struct kgsl_gpumem_alloc param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.size = (size_t)param32->size; + param.flags = param32->flags; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->size = sizet_to_compat(param.size); + param32->flags = param.flags; + + return result; +} + +static long +kgsl_ioctl_gpumem_alloc_id_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_alloc_id_compat *param32 = data; + struct kgsl_gpumem_alloc_id param; + + param.id = param32->id; + param.flags = param32->flags; + param.size = 
(size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.gpuaddr = (unsigned long)param32->gpuaddr; + + /* + * Since this is a 32 bit application the page aligned size is expected + * to fit inside of 32 bits - check for overflow and return error if so + */ + if (PAGE_ALIGN(param.size) >= UINT_MAX) + return -EINVAL; + + result = kgsl_ioctl_gpumem_alloc_id(dev_priv, cmd, ¶m); + + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + + return result; +} + +static long +kgsl_ioctl_gpumem_get_info_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int result = 0; + struct kgsl_gpumem_get_info_compat *param32 = data; + struct kgsl_gpumem_get_info param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.id = param32->id; + param.flags = param32->flags; + param.size = (size_t)param32->size; + param.mmapsize = (size_t)param32->mmapsize; + param.useraddr = (unsigned long)param32->useraddr; + + result = kgsl_ioctl_gpumem_get_info(dev_priv, cmd, ¶m); + + param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr); + param32->id = param.id; + param32->flags = param.flags; + param32->size = sizet_to_compat(param.size); + param32->mmapsize = sizet_to_compat(param.mmapsize); + param32->useraddr = (compat_ulong_t)param.useraddr; + + return result; +} + +static long kgsl_ioctl_cff_syncmem_compat(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_cff_syncmem_compat *param32 = data; + struct kgsl_cff_syncmem param; + + param.gpuaddr = (unsigned long)param32->gpuaddr; + param.len = (size_t)param32->len; + + return kgsl_ioctl_cff_syncmem(dev_priv, cmd, ¶m); +} + +static long kgsl_ioctl_timestamp_event_compat(struct kgsl_device_private + *dev_priv, unsigned int cmd, void *data) +{ + struct kgsl_timestamp_event_compat *param32 = data; + struct kgsl_timestamp_event param; + + param.type = param32->type; + param.timestamp = param32->timestamp; + param.context_id = param32->context_id; + param.priv = compat_ptr(param32->priv); + param.len = (size_t)param32->len; + + return kgsl_ioctl_timestamp_event(dev_priv, cmd, ¶m); +} + + +static const struct kgsl_ioctl kgsl_compat_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT, + kgsl_ioctl_device_getproperty_compat), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT, + kgsl_ioctl_rb_issueibcmds_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT, + kgsl_ioctl_submit_commands_compat), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM_COMPAT, + kgsl_ioctl_map_user_mem_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE_COMPAT, + kgsl_ioctl_sharedmem_free_compat), + 
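+	/*
+	 * Entries in this table that reference the native (non _COMPAT)
+	 * handlers need no conversion wrapper because those ioctl payloads
+	 * have the same layout for 32-bit and 64-bit callers.
+	 */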
KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT, + kgsl_ioctl_sharedmem_flush_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_COMPAT, + kgsl_ioctl_gpumem_alloc_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNCMEM_COMPAT, + kgsl_ioctl_cff_syncmem_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, + kgsl_ioctl_cff_user_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT, + kgsl_ioctl_timestamp_event_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY_COMPAT, + kgsl_ioctl_device_setproperty_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT, + kgsl_ioctl_gpumem_alloc_id_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT, + kgsl_ioctl_gpumem_get_info_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT, + kgsl_ioctl_gpumem_sync_cache_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT, + kgsl_ioctl_gpumem_sync_cache_bulk_compat), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNC_GPUOBJ, + kgsl_ioctl_cff_sync_gpuobj), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + kgsl_ioctl_gpuobj_set_info), +}; + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + + long ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_compat_ioctl_funcs, + ARRAY_SIZE(kgsl_compat_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (device->ftbl->compat_ioctl != NULL) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + + KGSL_DRV_INFO(device, "invalid ioctl code 0x%08X\n", cmd); + } + + return ret; +} diff --git a/drivers/gpu/msm/kgsl_compat.h b/drivers/gpu/msm/kgsl_compat.h new file mode 100644 index 000000000000..b7a1eb174baf --- /dev/null +++ b/drivers/gpu/msm/kgsl_compat.h @@ -0,0 +1,273 @@ +/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_COMPAT_H +#define __KGSL_COMPAT_H + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> +#include "kgsl.h" +#include "kgsl_device.h" + +struct kgsl_ibdesc_compat { + compat_ulong_t gpuaddr; + unsigned int __pad; + compat_size_t sizedwords; + unsigned int ctrl; +}; + +struct kgsl_cmd_syncpoint_compat { + int type; + compat_uptr_t priv; + compat_size_t size; +}; + +struct kgsl_devinfo_compat { + unsigned int device_id; + unsigned int chip_id; + unsigned int mmu_enabled; + compat_ulong_t gmem_gpubaseaddr; + unsigned int gpu_id; + compat_size_t gmem_sizebytes; +}; + +struct kgsl_shadowprop_compat { + compat_ulong_t gpuaddr; + compat_size_t size; + unsigned int flags; +}; + +struct kgsl_device_constraint_compat { + unsigned int type; + unsigned int context_id; + compat_uptr_t data; + compat_size_t size; +}; + +struct kgsl_device_getproperty_compat { + unsigned int type; + compat_uptr_t value; + compat_size_t sizebytes; +}; + +#define IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty_compat) + +#define IOCTL_KGSL_SETPROPERTY_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty_compat) + + +struct kgsl_submit_commands_compat { + unsigned int context_id; + unsigned int flags; + compat_uptr_t cmdlist; + unsigned int numcmds; + compat_uptr_t synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands_compat) + +struct kgsl_ringbuffer_issueibcmds_compat { + unsigned int drawctxt_id; + compat_ulong_t ibdesc_addr; + unsigned int numibs; + unsigned int timestamp; /* output param */ + unsigned int flags; +}; + +#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds_compat) + +struct kgsl_cmdstream_freememontimestamp_compat { + compat_ulong_t gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x12, \ + struct kgsl_cmdstream_freememontimestamp_compat) + +struct kgsl_cmdstream_freememontimestamp_ctxtid_compat { + unsigned int context_id; + compat_ulong_t gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid_compat) + +struct kgsl_map_user_mem_compat { + int fd; + compat_ulong_t gpuaddr; + compat_size_t len; + compat_size_t offset; + compat_ulong_t hostptr; + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem_compat) + +struct kgsl_sharedmem_free_compat { + compat_ulong_t gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free_compat) + +#define IOCTL_KGSL_SHAREDMEM_FREE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free_compat) + +struct kgsl_gpumem_alloc_compat { + compat_ulong_t gpuaddr; /* output param */ + compat_size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc_compat) + +struct kgsl_cff_syncmem_compat { + compat_ulong_t gpuaddr; + compat_size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM_COMPAT \ + _IOW(KGSL_IOC_TYPE, 
0x30, struct kgsl_cff_syncmem_compat) + +struct kgsl_timestamp_event_compat { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + compat_uptr_t priv; /* Pointer to the event specific blob */ + compat_size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event_compat) + +struct kgsl_gpumem_alloc_id_compat { + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t gpuaddr; +/* private: reserved for future use*/ + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id_compat) + +struct kgsl_gpumem_get_info_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int flags; + compat_size_t size; + compat_size_t mmapsize; + compat_ulong_t useraddr; +/* private: reserved for future use*/ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info_compat) + +struct kgsl_gpumem_sync_cache_compat { + compat_ulong_t gpuaddr; + unsigned int id; + unsigned int op; + compat_size_t offset; + compat_size_t length; +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache_compat) + +struct kgsl_gpumem_sync_cache_bulk_compat { + compat_uptr_t id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk_compat) + +struct kgsl_perfcounter_query_compat { + unsigned int groupid; + compat_uptr_t countables; + unsigned int count; + unsigned int max_counters; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query_compat) + +struct kgsl_perfcounter_read_compat { + compat_uptr_t reads; + unsigned int count; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ_COMPAT \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read_compat) + +static inline compat_ulong_t gpuaddr_to_compat(unsigned long gpuaddr) +{ + WARN(gpuaddr >> 32, "Top 32 bits of gpuaddr have been set\n"); + return (compat_ulong_t)gpuaddr; +} + +static inline compat_size_t sizet_to_compat(size_t size) +{ + WARN(size >> 32, "Size greater than 4G\n"); + return (compat_size_t)size; +} + +int kgsl_cmdbatch_create_compat(struct kgsl_device *device, unsigned int flags, + struct kgsl_cmdbatch *cmdbatch, void __user *cmdlist, + unsigned int numcmds, void __user *synclist, + unsigned int numsyncs); + +long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg); + +#else +static inline int kgsl_cmdbatch_create_compat(struct kgsl_device *device, + unsigned int flags, struct kgsl_cmdbatch *cmdbatch, + void __user *cmdlist, unsigned int numcmds, + void __user *synclist, unsigned int numsyncs) +{ + BUG(); +} + +static inline long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + BUG(); +} + +#endif /* CONFIG_COMPAT */ +#endif /* __KGSL_COMPAT_H */ diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c new file mode 100644 index 000000000000..11095f38bad7 --- /dev/null +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -0,0 
+1,272 @@ +/* Copyright (c) 2002,2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/debugfs.h> + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "kgsl_debugfs.h" + +/*default log levels is error for everything*/ +#define KGSL_LOG_LEVEL_MAX 7 + +struct dentry *kgsl_debugfs_dir; +static struct dentry *proc_d_debugfs; + +static inline int kgsl_log_set(unsigned int *log_val, void *data, u64 val) +{ + *log_val = min((unsigned int)val, (unsigned int)KGSL_LOG_LEVEL_MAX); + return 0; +} + +#define KGSL_DEBUGFS_LOG(__log) \ +static int __log ## _set(void *data, u64 val) \ +{ \ + struct kgsl_device *device = data; \ + return kgsl_log_set(&device->__log, data, val); \ +} \ +static int __log ## _get(void *data, u64 *val) \ +{ \ + struct kgsl_device *device = data; \ + *val = device->__log; \ + return 0; \ +} \ +DEFINE_SIMPLE_ATTRIBUTE(__log ## _fops, \ +__log ## _get, __log ## _set, "%llu\n"); \ + +KGSL_DEBUGFS_LOG(drv_log); +KGSL_DEBUGFS_LOG(cmd_log); +KGSL_DEBUGFS_LOG(ctxt_log); +KGSL_DEBUGFS_LOG(mem_log); +KGSL_DEBUGFS_LOG(pwr_log); + +static int _strict_set(void *data, u64 val) +{ + kgsl_sharedmem_set_noretry(val ? true : false); + return 0; +} + +static int _strict_get(void *data, u64 *val) +{ + *val = kgsl_sharedmem_get_noretry(); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(_strict_fops, _strict_get, _strict_set, "%llu\n"); + +void kgsl_device_debugfs_init(struct kgsl_device *device) +{ + if (kgsl_debugfs_dir && !IS_ERR(kgsl_debugfs_dir)) + device->d_debugfs = debugfs_create_dir(device->name, + kgsl_debugfs_dir); + + if (!device->d_debugfs || IS_ERR(device->d_debugfs)) + return; + + debugfs_create_file("log_level_cmd", 0644, device->d_debugfs, device, + &cmd_log_fops); + debugfs_create_file("log_level_ctxt", 0644, device->d_debugfs, device, + &ctxt_log_fops); + debugfs_create_file("log_level_drv", 0644, device->d_debugfs, device, + &drv_log_fops); + debugfs_create_file("log_level_mem", 0644, device->d_debugfs, device, + &mem_log_fops); + debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device, + &pwr_log_fops); +} + +struct type_entry { + int type; + const char *str; +}; + +static const struct type_entry memtypes[] = { KGSL_MEM_TYPES }; + +static const char *memtype_str(int memtype) +{ + int i; + for (i = 0; i < ARRAY_SIZE(memtypes); i++) + if (memtypes[i].type == memtype) + return memtypes[i].str; + return "unknown"; +} + +static char get_alignflag(const struct kgsl_memdesc *m) +{ + int align = kgsl_memdesc_get_align(m); + if (align >= ilog2(SZ_1M)) + return 'L'; + else if (align >= ilog2(SZ_64K)) + return 'l'; + return '-'; +} + +static char get_cacheflag(const struct kgsl_memdesc *m) +{ + static const char table[] = { + [KGSL_CACHEMODE_WRITECOMBINE] = '-', + [KGSL_CACHEMODE_UNCACHED] = 'u', + [KGSL_CACHEMODE_WRITEBACK] = 'b', + [KGSL_CACHEMODE_WRITETHROUGH] = 't', + }; + return table[kgsl_memdesc_get_cachemode(m)]; +} + + +static int print_mem_entry(int id, void *ptr, void *data) +{ + struct seq_file *s = data; + struct 
kgsl_mem_entry *entry = ptr; + char flags[8]; + char usage[16]; + struct kgsl_memdesc *m = &entry->memdesc; + + flags[0] = kgsl_memdesc_is_global(m) ? 'g' : '-'; + flags[1] = '-'; + flags[2] = !(m->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-'; + flags[3] = get_alignflag(m); + flags[4] = get_cacheflag(m); + flags[5] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-'; + flags[6] = (m->useraddr) ? 'Y' : 'N'; + flags[7] = '\0'; + + kgsl_get_memory_usage(usage, sizeof(usage), m->flags); + + seq_printf(s, "%pK %pK %16llu %5d %8s %10s %16s %5d", + (uint64_t *)(uintptr_t) m->gpuaddr, + (unsigned long *) m->useraddr, + m->size, entry->id, flags, + memtype_str(kgsl_memdesc_usermem_type(m)), + usage, m->sgt->nents); + + if (entry->metadata[0] != 0) + seq_printf(s, " %s", entry->metadata); + + seq_putc(s, '\n'); + + return 0; +} + +static int process_mem_print(struct seq_file *s, void *unused) +{ + struct kgsl_process_private *private = s->private; + + seq_printf(s, "%8s %8s %8s %5s %8s %10s %16s %5s\n", + "gpuaddr", "useraddr", "size", "id", "flags", "type", + "usage", "sglen"); + + spin_lock(&private->mem_lock); + idr_for_each(&private->mem_idr, print_mem_entry, s); + spin_unlock(&private->mem_lock); + + return 0; +} + +static int process_mem_open(struct inode *inode, struct file *file) +{ + int ret; + pid_t pid = (pid_t) (unsigned long) inode->i_private; + struct kgsl_process_private *private = NULL; + + private = kgsl_process_private_find(pid); + + if (!private) + return -ENODEV; + + ret = single_open(file, process_mem_print, private); + if (ret) + kgsl_process_private_put(private); + + return ret; +} + +static int process_mem_release(struct inode *inode, struct file *file) +{ + struct kgsl_process_private *private = + ((struct seq_file *)file->private_data)->private; + + if (private) + kgsl_process_private_put(private); + + return single_release(inode, file); +} + +static const struct file_operations process_mem_fops = { + .open = process_mem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = process_mem_release, +}; + + +/** + * kgsl_process_init_debugfs() - Initialize debugfs for a process + * @private: Pointer to process private structure created for the process + * + * kgsl_process_init_debugfs() is called at the time of creating the + * process struct when a process opens kgsl device for the first time. + * This function is not fatal - all we do is print a warning message if + * the files can't be created + */ +void kgsl_process_init_debugfs(struct kgsl_process_private *private) +{ + unsigned char name[16]; + struct dentry *dentry; + + snprintf(name, sizeof(name), "%d", private->pid); + + private->debug_root = debugfs_create_dir(name, proc_d_debugfs); + + /* + * Both debugfs_create_dir() and debugfs_create_file() return + * ERR_PTR(-ENODEV) if debugfs is disabled in the kernel but return + * NULL on error when it is enabled. For both usages we need to check + * for ERROR or NULL and only print a warning on an actual failure + * (i.e. 
- when the return value is NULL) + */ + + if (IS_ERR_OR_NULL(private->debug_root)) { + WARN((private->debug_root == NULL), + "Unable to create debugfs dir for %s\n", name); + private->debug_root = NULL; + return; + } + + dentry = debugfs_create_file("mem", 0444, private->debug_root, + (void *) ((unsigned long) private->pid), &process_mem_fops); + + if (IS_ERR_OR_NULL(dentry)) + WARN((dentry == NULL), + "Unable to create 'mem' file for %s\n", name); +} + +void kgsl_core_debugfs_init(void) +{ + struct dentry *debug_dir; + + kgsl_debugfs_dir = debugfs_create_dir("kgsl", NULL); + + debug_dir = debugfs_create_dir("debug", kgsl_debugfs_dir); + + debugfs_create_file("strict_memory", 0644, debug_dir, NULL, + &_strict_fops); + + proc_d_debugfs = debugfs_create_dir("proc", kgsl_debugfs_dir); +} + +void kgsl_core_debugfs_close(void) +{ + debugfs_remove_recursive(kgsl_debugfs_dir); +} diff --git a/drivers/gpu/msm/kgsl_debugfs.h b/drivers/gpu/msm/kgsl_debugfs.h new file mode 100644 index 000000000000..34875954bb8b --- /dev/null +++ b/drivers/gpu/msm/kgsl_debugfs.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2002,2008-2011,2013,2015 The Linux Foundation. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _KGSL_DEBUGFS_H +#define _KGSL_DEBUGFS_H + +struct kgsl_device; +struct kgsl_process_private; + +#ifdef CONFIG_DEBUG_FS +void kgsl_core_debugfs_init(void); +void kgsl_core_debugfs_close(void); + +void kgsl_device_debugfs_init(struct kgsl_device *device); + +extern struct dentry *kgsl_debugfs_dir; +static inline struct dentry *kgsl_get_debugfs_dir(void) +{ + return kgsl_debugfs_dir; +} + +void kgsl_process_init_debugfs(struct kgsl_process_private *); +#else +static inline void kgsl_core_debugfs_init(void) { } +static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { } +static inline void kgsl_core_debugfs_close(void) { } +static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; } +static inline void kgsl_process_init_debugfs(struct kgsl_process_private *priv) +{ +} +#endif + +#endif diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h new file mode 100644 index 000000000000..8fc3fa1311b5 --- /dev/null +++ b/drivers/gpu/msm/kgsl_device.h @@ -0,0 +1,869 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_DEVICE_H +#define __KGSL_DEVICE_H + +#include <linux/slab.h> +#include <linux/idr.h> +#include <linux/pm_qos.h> +#include <linux/sched.h> + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_pwrctrl.h" +#include "kgsl_log.h" +#include "kgsl_pwrscale.h" +#include "kgsl_snapshot.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cmdbatch.h" + +#include <linux/sync.h> + +#define KGSL_TIMEOUT_NONE 0 +#define KGSL_TIMEOUT_DEFAULT 0xFFFFFFFF +#define KGSL_TIMEOUT_PART 50 /* 50 msec */ + +#define FIRST_TIMEOUT (HZ / 2) + +#define KGSL_IOCTL_FUNC(_cmd, _func) \ + [_IOC_NR((_cmd))] = \ + { .cmd = (_cmd), .func = (_func) } + +/* KGSL device state is initialized to INIT when platform_probe * + * sucessfully initialized the device. Once a device has been opened * + * (started) it becomes active. NAP implies that only low latency * + * resources (for now clocks on some platforms) are off. SLEEP implies * + * that the KGSL module believes a device is idle (has been inactive * + * past its timer) and all system resources are released. SUSPEND is * + * requested by the kernel and will be enforced upon all open devices. */ + +#define KGSL_STATE_NONE 0x00000000 +#define KGSL_STATE_INIT 0x00000001 +#define KGSL_STATE_ACTIVE 0x00000002 +#define KGSL_STATE_NAP 0x00000004 +#define KGSL_STATE_SLEEP 0x00000008 +#define KGSL_STATE_SUSPEND 0x00000010 +#define KGSL_STATE_AWARE 0x00000020 +#define KGSL_STATE_SLUMBER 0x00000080 +#define KGSL_STATE_DEEP_NAP 0x00000100 + +#define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK 0x1000000 + +#define KGSL_IS_PAGE_ALIGNED(addr) (!((addr) & (~PAGE_MASK))) + +/** + * enum kgsl_event_results - result codes passed to an event callback when the + * event is retired or cancelled + * @KGSL_EVENT_RETIRED: The timestamp associated with the event retired + * successflly + * @KGSL_EVENT_CANCELLED: The event was cancelled before the event was fired + */ +enum kgsl_event_results { + KGSL_EVENT_RETIRED = 1, + KGSL_EVENT_CANCELLED = 2, +}; + +#define KGSL_FLAG_WAKE_ON_TOUCH BIT(0) + +/* + * "list" of event types for ftrace symbolic magic + */ + +#define KGSL_EVENT_TYPES \ + { KGSL_EVENT_RETIRED, "retired" }, \ + { KGSL_EVENT_CANCELLED, "cancelled" } + +#define KGSL_CONTEXT_FLAGS \ + { KGSL_CONTEXT_NO_GMEM_ALLOC , "NO_GMEM_ALLOC" }, \ + { KGSL_CONTEXT_PREAMBLE, "PREAMBLE" }, \ + { KGSL_CONTEXT_TRASH_STATE, "TRASH_STATE" }, \ + { KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" }, \ + { KGSL_CONTEXT_PER_CONTEXT_TS, "PER_CONTEXT_TS" }, \ + { KGSL_CONTEXT_USER_GENERATED_TS, "USER_TS" }, \ + { KGSL_CONTEXT_NO_FAULT_TOLERANCE, "NO_FT" }, \ + { KGSL_CONTEXT_PWR_CONSTRAINT, "PWR" }, \ + { KGSL_CONTEXT_SAVE_GMEM, "SAVE_GMEM" } + +#define KGSL_CONTEXT_TYPES \ + { KGSL_CONTEXT_TYPE_ANY, "ANY" }, \ + { KGSL_CONTEXT_TYPE_GL, "GL" }, \ + { KGSL_CONTEXT_TYPE_CL, "CL" }, \ + { KGSL_CONTEXT_TYPE_C2D, "C2D" }, \ + { KGSL_CONTEXT_TYPE_RS, "RS" } + +#define KGSL_CONTEXT_ID(_context) \ + ((_context != NULL) ? (_context)->id : KGSL_MEMSTORE_GLOBAL) + +/* Allocate 600K for the snapshot static region*/ +#define KGSL_SNAPSHOT_MEMSIZE (600 * 1024) + +struct kgsl_device; +struct platform_device; +struct kgsl_device_private; +struct kgsl_context; +struct kgsl_power_stats; +struct kgsl_event; +struct kgsl_snapshot; + +struct kgsl_functable { + /* Mandatory functions - these functions must be implemented + by the client device. The driver will not check for a NULL + pointer before calling the hook. 
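+
+	   An illustrative sketch of the resulting calling convention (not
+	   actual driver code): a mandatory hook is called directly,
+
+		device->ftbl->regwrite(device, offsetwords, value);
+
+	   while an optional hook (see the second group below) is guarded
+	   with a NULL check,
+
+		if (device->ftbl->drawctxt_sched)
+			device->ftbl->drawctxt_sched(device, context);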
+ */ + void (*regread) (struct kgsl_device *device, + unsigned int offsetwords, unsigned int *value); + void (*regwrite) (struct kgsl_device *device, + unsigned int offsetwords, unsigned int value); + int (*idle) (struct kgsl_device *device); + bool (*isidle) (struct kgsl_device *device); + int (*suspend_context) (struct kgsl_device *device); + int (*init) (struct kgsl_device *device); + int (*start) (struct kgsl_device *device, int priority); + int (*stop) (struct kgsl_device *device); + int (*getproperty) (struct kgsl_device *device, + unsigned int type, void __user *value, + size_t sizebytes); + int (*getproperty_compat) (struct kgsl_device *device, + unsigned int type, void __user *value, + size_t sizebytes); + int (*waittimestamp) (struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp, + unsigned int msecs); + int (*readtimestamp) (struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + int (*issueibcmds) (struct kgsl_device_private *dev_priv, + struct kgsl_context *context, struct kgsl_cmdbatch *cmdbatch, + uint32_t *timestamps); + void (*power_stats)(struct kgsl_device *device, + struct kgsl_power_stats *stats); + unsigned int (*gpuid)(struct kgsl_device *device, unsigned int *chipid); + void (*snapshot)(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, struct kgsl_context *context); + irqreturn_t (*irq_handler)(struct kgsl_device *device); + int (*drain)(struct kgsl_device *device); + /* Optional functions - these functions are not mandatory. The + driver will check that the function pointer is not NULL before + calling the hook */ + struct kgsl_context *(*drawctxt_create) (struct kgsl_device_private *, + uint32_t *flags); + void (*drawctxt_detach)(struct kgsl_context *context); + void (*drawctxt_destroy) (struct kgsl_context *context); + void (*drawctxt_dump) (struct kgsl_device *device, + struct kgsl_context *context); + long (*ioctl) (struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + long (*compat_ioctl) (struct kgsl_device_private *dev_priv, + unsigned int cmd, unsigned long arg); + int (*setproperty) (struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + int (*setproperty_compat) (struct kgsl_device_private *dev_priv, + unsigned int type, void __user *value, + unsigned int sizebytes); + void (*drawctxt_sched)(struct kgsl_device *device, + struct kgsl_context *context); + void (*resume)(struct kgsl_device *device); + int (*regulator_enable)(struct kgsl_device *); + bool (*is_hw_collapsible)(struct kgsl_device *); + void (*regulator_disable)(struct kgsl_device *); + void (*pwrlevel_change_settings)(struct kgsl_device *device, + unsigned int prelevel, unsigned int postlevel, bool post); + void (*regulator_disable_poll)(struct kgsl_device *device); +}; + +struct kgsl_ioctl { + unsigned int cmd; + long (*func)(struct kgsl_device_private *, unsigned int, void *); +}; + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len); + +/* Flag to mark the memobj_node as a preamble */ +#define MEMOBJ_PREAMBLE BIT(0) +/* Flag to mark that the memobj_node should not go to the hadrware */ +#define MEMOBJ_SKIP BIT(1) + +/** + * struct kgsl_memobj_node - Memory object descriptor + * @node: Local list node for the cmdbatch + * @id: GPU memory ID for the object + * offset: Offset within the object + * @gpuaddr: GPU address for the object + * @flags: External flags 
passed by the user + * @priv: Internal flags set by the driver + */ +struct kgsl_memobj_node { + struct list_head node; + unsigned int id; + uint64_t offset; + uint64_t gpuaddr; + uint64_t size; + unsigned long flags; + unsigned long priv; +}; + +struct kgsl_device { + struct device *dev; + const char *name; + unsigned int ver_major; + unsigned int ver_minor; + uint32_t flags; + enum kgsl_deviceid id; + + /* Starting physical address for GPU registers */ + unsigned long reg_phys; + + /* Starting Kernel virtual address for GPU registers */ + void __iomem *reg_virt; + + /* Total memory size for all GPU registers */ + unsigned int reg_len; + + /* Kernel virtual address for GPU shader memory */ + void __iomem *shader_mem_virt; + + /* Starting physical address for GPU shader memory */ + unsigned long shader_mem_phys; + + /* GPU shader memory size */ + unsigned int shader_mem_len; + struct kgsl_memdesc memstore; + const char *iomemname; + const char *shadermemname; + + struct kgsl_mmu mmu; + struct completion hwaccess_gate; + struct completion cmdbatch_gate; + const struct kgsl_functable *ftbl; + struct work_struct idle_check_ws; + struct timer_list idle_timer; + struct kgsl_pwrctrl pwrctrl; + int open_count; + + struct mutex mutex; + uint32_t state; + uint32_t requested_state; + + atomic_t active_cnt; + + wait_queue_head_t wait_queue; + wait_queue_head_t active_cnt_wq; + struct platform_device *pdev; + struct dentry *d_debugfs; + struct idr context_idr; + rwlock_t context_lock; + + struct { + void *ptr; + size_t size; + } snapshot_memory; + + struct kgsl_snapshot *snapshot; + + u32 snapshot_faultcount; /* Total number of faults since boot */ + struct kobject snapshot_kobj; + + struct kobject ppd_kobj; + + /* Logging levels */ + int cmd_log; + int ctxt_log; + int drv_log; + int mem_log; + int pwr_log; + struct kgsl_pwrscale pwrscale; + struct work_struct event_work; + + int reset_counter; /* Track how many GPU core resets have occured */ + int cff_dump_enable; + struct workqueue_struct *events_wq; + + struct device *busmondev; /* pseudo dev for GPU BW voting governor */ +}; + + +#define KGSL_DEVICE_COMMON_INIT(_dev) \ + .hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\ + .cmdbatch_gate = COMPLETION_INITIALIZER((_dev).cmdbatch_gate),\ + .idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\ + kgsl_idle_check),\ + .event_work = __WORK_INITIALIZER((_dev).event_work,\ + kgsl_process_events),\ + .context_idr = IDR_INIT((_dev).context_idr),\ + .wait_queue = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).wait_queue),\ + .active_cnt_wq = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).active_cnt_wq),\ + .mutex = __MUTEX_INITIALIZER((_dev).mutex),\ + .state = KGSL_STATE_NONE,\ + .ver_major = DRIVER_VERSION_MAJOR,\ + .ver_minor = DRIVER_VERSION_MINOR + + +/** + * enum bits for struct kgsl_context.priv + * @KGSL_CONTEXT_PRIV_DETACHED - The context has been destroyed by userspace + * and is no longer using the gpu. + * @KGSL_CONTEXT_PRIV_INVALID - The context has been destroyed by the kernel + * because it caused a GPU fault. + * @KGSL_CONTEXT_PRIV_PAGEFAULT - The context has caused a page fault. + * @KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC - this value and higher values are + * reserved for devices specific use. 
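+ *
+ * As an illustration only (the name below is hypothetical, not part of
+ * the driver), a device-specific driver would define its own bits on top
+ * of the reserved range and test them on context->priv with test_bit():
+ *
+ *	#define MY_CONTEXT_PRIV_FOO (KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC + 0)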
+ */
+enum kgsl_context_priv {
+	KGSL_CONTEXT_PRIV_DETACHED = 0,
+	KGSL_CONTEXT_PRIV_INVALID,
+	KGSL_CONTEXT_PRIV_PAGEFAULT,
+	KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC = 16,
+};
+
+struct kgsl_process_private;
+
+/**
+ * struct kgsl_context - The context fields that are valid for a user defined
+ * context
+ * @refcount: kref object for reference counting the context
+ * @id: integer identifier for the context
+ * @priority: The context's priority to submit commands to GPU
+ * @tid: task that created this context.
+ * @dev_priv: pointer to the owning device instance
+ * @proc_priv: pointer to process private, the process that allocated the
+ * context
+ * @priv: in-kernel context flags, use KGSL_CONTEXT_* values
+ * @device: pointer to the KGSL device that owns this context
+ * @reset_status: status indication whether a gpu reset occurred and whether
+ * this context was responsible for causing it
+ * @wait_on_invalid_ts: flag indicating if this context has tried to wait on a
+ * bad timestamp
+ * @timeline: sync timeline used to create fences that can be signaled when a
+ * sync_pt timestamp expires
+ * @events: A kgsl_event_group for this context - contains the list of GPU
+ * events
+ * @pagefault_ts: global timestamp of the pagefault, if
+ * KGSL_CONTEXT_PRIV_PAGEFAULT is set.
+ * @flags: flags from userspace controlling the behavior of this context
+ * @pwr_constraint: power constraint from userspace for this context
+ * @fault_count: number of times the gpu hung in last _context_throttle_time ms
+ * @fault_time: time of the first gpu hang in last _context_throttle_time ms
+ */
+struct kgsl_context {
+	struct kref refcount;
+	uint32_t id;
+	uint32_t priority;
+	pid_t tid;
+	struct kgsl_device_private *dev_priv;
+	struct kgsl_process_private *proc_priv;
+	unsigned long priv;
+	struct kgsl_device *device;
+	unsigned int reset_status;
+	bool wait_on_invalid_ts;
+	struct sync_timeline *timeline;
+	struct kgsl_event_group events;
+	unsigned int pagefault_ts;
+	unsigned int flags;
+	struct kgsl_pwr_constraint pwr_constraint;
+	unsigned int fault_count;
+	unsigned long fault_time;
+};
+
+#define _context_comm(_c) \
+	(((_c) && (_c)->proc_priv) ? (_c)->proc_priv->comm : "unknown")
+
+/*
+ * Print log messages with the context process name/pid:
+ * [...] kgsl kgsl-3d0: kgsl-api-test[22182]:
+ */
+
+#define pr_context(_d, _c, fmt, args...)
\ + dev_err((_d)->dev, "%s[%d]: " fmt, \ + _context_comm((_c)), \ + (_c)->proc_priv->pid, ##args) + +/** + * struct kgsl_process_private - Private structure for a KGSL process (across + * all devices) + * @priv: Internal flags, use KGSL_PROCESS_* values + * @pid: ID for the task owner of the process + * @comm: task name of the process + * @mem_lock: Spinlock to protect the process memory lists + * @refcount: kref object for reference counting the process + * @idr: Iterator for assigning IDs to memory allocations + * @pagetable: Pointer to the pagetable owned by this process + * @kobj: Pointer to a kobj for the sysfs directory for this process + * @debug_root: Pointer to the debugfs root for this process + * @stats: Memory allocation statistics for this process + * @syncsource_idr: sync sources created by this process + * @syncsource_lock: Spinlock to protect the syncsource idr + * @fd_count: Counter for the number of FDs for this process + */ +struct kgsl_process_private { + unsigned long priv; + pid_t pid; + char comm[TASK_COMM_LEN]; + spinlock_t mem_lock; + struct kref refcount; + struct idr mem_idr; + struct kgsl_pagetable *pagetable; + struct list_head list; + struct kobject kobj; + struct dentry *debug_root; + struct { + uint64_t cur; + uint64_t max; + } stats[KGSL_MEM_ENTRY_MAX]; + struct idr syncsource_idr; + spinlock_t syncsource_lock; + int fd_count; +}; + +/** + * enum kgsl_process_priv_flags - Private flags for kgsl_process_private + * @KGSL_PROCESS_INIT: Set if the process structure has been set up + */ +enum kgsl_process_priv_flags { + KGSL_PROCESS_INIT = 0, +}; + +struct kgsl_device_private { + struct kgsl_device *device; + struct kgsl_process_private *process_priv; +}; + +/** + * struct kgsl_snapshot - details for a specific snapshot instance + * @start: Pointer to the start of the static snapshot region + * @size: Size of the current snapshot instance + * @ptr: Pointer to the next block of memory to write to during snapshotting + * @remain: Bytes left in the snapshot region + * @timestamp: Timestamp of the snapshot instance (in seconds since boot) + * @mempool: Pointer to the memory pool for storing memory objects + * @mempool_size: Size of the memory pool + * @obj_list: List of frozen GPU buffers that are waiting to be dumped. + * @cp_list: List of IB's to be dumped. + * @work: worker to dump the frozen memory + * @dump_gate: completion gate signaled by worker when it is finished. + * @process: the process that caused the hang, if known. + */ +struct kgsl_snapshot { + u8 *start; + size_t size; + u8 *ptr; + size_t remain; + unsigned long timestamp; + u8 *mempool; + size_t mempool_size; + struct list_head obj_list; + struct list_head cp_list; + struct work_struct work; + struct completion dump_gate; + struct kgsl_process_private *process; +}; + +/** + * struct kgsl_snapshot_object - GPU memory in the snapshot + * @gpuaddr: The GPU address identified during snapshot + * @size: The buffer size identified during snapshot + * @offset: offset from start of the allocated kgsl_mem_entry + * @type: SNAPSHOT_OBJ_TYPE_* identifier. 
+ * @entry: the reference counted memory entry for this buffer + * @node: node for kgsl_snapshot.obj_list + */ +struct kgsl_snapshot_object { + uint64_t gpuaddr; + uint64_t size; + uint64_t offset; + int type; + struct kgsl_mem_entry *entry; + struct list_head node; +}; + +struct kgsl_device *kgsl_get_device(int dev_idx); + +static inline void kgsl_process_add_stats(struct kgsl_process_private *priv, + unsigned int type, uint64_t size) +{ + priv->stats[type].cur += size; + if (priv->stats[type].max < priv->stats[type].cur) + priv->stats[type].max = priv->stats[type].cur; +} + +static inline void kgsl_regread(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int *value) +{ + device->ftbl->regread(device, offsetwords, value); +} + +static inline void kgsl_regwrite(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int value) +{ + device->ftbl->regwrite(device, offsetwords, value); +} + +static inline void kgsl_regrmw(struct kgsl_device *device, + unsigned int offsetwords, + unsigned int mask, unsigned int bits) +{ + unsigned int val = 0; + + device->ftbl->regread(device, offsetwords, &val); + val &= ~mask; + device->ftbl->regwrite(device, offsetwords, val | bits); +} + +static inline int kgsl_idle(struct kgsl_device *device) +{ + return device->ftbl->idle(device); +} + +static inline unsigned int kgsl_gpuid(struct kgsl_device *device, + unsigned int *chipid) +{ + return device->ftbl->gpuid(device, chipid); +} + +static inline int kgsl_create_device_sysfs_files(struct device *root, + const struct device_attribute **list) +{ + int ret = 0, i; + for (i = 0; list[i] != NULL; i++) + ret |= device_create_file(root, list[i]); + return ret; +} + +static inline void kgsl_remove_device_sysfs_files(struct device *root, + const struct device_attribute **list) +{ + int i; + for (i = 0; list[i] != NULL; i++) + device_remove_file(root, list[i]); +} + +static inline struct kgsl_device *kgsl_device_from_dev(struct device *dev) +{ + int i; + + for (i = 0; i < KGSL_DEVICE_MAX; i++) { + if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->dev == dev) + return kgsl_driver.devp[i]; + } + + return NULL; +} + +static inline int kgsl_state_is_awake(struct kgsl_device *device) +{ + if (device->state == KGSL_STATE_ACTIVE || + device->state == KGSL_STATE_AWARE) + return true; + else + return false; +} + +int kgsl_readtimestamp(struct kgsl_device *device, void *priv, + enum kgsl_timestamp_type type, unsigned int *timestamp); + +int kgsl_check_timestamp(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp); + +int kgsl_device_platform_probe(struct kgsl_device *device); + +void kgsl_device_platform_remove(struct kgsl_device *device); + +const char *kgsl_pwrstate_to_str(unsigned int state); + +int kgsl_device_snapshot_init(struct kgsl_device *device); +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context); +void kgsl_device_snapshot_close(struct kgsl_device *device); +void kgsl_snapshot_save_frozen_objs(struct work_struct *work); + +void kgsl_events_init(void); +void kgsl_events_exit(void); + +void kgsl_del_event_group(struct kgsl_event_group *group); + +void kgsl_add_event_group(struct kgsl_event_group *group, + struct kgsl_context *context, const char *name, + readtimestamp_func readtimestamp, void *priv); + +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp); +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group); +void 
kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv); +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv); +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group); +void kgsl_process_events(struct work_struct *work); + +void kgsl_context_destroy(struct kref *kref); + +int kgsl_context_init(struct kgsl_device_private *, struct kgsl_context + *context); + +void kgsl_context_dump(struct kgsl_context *context); + +int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr, + uint64_t *size, uint64_t *flags, pid_t *pid); + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr); + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long, unsigned char *ptr); + +int kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry, + struct kgsl_device_private *dev_priv); + +/** + * kgsl_context_put() - Release context reference count + * @context: Pointer to the KGSL context to be released + * + * Reduce the reference count on a KGSL context and destroy it if it is no + * longer needed + */ +static inline void +kgsl_context_put(struct kgsl_context *context) +{ + if (context) + kref_put(&context->refcount, kgsl_context_destroy); +} + +/** + * kgsl_context_detached() - check if a context is detached + * @context: the context + * + * Check if a context has been destroyed by userspace and is only waiting + * for reference counts to go away. This check is used to weed out + * contexts that shouldn't use the gpu so NULL is considered detached. + */ +static inline bool kgsl_context_detached(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_DETACHED, + &context->priv)); +} + +/** + * kgsl_context_invalid() - check if a context is invalid + * @context: the context + * + * Check if a context has been invalidated by the kernel and may no + * longer use the GPU. + */ +static inline bool kgsl_context_invalid(struct kgsl_context *context) +{ + return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_INVALID, + &context->priv)); +} + + +/** + * kgsl_context_get() - get a pointer to a KGSL context + * @device: Pointer to the KGSL device that owns the context + * @id: Context ID + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. 
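+ * A typical usage pattern (illustrative only, not taken from this commit):
+ *
+ *	context = kgsl_context_get(device, id);
+ *	if (context != NULL) {
+ *		... operate on the context ...
+ *		kgsl_context_put(context);
+ *	}
+ *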
This function is for internal use because it + * doesn't validate the ownership of the context with the calling process - use + * kgsl_context_get_owner for that + */ +static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device, + uint32_t id) +{ + int result = 0; + struct kgsl_context *context = NULL; + + read_lock(&device->context_lock); + + context = idr_find(&device->context_idr, id); + + /* Don't return a context that has been detached */ + if (kgsl_context_detached(context)) + context = NULL; + else + result = kref_get_unless_zero(&context->refcount); + + read_unlock(&device->context_lock); + + if (!result) + return NULL; + return context; +} + +/** +* _kgsl_context_get() - lightweight function to just increment the ref count +* @context: Pointer to the KGSL context +* +* Get a reference to the specified KGSL context structure. This is a +* lightweight way to just increase the refcount on a known context rather than +* walking through kgsl_context_get and searching the iterator +*/ +static inline int _kgsl_context_get(struct kgsl_context *context) +{ + int ret = 0; + + if (context) + ret = kref_get_unless_zero(&context->refcount); + + return ret; +} + +/** + * kgsl_context_get_owner() - get a pointer to a KGSL context in a specific + * process + * @dev_priv: Pointer to the process struct + * @id: Context ID to return + * + * Find the context associated with the given ID number, increase the reference + * count on it and return it. The caller must make sure that this call is + * paired with a kgsl_context_put. This function validates that the context id + * given is owned by the dev_priv instancet that is passed in. See + * kgsl_context_get for the internal version that doesn't do the check + */ +static inline struct kgsl_context *kgsl_context_get_owner( + struct kgsl_device_private *dev_priv, uint32_t id) +{ + struct kgsl_context *context; + + context = kgsl_context_get(dev_priv->device, id); + + /* Verify that the context belongs to current calling fd. */ + if (context != NULL && context->dev_priv != dev_priv) { + kgsl_context_put(context); + return NULL; + } + + return context; +} + +/** +* kgsl_process_private_get() - increment the refcount on a kgsl_process_private +* struct +* @process: Pointer to the KGSL process_private +* +* Returns 0 if the structure is invalid and a reference count could not be +* obtained, nonzero otherwise. 
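+* On success the caller holds a reference and is expected to drop it with
+* kgsl_process_private_put() once it is done with the structure.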
+*/ +static inline int kgsl_process_private_get(struct kgsl_process_private *process) +{ + int ret = 0; + if (process != NULL) + ret = kref_get_unless_zero(&process->refcount); + return ret; +} + +void kgsl_process_private_put(struct kgsl_process_private *private); + + +struct kgsl_process_private *kgsl_process_private_find(pid_t pid); + +/** + * kgsl_property_read_u32() - Read a u32 property from the device tree + * @device: Pointer to the KGSL device + * @prop: String name of the property to query + * @ptr: Pointer to the variable to store the property + */ +static inline int kgsl_property_read_u32(struct kgsl_device *device, + const char *prop, unsigned int *ptr) +{ + return of_property_read_u32(device->pdev->dev.of_node, prop, ptr); +} + +/** + * kgsl_sysfs_store() - parse a string from a sysfs store function + * @buf: Incoming string to parse + * @ptr: Pointer to an unsigned int to store the value + */ +static inline int kgsl_sysfs_store(const char *buf, unsigned int *ptr) +{ + unsigned int val; + int rc; + + rc = kstrtou32(buf, 0, &val); + if (rc) + return rc; + + if (ptr) + *ptr = val; + + return 0; +} + +/* + * A helper macro to print out "not enough memory functions" - this + * makes it easy to standardize the messages as well as cut down on + * the number of strings in the binary + */ +#define SNAPSHOT_ERR_NOMEM(_d, _s) \ + KGSL_DRV_ERR((_d), \ + "snapshot: not enough snapshot memory for section %s\n", (_s)) + +/** + * struct kgsl_snapshot_registers - list of registers to snapshot + * @regs: Pointer to an array of register ranges + * @count: Number of entries in the array + */ +struct kgsl_snapshot_registers { + const unsigned int *regs; + unsigned int count; +}; + +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv); + +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, unsigned int index, + unsigned int data, unsigned int start, unsigned int count); + +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type); + +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size); + +struct adreno_ib_object_list; + +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list); + +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void *priv); + +/** + * struct kgsl_pwr_limit - limit structure for each client + * @node: Local list node for the limits list + * @level: requested power level + * @device: pointer to the device structure + */ +struct kgsl_pwr_limit { + struct list_head node; + unsigned int level; + struct kgsl_device *device; +}; + +#endif /* __KGSL_DEVICE_H */ diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c new file mode 100644 index 000000000000..e1f9ad17d0ff --- /dev/null +++ b/drivers/gpu/msm/kgsl_events.c @@ -0,0 +1,445 @@ +/* Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/debugfs.h> +#include <kgsl_device.h> + +#include "kgsl_debugfs.h" +#include "kgsl_trace.h" + +/* + * Define an kmem cache for the event structures since we allocate and free them + * so frequently + */ +static struct kmem_cache *events_cache; +static struct dentry *events_dentry; + +static inline void signal_event(struct kgsl_device *device, + struct kgsl_event *event, int result) +{ + list_del(&event->node); + event->result = result; + queue_work(device->events_wq, &event->work); +} + +/** + * _kgsl_event_worker() - Work handler for processing GPU event callbacks + * @work: Pointer to the work_struct for the event + * + * Each event callback has its own work struct and is run on a event specific + * workqeuue. This is the worker that queues up the event callback function. + */ +static void _kgsl_event_worker(struct work_struct *work) +{ + struct kgsl_event *event = container_of(work, struct kgsl_event, work); + int id = KGSL_CONTEXT_ID(event->context); + + trace_kgsl_fire_event(id, event->timestamp, event->result, + jiffies - event->created, event->func); + + event->func(event->device, event->group, event->priv, event->result); + + kgsl_context_put(event->context); + kmem_cache_free(events_cache, event); +} + +static void _process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group, bool flush) +{ + struct kgsl_event *event, *tmp; + unsigned int timestamp; + struct kgsl_context *context; + + if (group == NULL) + return; + + context = group->context; + + /* + * Sanity check to be sure that we we aren't racing with the context + * getting destroyed + */ + if (context != NULL && !_kgsl_context_get(context)) + BUG(); + + spin_lock(&group->lock); + + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + ×tamp); + + /* + * If no timestamps have been retired since the last time we were here + * then we can avoid going through this loop + */ + if (!flush && timestamp_cmp(timestamp, group->processed) <= 0) + goto out; + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(event->timestamp, timestamp) <= 0) + signal_event(device, event, KGSL_EVENT_RETIRED); + else if (flush) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + } + + group->processed = timestamp; + +out: + spin_unlock(&group->lock); + kgsl_context_put(context); +} + +/** + * kgsl_process_event_group() - Handle all the retired events in a group + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ + +void kgsl_process_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, false); +} +EXPORT_SYMBOL(kgsl_process_event_group); + +/** + * kgsl_flush_event_group() - flush all the events in a group by retiring the + * ones can be retired and cancelling the ones that are pending + * @device: Pointer to a KGSL device + * @group: Pointer to a GPU events group to process + */ +void kgsl_flush_event_group(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + _process_event_group(device, group, true); +} +EXPORT_SYMBOL(kgsl_flush_event_group); + +/** + * kgsl_cancel_events_timestamp() - Cancel pending events 
for a given timestamp + * @device: Pointer to a KGSL device + * @group: Ponter to the GPU event group that owns the event + * @timestamp: Registered expiry timestamp for the event + */ +void kgsl_cancel_events_timestamp(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp_cmp(timestamp, event->timestamp) == 0) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} +EXPORT_SYMBOL(kgsl_cancel_events_timestamp); + +/** + * kgsl_cancel_events() - Cancel all pending events in the group + * @device: Pointer to a KGSL device + * @group: Pointer to a kgsl_events_group + */ +void kgsl_cancel_events(struct kgsl_device *device, + struct kgsl_event_group *group) +{ + struct kgsl_event *event, *tmp; + + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) + signal_event(device, event, KGSL_EVENT_CANCELLED); + + spin_unlock(&group->lock); +} +EXPORT_SYMBOL(kgsl_cancel_events); + +/** + * kgsl_cancel_event() - Cancel a specific event from a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +void kgsl_cancel_event(struct kgsl_device *device, + struct kgsl_event_group *group, unsigned int timestamp, + kgsl_event_func func, void *priv) +{ + struct kgsl_event *event, *tmp; + spin_lock(&group->lock); + + list_for_each_entry_safe(event, tmp, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) + signal_event(device, event, KGSL_EVENT_CANCELLED); + } + + spin_unlock(&group->lock); +} +EXPORT_SYMBOL(kgsl_cancel_event); + +/** + * kgsl_event_pending() - Searches for an event in an event group + * @device: Pointer to a KGSL device + * @group: Pointer to the group that contains the events + * @timestamp: Registered expiry timestamp for the event + * @func: Registered callback for the function + * @priv: Registered priv data for the function + */ +bool kgsl_event_pending(struct kgsl_device *device, + struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + struct kgsl_event *event; + bool result = false; + spin_lock(&group->lock); + list_for_each_entry(event, &group->events, node) { + if (timestamp == event->timestamp && func == event->func && + event->priv == priv) { + result = true; + break; + } + } + spin_unlock(&group->lock); + return result; +} +/** + * kgsl_add_event() - Add a new GPU event to a group + * @device: Pointer to a KGSL device + * @group: Pointer to the group to add the event to + * @timestamp: Timestamp that the event will expire on + * @func: Callback function for the event + * @priv: Private data to send to the callback function + */ +int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group, + unsigned int timestamp, kgsl_event_func func, void *priv) +{ + unsigned int queued; + struct kgsl_context *context = group->context; + struct kgsl_event *event; + unsigned int retired; + + if (!func) + return -EINVAL; + + /* + * If the caller is creating their own timestamps, let them schedule + * events in the future. Otherwise only allow timestamps that have been + * queued. 
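+ * (Contexts created with KGSL_CONTEXT_USER_GENERATED_TS manage their own
+ * timestamps, which is why the queued-timestamp check below is skipped
+ * for them.)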
+ */ + if (!context || !(context->flags & KGSL_CONTEXT_USER_GENERATED_TS)) { + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_QUEUED, + &queued); + + if (timestamp_cmp(timestamp, queued) > 0) + return -EINVAL; + } + + event = kmem_cache_alloc(events_cache, GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* Get a reference to the context while the event is active */ + if (context != NULL && !_kgsl_context_get(context)) { + kmem_cache_free(events_cache, event); + return -ENOENT; + } + + event->device = device; + event->context = context; + event->timestamp = timestamp; + event->priv = priv; + event->func = func; + event->created = jiffies; + event->group = group; + + INIT_WORK(&event->work, _kgsl_event_worker); + + trace_kgsl_register_event(KGSL_CONTEXT_ID(context), timestamp, func); + + spin_lock(&group->lock); + + /* + * Check to see if the requested timestamp has already retired. If so, + * schedule the callback right away + */ + group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED, + &retired); + + if (timestamp_cmp(retired, timestamp) >= 0) { + event->result = KGSL_EVENT_RETIRED; + queue_work(device->events_wq, &event->work); + spin_unlock(&group->lock); + return 0; + } + + /* Add the event to the group list */ + list_add_tail(&event->node, &group->events); + + spin_unlock(&group->lock); + + return 0; +} +EXPORT_SYMBOL(kgsl_add_event); + +static DEFINE_RWLOCK(group_lock); +static LIST_HEAD(group_list); + +/** + * kgsl_process_events() - Work queue for processing new timestamp events + * @work: Pointer to a work_struct + */ +void kgsl_process_events(struct work_struct *work) +{ + struct kgsl_event_group *group; + struct kgsl_device *device = container_of(work, struct kgsl_device, + event_work); + + read_lock(&group_lock); + list_for_each_entry(group, &group_list, group) + _process_event_group(device, group, false); + read_unlock(&group_lock); +} +EXPORT_SYMBOL(kgsl_process_events); + +/** + * kgsl_del_event_group() - Remove a GPU event group + * @group: GPU event group to remove + */ +void kgsl_del_event_group(struct kgsl_event_group *group) +{ + /* Make sure that all the events have been deleted from the list */ + BUG_ON(!list_empty(&group->events)); + + write_lock(&group_lock); + list_del(&group->group); + write_unlock(&group_lock); +} +EXPORT_SYMBOL(kgsl_del_event_group); + +/** + * kgsl_add_event_group() - Add a new GPU event group + * group: Pointer to the new group to add to the list + * context: Context that owns the group (or NULL for global) + * name: Name of the group + * readtimestamp: Function pointer to the readtimestamp function to call when + * processing events + * priv: Priv member to pass to the readtimestamp function + */ +void kgsl_add_event_group(struct kgsl_event_group *group, + struct kgsl_context *context, const char *name, + readtimestamp_func readtimestamp, void *priv) +{ + BUG_ON(readtimestamp == NULL); + + spin_lock_init(&group->lock); + INIT_LIST_HEAD(&group->events); + + group->context = context; + group->readtimestamp = readtimestamp; + group->priv = priv; + + if (name) + strlcpy(group->name, name, sizeof(group->name)); + + write_lock(&group_lock); + list_add_tail(&group->group, &group_list); + write_unlock(&group_lock); +} +EXPORT_SYMBOL(kgsl_add_event_group); + +static void events_debugfs_print_group(struct seq_file *s, + struct kgsl_event_group *group) +{ + struct kgsl_event *event; + unsigned int retired; + + spin_lock(&group->lock); + + seq_printf(s, "%s: last=%d\n", group->name, group->processed); + + 
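+ /*
+ * Walk the events still pending on this group and show each one next to
+ * the timestamp the hardware currently reports as retired.
+ */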
list_for_each_entry(event, &group->events, node) { + + group->readtimestamp(event->device, group->priv, + KGSL_TIMESTAMP_RETIRED, &retired); + + seq_printf(s, "\t%d:%d age=%lu func=%ps [retired=%d]\n", + group->context ? group->context->id : + KGSL_MEMSTORE_GLOBAL, + event->timestamp, jiffies - event->created, + event->func, retired); + } + spin_unlock(&group->lock); +} + +static int events_debugfs_print(struct seq_file *s, void *unused) +{ + struct kgsl_event_group *group; + + seq_puts(s, "event groups:\n"); + seq_puts(s, "--------------\n"); + + read_lock(&group_lock); + list_for_each_entry(group, &group_list, group) { + events_debugfs_print_group(s, group); + seq_puts(s, "\n"); + } + read_unlock(&group_lock); + + return 0; +} + +static int events_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, events_debugfs_print, NULL); +} + +static const struct file_operations events_fops = { + .open = events_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * kgsl_events_exit() - Destroy the event kmem cache on module exit + */ +void kgsl_events_exit(void) +{ + if (events_cache) + kmem_cache_destroy(events_cache); + + debugfs_remove(events_dentry); +} + +/** + * kgsl_events_init() - Create the event kmem cache on module start + */ +void __init kgsl_events_init(void) +{ + struct dentry *debugfs_dir = kgsl_get_debugfs_dir(); + events_cache = KMEM_CACHE(kgsl_event, 0); + + events_dentry = debugfs_create_file("events", 0444, debugfs_dir, NULL, + &events_fops); + + /* Failure to create a debugfs entry is non fatal */ + if (IS_ERR(events_dentry)) + events_dentry = NULL; +} diff --git a/drivers/gpu/msm/kgsl_ioctl.c b/drivers/gpu/msm/kgsl_ioctl.c new file mode 100644 index 000000000000..0802e94f56ad --- /dev/null +++ b/drivers/gpu/msm/kgsl_ioctl.c @@ -0,0 +1,176 @@ +/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/ioctl.h> +#include <linux/compat.h> +#include <linux/uaccess.h> +#include <linux/fs.h> +#include "kgsl_device.h" +#include "kgsl_sync.h" + +static const struct kgsl_ioctl kgsl_ioctl_funcs[] = { + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY, + kgsl_ioctl_device_getproperty), + /* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, + kgsl_ioctl_device_waittimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, + kgsl_ioctl_rb_issueibcmds), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS, + kgsl_ioctl_submit_commands), + /* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_readtimestamp_ctxtid), + /* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */ + KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID, + kgsl_ioctl_cmdstream_freememontimestamp_ctxtid), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE, + kgsl_ioctl_drawctxt_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY, + kgsl_ioctl_drawctxt_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM, + kgsl_ioctl_map_user_mem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE, + kgsl_ioctl_sharedmem_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE, + kgsl_ioctl_sharedmem_flush_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC, + kgsl_ioctl_gpumem_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNCMEM, + kgsl_ioctl_cff_syncmem), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT, + kgsl_ioctl_cff_user_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT, + kgsl_ioctl_timestamp_event), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY, + kgsl_ioctl_device_setproperty), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID, + kgsl_ioctl_gpumem_alloc_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID, + kgsl_ioctl_gpumem_free_id), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO, + kgsl_ioctl_gpumem_get_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE, + kgsl_ioctl_gpumem_sync_cache), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK, + kgsl_ioctl_gpumem_sync_cache_bulk), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE, + kgsl_ioctl_syncsource_create), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY, + kgsl_ioctl_syncsource_destroy), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE, + kgsl_ioctl_syncsource_create_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE, + kgsl_ioctl_syncsource_signal_fence), + KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_SYNC_GPUOBJ, + kgsl_ioctl_cff_sync_gpuobj), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC, + kgsl_ioctl_gpuobj_alloc), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE, + kgsl_ioctl_gpuobj_free), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO, + kgsl_ioctl_gpuobj_info), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT, + kgsl_ioctl_gpuobj_import), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC, + kgsl_ioctl_gpuobj_sync), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND, + kgsl_ioctl_gpu_command), + KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO, + kgsl_ioctl_gpuobj_set_info), +}; + +long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? 
ksize : usize; + + if ((kernel_cmd & IOC_IN) && (user_cmd & IOC_IN)) { + if (copy > 0 && copy_from_user(ptr, (void __user *) arg, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd, + unsigned long arg, unsigned char *ptr) +{ + unsigned int usize = _IOC_SIZE(user_cmd); + unsigned int ksize = _IOC_SIZE(kernel_cmd); + unsigned int copy = ksize < usize ? ksize : usize; + + if ((kernel_cmd & IOC_OUT) && (user_cmd & IOC_OUT)) { + if (copy > 0 && copy_to_user((void __user *) arg, ptr, copy)) + return -EFAULT; + } + + return 0; +} + +long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg, + const struct kgsl_ioctl *cmds, int len) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + unsigned char data[128] = { 0 }; + unsigned int nr = _IOC_NR(cmd); + long ret; + + if (nr >= len || cmds[nr].func == NULL) + return -ENOIOCTLCMD; + + BUG_ON(_IOC_SIZE(cmds[nr].cmd) > sizeof(data)); + + if (_IOC_SIZE(cmds[nr].cmd)) { + ret = kgsl_ioctl_copy_in(cmds[nr].cmd, cmd, arg, data); + if (ret) + return ret; + } + + ret = cmds[nr].func(dev_priv, cmd, data); + + if (ret == 0 && _IOC_SIZE(cmds[nr].cmd)) + ret = kgsl_ioctl_copy_out(cmds[nr].cmd, cmd, arg, data); + + return ret; +} + +long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kgsl_device_private *dev_priv = filep->private_data; + struct kgsl_device *device = dev_priv->device; + long ret; + + ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs, + ARRAY_SIZE(kgsl_ioctl_funcs)); + + /* + * If the command was unrecognized in the generic core, try the device + * specific function + */ + + if (ret == -ENOIOCTLCMD) { + if (is_compat_task() && device->ftbl->compat_ioctl != NULL) + return device->ftbl->compat_ioctl(dev_priv, cmd, arg); + else if (device->ftbl->ioctl != NULL) + return device->ftbl->ioctl(dev_priv, cmd, arg); + + KGSL_DRV_INFO(device, "invalid ioctl code 0x%08X\n", cmd); + } + + return ret; +} diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c new file mode 100644 index 000000000000..be77db93994e --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -0,0 +1,1834 @@ +/* Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/genalloc.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/msm_kgsl.h> +#include <linux/ratelimit.h> +#include <soc/qcom/scm.h> +#include <soc/qcom/secure_buffer.h> +#include <stddef.h> +#include <linux/compat.h> + +#include "kgsl.h" +#include "kgsl_device.h" +#include "kgsl_mmu.h" +#include "kgsl_sharedmem.h" +#include "kgsl_iommu.h" +#include "adreno_pm4types.h" +#include "adreno.h" +#include "kgsl_trace.h" +#include "kgsl_cffdump.h" +#include "kgsl_pwrctrl.h" + +static struct kgsl_mmu_pt_ops iommu_pt_ops; +static bool need_iommu_sync; + +const unsigned int kgsl_iommu_reg_list[KGSL_IOMMU_REG_MAX] = { + 0x0,/* SCTLR */ + 0x20,/* TTBR0 */ + 0x34,/* CONTEXTIDR */ + 0x58,/* FSR */ + 0x60,/* FAR_0 */ + 0x618,/* TLBIALL */ + 0x008,/* RESUME */ + 0x68,/* FSYNR0 */ + 0x6C,/* FSYNR1 */ + 0x7F0,/* TLBSYNC */ + 0x7F4,/* TLBSTATUS */ +}; + +/* + * struct kgsl_iommu_addr_entry - entry in the kgsl_iommu_pt rbtree. + * @base: starting virtual address of the entry + * @size: size of the entry + * @node: the rbtree node + * + */ +struct kgsl_iommu_addr_entry { + uint64_t base; + uint64_t size; + struct rb_node node; +}; + +static struct kmem_cache *addr_entry_cache; + +static inline void _iommu_sync_mmu_pc(bool lock) +{ + if (need_iommu_sync == false) + return; + + if (lock) + mutex_lock(&kgsl_mmu_sync); + else + mutex_unlock(&kgsl_mmu_sync); +} + +static void _detach_pt(struct kgsl_iommu_pt *iommu_pt, + struct kgsl_iommu_context *ctx) +{ + if (iommu_pt->attached) { + _iommu_sync_mmu_pc(true); + iommu_detach_device(iommu_pt->domain, ctx->dev); + _iommu_sync_mmu_pc(false); + iommu_pt->attached = false; + } +} + +static int _attach_pt(struct kgsl_iommu_pt *iommu_pt, + struct kgsl_iommu_context *ctx) +{ + int ret; + + if (iommu_pt->attached) + return 0; + + _iommu_sync_mmu_pc(true); + ret = iommu_attach_device(iommu_pt->domain, ctx->dev); + _iommu_sync_mmu_pc(false); + + if (ret == 0) + iommu_pt->attached = true; + else + KGSL_CORE_ERR("iommu_attach_device(%s) failed: %d\n", + ctx->name, ret); + + return ret; +} + +/* + * One page allocation for a guard region to protect against over-zealous + * GPU pre-fetch + */ + +static struct page *kgsl_guard_page; +static struct kgsl_memdesc kgsl_secure_guard_page_memdesc; + +/* These functions help find the nearest allocated memory entries on either side + * of a faulting address. 
If we know the nearby allocations memory we can + * get a better determination of what we think should have been located in the + * faulting region + */ + +/* + * A local structure to make it easy to store the interesting bits for the + * memory entries on either side of the faulting address + */ + +struct _mem_entry { + uint64_t gpuaddr; + uint64_t size; + uint64_t flags; + unsigned int priv; + int pending_free; + pid_t pid; +}; + +static void _get_entries(struct kgsl_process_private *private, + uint64_t faultaddr, struct _mem_entry *prev, + struct _mem_entry *next) +{ + int id; + struct kgsl_mem_entry *entry; + + uint64_t prevaddr = 0; + struct kgsl_mem_entry *p = NULL; + + uint64_t nextaddr = (uint64_t) -1; + struct kgsl_mem_entry *n = NULL; + + idr_for_each_entry(&private->mem_idr, entry, id) { + uint64_t addr = entry->memdesc.gpuaddr; + + if ((addr < faultaddr) && (addr > prevaddr)) { + prevaddr = addr; + p = entry; + } + + if ((addr > faultaddr) && (addr < nextaddr)) { + nextaddr = addr; + n = entry; + } + } + + if (p != NULL) { + prev->gpuaddr = p->memdesc.gpuaddr; + prev->size = p->memdesc.size; + prev->flags = p->memdesc.flags; + prev->priv = p->memdesc.priv; + prev->pending_free = p->pending_free; + prev->pid = private->pid; + } + + if (n != NULL) { + next->gpuaddr = n->memdesc.gpuaddr; + next->size = n->memdesc.size; + next->flags = n->memdesc.flags; + next->priv = n->memdesc.priv; + next->pending_free = n->pending_free; + next->pid = private->pid; + } +} + +static void _find_mem_entries(struct kgsl_mmu *mmu, uint64_t faultaddr, + phys_addr_t ptbase, struct _mem_entry *preventry, + struct _mem_entry *nextentry) +{ + struct kgsl_process_private *private = NULL, *p; + int id = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase); + + memset(preventry, 0, sizeof(*preventry)); + memset(nextentry, 0, sizeof(*nextentry)); + + /* Set the maximum possible size as an initial value */ + nextentry->gpuaddr = (uint64_t) -1; + + mutex_lock(&kgsl_driver.process_mutex); + list_for_each_entry(p, &kgsl_driver.process_list, list) { + if (p->pagetable && (p->pagetable->name == id)) { + if (kgsl_process_private_get(p)) + private = p; + break; + } + } + mutex_unlock(&kgsl_driver.process_mutex); + + if (private != NULL) { + spin_lock(&private->mem_lock); + _get_entries(private, faultaddr, preventry, nextentry); + spin_unlock(&private->mem_lock); + + kgsl_process_private_put(private); + } +} + +static void _print_entry(struct kgsl_device *device, struct _mem_entry *entry) +{ + char name[32]; + memset(name, 0, sizeof(name)); + + kgsl_get_memory_usage(name, sizeof(name) - 1, entry->flags); + + KGSL_LOG_DUMP(device, + "[%016llX - %016llX] %s %s (pid = %d) (%s)\n", + entry->gpuaddr, + entry->gpuaddr + entry->size, + entry->priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "", + entry->pending_free ? 
"(pending free)" : "", + entry->pid, name); +} + +static void _check_if_freed(struct kgsl_iommu_context *ctx, + uint64_t addr, pid_t ptname) +{ + uint64_t gpuaddr = addr; + uint64_t size = 0; + uint64_t flags = 0; + pid_t pid; + + char name[32]; + memset(name, 0, sizeof(name)); + + if (kgsl_memfree_find_entry(ptname, &gpuaddr, &size, &flags, &pid)) { + kgsl_get_memory_usage(name, sizeof(name) - 1, flags); + KGSL_LOG_DUMP(ctx->kgsldev, "---- premature free ----\n"); + KGSL_LOG_DUMP(ctx->kgsldev, + "[%8.8llX-%8.8llX] (%s) was already freed by pid %d\n", + gpuaddr, gpuaddr + size, name, pid); + } +} + +static int kgsl_iommu_fault_handler(struct iommu_domain *domain, + struct device *dev, unsigned long addr, int flags, void *token) +{ + int ret = 0; + struct kgsl_pagetable *pt = token; + struct kgsl_mmu *mmu = pt->mmu; + struct kgsl_iommu *iommu; + struct kgsl_iommu_context *ctx; + u64 ptbase; + u32 contextidr; + pid_t ptname; + struct _mem_entry prev, next; + int write; + struct kgsl_device *device; + struct adreno_device *adreno_dev; + unsigned int no_page_fault_log = 0; + unsigned int curr_context_id = 0; + struct kgsl_context *context; + char *fault_type = "unknown"; + + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + if (mmu == NULL || mmu->priv == NULL) + return ret; + + iommu = mmu->priv; + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + device = mmu->device; + adreno_dev = ADRENO_DEVICE(device); + + if (pt->name == KGSL_MMU_SECURE_PT) + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + + /* + * set the fault bits and stuff before any printks so that if fault + * handler runs then it will know it's dealing with a pagefault. + * Read the global current timestamp because we could be in middle of + * RB switch and hence the cur RB may not be reliable but global + * one will always be reliable + */ + kgsl_sharedmem_readl(&device->memstore, &curr_context_id, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context)); + + context = kgsl_context_get(device, curr_context_id); + + if (context != NULL) { + /* save pagefault timestamp for GFT */ + set_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, &context->priv); + + kgsl_context_put(context); + context = NULL; + } + + ctx->fault = 1; + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy) && + (flags & IOMMU_FAULT_TRANSACTION_STALLED)) { + /* + * Turn off GPU IRQ so we don't get faults from it too. + * The device mutex must be held to change power state + */ + mutex_lock(&device->mutex); + kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE); + mutex_unlock(&device->mutex); + } + + write = (flags & IOMMU_FAULT_WRITE) ? 1 : 0; + if (flags & IOMMU_FAULT_TRANSLATION) + fault_type = "translation"; + else if (flags & IOMMU_FAULT_PERMISSION) + fault_type = "permission"; + + ptbase = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0); + contextidr = KGSL_IOMMU_GET_CTX_REG(ctx, CONTEXTIDR); + + ptname = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase); + + if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE, + &adreno_dev->ft_pf_policy)) + no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr); + + if (!no_page_fault_log && __ratelimit(&_rs)) { + KGSL_MEM_CRIT(ctx->kgsldev, + "GPU PAGE FAULT: addr = %lX pid= %d\n", addr, ptname); + KGSL_MEM_CRIT(ctx->kgsldev, + "context=%s TTBR0=0x%llx CIDR=0x%x (%s %s fault)\n", + ctx->name, ptbase, contextidr, + write ? 
"write" : "read", fault_type); + + /* Don't print the debug if this is a permissions fault */ + if (!(flags & IOMMU_FAULT_PERMISSION)) { + _check_if_freed(ctx, addr, ptname); + + KGSL_LOG_DUMP(ctx->kgsldev, + "---- nearby memory ----\n"); + + _find_mem_entries(mmu, addr, ptbase, &prev, &next); + + if (prev.gpuaddr) + _print_entry(ctx->kgsldev, &prev); + else + KGSL_LOG_DUMP(ctx->kgsldev, "*EMPTY*\n"); + + KGSL_LOG_DUMP(ctx->kgsldev, " <- fault @ %8.8lX\n", + addr); + + if (next.gpuaddr != (uint64_t) -1) + _print_entry(ctx->kgsldev, &next); + else + KGSL_LOG_DUMP(ctx->kgsldev, "*EMPTY*\n"); + + } + } + + trace_kgsl_mmu_pagefault(ctx->kgsldev, addr, + kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase), + write ? "write" : "read"); + + /* + * We do not want the h/w to resume fetching data from an iommu + * that has faulted, this is better for debugging as it will stall + * the GPU and trigger a snapshot. Return EBUSY error. + */ + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy) && + (flags & IOMMU_FAULT_TRANSACTION_STALLED)) { + uint32_t sctlr_val; + ret = -EBUSY; + /* + * Disable context fault interrupts + * as we do not clear FSR in the ISR. + * Will be re-enabled after FSR is cleared. + */ + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + + adreno_set_gpu_fault(adreno_dev, ADRENO_IOMMU_PAGE_FAULT); + /* Go ahead with recovery*/ + adreno_dispatcher_schedule(device); + } + + return ret; +} + +/* + * kgsl_iommu_disable_clk() - Disable iommu clocks + * Disable IOMMU clocks + */ +static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + int j; + + atomic_dec(&iommu->clk_enable_count); + BUG_ON(atomic_read(&iommu->clk_enable_count) < 0); + + for (j = (KGSL_IOMMU_MAX_CLKS - 1); j >= 0; j--) + if (iommu->clks[j]) + clk_disable_unprepare(iommu->clks[j]); +} + +/* + * kgsl_iommu_enable_clk_prepare_enable - Enable the specified IOMMU clock + * Try 4 times to enable it and then BUG() for debug + */ +static void kgsl_iommu_clk_prepare_enable(struct clk *clk) +{ + int num_retries = 4; + + while (num_retries--) { + if (!clk_prepare_enable(clk)) + return; + } + + /* Failure is fatal so BUG() to facilitate debug */ + KGSL_CORE_ERR("IOMMU clock enable failed\n"); + BUG(); +} + +/* + * kgsl_iommu_enable_clk - Enable iommu clocks + * Enable all the IOMMU clocks + */ +static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu) +{ + int j; + struct kgsl_iommu *iommu = mmu->priv; + + for (j = 0; j < KGSL_IOMMU_MAX_CLKS; j++) { + if (iommu->clks[j]) + kgsl_iommu_clk_prepare_enable(iommu->clks[j]); + } + atomic_inc(&iommu->clk_enable_count); +} + +/* kgsl_iommu_get_ttbr0 - Get TTBR0 setting for a pagetable */ +static u64 kgsl_iommu_get_ttbr0(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt ? pt->priv : NULL; + + BUG_ON(iommu_pt == NULL); + + return iommu_pt->ttbr0; +} + +/* kgsl_iommu_get_contextidr - query CONTEXTIDR setting for a pagetable */ +static u32 kgsl_iommu_get_contextidr(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt ? 
pt->priv : NULL; + + BUG_ON(iommu_pt == NULL); + + return iommu_pt->contextidr; +} + +/* + * kgsl_iommu_destroy_pagetable - Free up reaources help by a pagetable + * @mmu_specific_pt - Pointer to pagetable which is to be freed + * + * Return - void + */ +static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt->priv; + struct kgsl_mmu *mmu = pt->mmu; + struct kgsl_iommu *iommu; + struct kgsl_iommu_context *ctx; + + BUG_ON(!list_empty(&pt->list)); + + iommu = mmu->priv; + + if (KGSL_MMU_SECURE_PT == pt->name) + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + else + ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + if (iommu_pt->domain) { + trace_kgsl_pagetable_destroy(iommu_pt->ttbr0, pt->name); + + _detach_pt(iommu_pt, ctx); + + iommu_domain_free(iommu_pt->domain); + } + + kfree(iommu_pt); +} + +static void setup_64bit_pagetable(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, + struct kgsl_iommu_pt *pt) +{ + if (mmu->secured && pagetable->name == KGSL_MMU_SECURE_PT) { + pt->compat_va_start = KGSL_IOMMU_SECURE_BASE; + pt->compat_va_end = KGSL_IOMMU_SECURE_END; + pt->va_start = KGSL_IOMMU_SECURE_BASE; + pt->va_end = KGSL_IOMMU_SECURE_END; + } else { + pt->compat_va_start = KGSL_IOMMU_SVM_BASE32; + pt->compat_va_end = KGSL_IOMMU_SVM_END32; + pt->va_start = KGSL_IOMMU_VA_BASE64; + pt->va_end = KGSL_IOMMU_VA_END64; + } + + if (pagetable->name != KGSL_MMU_GLOBAL_PT && + pagetable->name != KGSL_MMU_SECURE_PT) { + if ((BITS_PER_LONG == 32) || is_compat_task()) { + pt->svm_start = KGSL_IOMMU_SVM_BASE32; + pt->svm_end = KGSL_IOMMU_SVM_END32; + } else { + pt->svm_start = KGSL_IOMMU_SVM_BASE64; + pt->svm_end = KGSL_IOMMU_SVM_END64; + } + } +} + +static void setup_32bit_pagetable(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable, + struct kgsl_iommu_pt *pt) +{ + if (mmu->secured) { + if (pagetable->name == KGSL_MMU_SECURE_PT) { + pt->compat_va_start = KGSL_IOMMU_SECURE_BASE; + pt->compat_va_end = KGSL_IOMMU_SECURE_END; + pt->va_start = KGSL_IOMMU_SECURE_BASE; + pt->va_end = KGSL_IOMMU_SECURE_END; + } else { + pt->va_start = KGSL_IOMMU_SVM_BASE32; + pt->va_end = KGSL_IOMMU_SECURE_BASE; + pt->compat_va_start = pt->va_start; + pt->compat_va_end = pt->va_end; + } + } else { + pt->va_start = KGSL_IOMMU_SVM_BASE32; + pt->va_end = KGSL_MMU_GLOBAL_MEM_BASE; + pt->compat_va_start = pt->va_start; + pt->compat_va_end = pt->va_end; + } + + if (pagetable->name != KGSL_MMU_GLOBAL_PT && + pagetable->name != KGSL_MMU_SECURE_PT) { + pt->svm_start = KGSL_IOMMU_SVM_BASE32; + pt->svm_end = KGSL_IOMMU_SVM_END32; + } +} + + +static struct kgsl_iommu_pt * +_alloc_pt(struct device *dev, struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt; + struct bus_type *bus = kgsl_mmu_get_bus(dev); + + if (bus == NULL) + return ERR_PTR(-ENODEV); + + iommu_pt = kzalloc(sizeof(struct kgsl_iommu_pt), GFP_KERNEL); + if (iommu_pt == NULL) + return ERR_PTR(-ENOMEM); + + iommu_pt->domain = iommu_domain_alloc(bus); + if (iommu_pt->domain == NULL) { + kfree(iommu_pt); + return ERR_PTR(-ENODEV); + } + + pt->pt_ops = &iommu_pt_ops; + pt->priv = iommu_pt; + iommu_pt->rbtree = RB_ROOT; + + if (MMU_FEATURE(mmu, KGSL_MMU_64BIT)) + setup_64bit_pagetable(mmu, pt, iommu_pt); + else + setup_32bit_pagetable(mmu, pt, iommu_pt); + + + return iommu_pt; +} + +static void _free_pt(struct kgsl_iommu_context *ctx, struct kgsl_pagetable *pt) +{ + struct kgsl_iommu_pt *iommu_pt = pt->priv; + + pt->pt_ops = NULL; + pt->priv = NULL; + + if (iommu_pt == NULL) + return; + 
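+ /* Detach from the IOMMU context bank before the domain is freed */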
+ _detach_pt(iommu_pt, ctx); + + if (iommu_pt->domain != NULL) + iommu_domain_free(iommu_pt->domain); + kfree(iommu_pt); +} + +static int _init_global_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + int ret = 0; + struct kgsl_iommu_pt *iommu_pt = NULL; + int disable_htw = !MMU_FEATURE(mmu, KGSL_MMU_COHERENT_HTW); + unsigned int cb_num; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + iommu_pt = _alloc_pt(ctx->dev, mmu, pt); + + if (IS_ERR(iommu_pt)) + return PTR_ERR(iommu_pt); + + iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_COHERENT_HTW_DISABLE, &disable_htw); + + if (kgsl_mmu_is_perprocess(mmu)) { + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_PROCID, &pt->name); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_PROCID failed: %d\n", + ret); + goto done; + } + } + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + goto done; + + iommu_set_fault_handler(iommu_pt->domain, + kgsl_iommu_fault_handler, pt); + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXT_BANK, &cb_num); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_PROCID failed: %d\n", + ret); + goto done; + } + + ctx->cb_num = cb_num; + ctx->regbase = iommu->regbase + KGSL_IOMMU_CB0_OFFSET + + (cb_num << KGSL_IOMMU_CB_SHIFT); + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_TTBR0, &iommu_pt->ttbr0); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_TTBR0 failed: %d\n", + ret); + goto done; + } + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXTIDR, &iommu_pt->contextidr); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXTIDR failed: %d\n", + ret); + goto done; + } + +done: + if (ret) + _free_pt(ctx, pt); + + return ret; +} + +static int _init_secure_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + int ret = 0; + struct kgsl_iommu_pt *iommu_pt = NULL; + struct kgsl_iommu *iommu = mmu->priv; + int disable_htw = !MMU_FEATURE(mmu, KGSL_MMU_COHERENT_HTW); + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + int secure_vmid = VMID_CP_PIXEL; + unsigned int cb_num; + + if (!mmu->secured) + return -EPERM; + + if (!MMU_FEATURE(mmu, KGSL_MMU_HYP_SECURE_ALLOC)) { + if (!kgsl_mmu_bus_secured(ctx->dev)) + return -EPERM; + } + + iommu_pt = _alloc_pt(ctx->dev, mmu, pt); + + if (IS_ERR(iommu_pt)) + return PTR_ERR(iommu_pt); + + iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_COHERENT_HTW_DISABLE, &disable_htw); + + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_SECURE_VMID, &secure_vmid); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_SECURE_VMID failed: %d\n", ret); + goto done; + } + + ret = _attach_pt(iommu_pt, ctx); + + if (MMU_FEATURE(mmu, KGSL_MMU_HYP_SECURE_ALLOC)) + iommu_set_fault_handler(iommu_pt->domain, + kgsl_iommu_fault_handler, pt); + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXT_BANK, &cb_num); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_PROCID failed: %d\n", + ret); + goto done; + } + + ctx->cb_num = cb_num; + ctx->regbase = iommu->regbase + KGSL_IOMMU_CB0_OFFSET + + (cb_num << KGSL_IOMMU_CB_SHIFT); + +done: + if (ret) + _free_pt(ctx, pt); + return ret; +} + +static int _init_per_process_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + int ret = 0; + struct kgsl_iommu_pt *iommu_pt = NULL; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + int dynamic = 1; + unsigned int cb_num = ctx->cb_num; + int disable_htw = !MMU_FEATURE(mmu, 
KGSL_MMU_COHERENT_HTW); + + iommu_pt = _alloc_pt(ctx->dev, mmu, pt); + + if (IS_ERR(iommu_pt)) + return PTR_ERR(iommu_pt); + + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_DYNAMIC, &dynamic); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_DYNAMIC failed: %d\n", ret); + goto done; + } + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXT_BANK, &cb_num); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_CONTEXT_BANK failed: %d\n", ret); + goto done; + } + + ret = iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_PROCID, &pt->name); + if (ret) { + KGSL_CORE_ERR("set DOMAIN_ATTR_PROCID failed: %d\n", ret); + goto done; + } + + iommu_domain_set_attr(iommu_pt->domain, + DOMAIN_ATTR_COHERENT_HTW_DISABLE, &disable_htw); + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + goto done; + + /* now read back the attributes needed for self programming */ + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_TTBR0, &iommu_pt->ttbr0); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_TTBR0 failed: %d\n", ret); + goto done; + } + + ret = iommu_domain_get_attr(iommu_pt->domain, + DOMAIN_ATTR_CONTEXTIDR, &iommu_pt->contextidr); + if (ret) { + KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXTIDR failed: %d\n", ret); + goto done; + } + +done: + if (ret) + _free_pt(ctx, pt); + + return ret; +} + +/* kgsl_iommu_init_pt - Set up an IOMMU pagetable */ +static int kgsl_iommu_init_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt) +{ + if (pt == NULL) + return -EINVAL; + + switch (pt->name) { + case KGSL_MMU_GLOBAL_PT: + return _init_global_pt(mmu, pt); + + case KGSL_MMU_SECURE_PT: + return _init_secure_pt(mmu, pt); + + default: + return _init_per_process_pt(mmu, pt); + } +} + +/* + * kgsl_iommu_get_reg_ahbaddr - Returns the ahb address of the register + * @mmu - Pointer to mmu structure + * @id - The context ID of the IOMMU ctx + * @reg - The register for which address is required + * + * Return - The address of register which can be used in type0 packet + */ +static unsigned int kgsl_iommu_get_reg_ahbaddr(struct kgsl_mmu *mmu, + enum kgsl_iommu_context_id id, enum kgsl_iommu_reg_map reg) +{ + unsigned int result; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[id]; + + result = ctx->gpu_offset + kgsl_iommu_reg_list[reg]; + return result; +} + +static int kgsl_iommu_init(struct kgsl_mmu *mmu) +{ + /* + * intialize device mmu + * + * call this with the global lock held + */ + int status = 0; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + struct platform_device *pdev = mmu->device->pdev; + + if (ctx->name == NULL) { + KGSL_CORE_ERR("dt: gfx3d0_user context bank not found\n"); + return -EINVAL; + } + + /* check requirements for per process pagetables */ + if (ctx->gpu_offset == UINT_MAX) { + KGSL_CORE_ERR("missing qcom,gpu-offset forces global pt\n"); + mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE; + } + + if (iommu->version == 1 && iommu->micro_mmu_ctrl == UINT_MAX) { + KGSL_CORE_ERR( + "missing qcom,micro-mmu-control forces global pt\n"); + mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE; + } + + /* Check to see if we need to do the IOMMU sync dance */ + need_iommu_sync = of_property_read_bool(pdev->dev.of_node, + "qcom,gpu-quirk-iommu-sync"); + + iommu->regbase = ioremap(iommu->regstart, iommu->regsize); + if (iommu->regbase == NULL) { + KGSL_CORE_ERR("Could not map IOMMU registers 0x%lx:0x%x\n", + iommu->regstart, iommu->regsize); + return -ENOMEM; + } + + if (addr_entry_cache == NULL) { + 
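+ /*
+ * Lazily create the kmem cache for the rbtree entries used to track
+ * GPU virtual address ranges in each IOMMU pagetable.
+ */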
addr_entry_cache = KMEM_CACHE(kgsl_iommu_addr_entry, 0); + if (addr_entry_cache == NULL) { + status = -ENOMEM; + goto done; + } + } + + if (kgsl_guard_page == NULL) { + kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO | + __GFP_HIGHMEM); + if (kgsl_guard_page == NULL) { + status = -ENOMEM; + goto done; + } + } + +done: + return status; +} + +static void _detach_context(struct kgsl_iommu_context *ctx) +{ + struct kgsl_iommu_pt *iommu_pt; + + if (ctx->default_pt == NULL) + return; + + iommu_pt = ctx->default_pt->priv; + + _detach_pt(iommu_pt, ctx); + + ctx->default_pt = NULL; +} + +static int _setup_user_context(struct kgsl_mmu *mmu) +{ + int ret = 0; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + struct adreno_device *adreno_dev = ADRENO_DEVICE(mmu->device); + struct kgsl_iommu_pt *iommu_pt = NULL; + unsigned int sctlr_val; + + if (mmu->defaultpagetable == NULL) { + mmu->defaultpagetable = kgsl_mmu_getpagetable(mmu, + KGSL_MMU_GLOBAL_PT); + /* if we don't have a default pagetable, nothing will work */ + if (IS_ERR(mmu->defaultpagetable)) { + ret = PTR_ERR(mmu->defaultpagetable); + mmu->defaultpagetable = NULL; + return ret; + } + } + + iommu_pt = mmu->defaultpagetable->priv; + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + return ret; + + ctx->default_pt = mmu->defaultpagetable; + + kgsl_iommu_enable_clk(mmu); + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + + /* + * If pagefault policy is GPUHALT_ENABLE, + * 1) Program CFCFG to 1 to enable STALL mode + * 2) Program HUPCF to 0 (Stall or terminate subsequent + * transactions in the presence of an outstanding fault) + * else + * 1) Program CFCFG to 0 to disable STALL mode (0=Terminate) + * 2) Program HUPCF to 1 (Process subsequent transactions + * independently of any outstanding fault) + */ + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, + &adreno_dev->ft_pf_policy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + kgsl_iommu_disable_clk(mmu); + + return 0; +} + +static int _setup_secure_context(struct kgsl_mmu *mmu) +{ + int ret; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE]; + unsigned int cb_num; + + struct kgsl_iommu_pt *iommu_pt; + + if (ctx->dev == NULL || !mmu->secured) + return 0; + + if (mmu->securepagetable == NULL) { + mmu->securepagetable = kgsl_mmu_getpagetable(mmu, + KGSL_MMU_SECURE_PT); + if (IS_ERR(mmu->securepagetable)) { + ret = PTR_ERR(mmu->securepagetable); + mmu->securepagetable = NULL; + return ret; + } else if (mmu->securepagetable == NULL) { + return -ENOMEM; + } + } + iommu_pt = mmu->securepagetable->priv; + + ret = _attach_pt(iommu_pt, ctx); + if (ret) + goto done; + + ctx->default_pt = mmu->securepagetable; + + ret = iommu_domain_get_attr(iommu_pt->domain, DOMAIN_ATTR_CONTEXT_BANK, + &cb_num); + if (ret) { + KGSL_CORE_ERR("get CONTEXT_BANK attr, err %d\n", ret); + goto done; + } + ctx->cb_num = cb_num; +done: + if (ret) + _detach_context(ctx); + return ret; +} + +static int kgsl_iommu_start(struct kgsl_mmu *mmu) +{ + int status; + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + status = _setup_user_context(mmu); + if 
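The SCTLR update in _setup_user_context() (and the identical pattern in kgsl_iommu_set_pf_policy() further down) boils down to flipping the CFCFG and HUPCF bits in opposite directions, exactly as the comment describes. A standalone sketch of just that bit manipulation, using the shift values from kgsl_iommu.h; the function name is hypothetical:

#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT 8   /* 1: keep servicing other transactions on fault */
#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT 7   /* 1: stall on fault, 0: terminate */

/* Compute the SCTLR value for the requested pagefault policy. */
static unsigned int sctlr_for_policy(unsigned int sctlr_val, int gpuhalt_enable)
{
        if (gpuhalt_enable) {
                sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
                sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
        } else {
                sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
                sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
        }
        return sctlr_val;
}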
(status) + return status; + + status = _setup_secure_context(mmu); + if (status) + _detach_context(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER]); + else { + kgsl_iommu_enable_clk(mmu); + KGSL_IOMMU_SET_CTX_REG(ctx, TLBIALL, 1); + kgsl_iommu_disable_clk(mmu); + } + return status; +} + +static int +kgsl_iommu_unmap(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_device *device = pt->mmu->device; + int ret = 0; + uint64_t range = memdesc->size; + size_t unmapped = 0; + struct kgsl_iommu_pt *iommu_pt = pt->priv; + + /* All GPU addresses as assigned are page aligned, but some + functions purturb the gpuaddr with an offset, so apply the + mask here to make sure we have the right address */ + + uint64_t gpuaddr = PAGE_ALIGN(memdesc->gpuaddr); + + if (range == 0 || gpuaddr == 0) + return 0; + + if (kgsl_memdesc_has_guard_page(memdesc)) + range += kgsl_memdesc_guard_page_size(pt->mmu, memdesc); + + if (kgsl_memdesc_is_secured(memdesc)) { + + if (!kgsl_mmu_is_secured(pt->mmu)) + return -EINVAL; + + mutex_lock(&device->mutex); + ret = kgsl_active_count_get(device); + if (!ret) { + _iommu_sync_mmu_pc(true); + unmapped = iommu_unmap(iommu_pt->domain, gpuaddr, + range); + _iommu_sync_mmu_pc(false); + kgsl_active_count_put(device); + } + mutex_unlock(&device->mutex); + } else { + _iommu_sync_mmu_pc(true); + unmapped = iommu_unmap(iommu_pt->domain, gpuaddr, range); + _iommu_sync_mmu_pc(false); + } + if (unmapped != range) { + KGSL_CORE_ERR( + "iommu_unmap(%p, %llx, %lld) failed with unmapped size: %zd\n", + iommu_pt->domain, gpuaddr, range, unmapped); + return -EINVAL; + } + + return ret; +} + +/** + * _iommu_add_guard_page - Add iommu guard page + * @pt - Pointer to kgsl pagetable structure + * @memdesc - memdesc to add guard page + * @gpuaddr - GPU addr of guard page + * @protflags - flags for mapping + * + * Return 0 on success, error on map fail + */ +static int _iommu_add_guard_page(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc, + uint64_t gpuaddr, + unsigned int protflags) +{ + struct kgsl_iommu_pt *iommu_pt = pt->priv; + phys_addr_t physaddr = page_to_phys(kgsl_guard_page); + int ret; + + if (kgsl_memdesc_has_guard_page(memdesc)) { + + /* + * Allocate guard page for secure buffers. + * This has to be done after we attach a smmu pagetable. + * Allocate the guard page when first secure buffer is. + * mapped to save 1MB of memory if CPZ is not used. 
+ */ + if (kgsl_memdesc_is_secured(memdesc)) { + struct scatterlist *sg; + unsigned int sgp_size = pt->mmu->secure_align_mask + 1; + if (!kgsl_secure_guard_page_memdesc.sgt) { + if (kgsl_allocate_user(pt->mmu->device, + &kgsl_secure_guard_page_memdesc, pt, + sgp_size, sgp_size, + KGSL_MEMFLAGS_SECURE)) { + KGSL_CORE_ERR( + "Secure guard page alloc failed\n"); + return -ENOMEM; + } + } + + sg = kgsl_secure_guard_page_memdesc.sgt->sgl; + physaddr = page_to_phys(sg_page(sg)); + } + + _iommu_sync_mmu_pc(true); + ret = iommu_map(iommu_pt->domain, gpuaddr, physaddr, + kgsl_memdesc_guard_page_size(pt->mmu, memdesc), + protflags & ~IOMMU_WRITE); + _iommu_sync_mmu_pc(false); + if (ret) { + KGSL_CORE_ERR( + "iommu_map(%p, addr %016llX, flags %x) err: %d\n", + iommu_pt->domain, gpuaddr, protflags & ~IOMMU_WRITE, + ret); + return ret; + } + } + + return 0; +} + +static int +kgsl_iommu_map(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + uint64_t addr = memdesc->gpuaddr; + struct kgsl_iommu_pt *iommu_pt = pt->priv; + uint64_t size = memdesc->size; + unsigned int flags = 0; + struct kgsl_device *device = pt->mmu->device; + size_t mapped = 0; + + BUG_ON(NULL == iommu_pt); + + flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC; + + /* Set up the protection for the page(s) */ + if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) + flags &= ~IOMMU_WRITE; + + if (memdesc->priv & KGSL_MEMDESC_PRIVILEGED) + flags |= IOMMU_PRIV; + + if (kgsl_memdesc_is_secured(memdesc)) { + + if (!kgsl_mmu_is_secured(pt->mmu)) + return -EINVAL; + + mutex_lock(&device->mutex); + ret = kgsl_active_count_get(device); + if (!ret) { + _iommu_sync_mmu_pc(true); + mapped = iommu_map_sg(iommu_pt->domain, addr, + memdesc->sgt->sgl, memdesc->sgt->nents, + flags); + _iommu_sync_mmu_pc(false); + kgsl_active_count_put(device); + } + mutex_unlock(&device->mutex); + } else { + _iommu_sync_mmu_pc(true); + mapped = iommu_map_sg(iommu_pt->domain, addr, + memdesc->sgt->sgl, memdesc->sgt->nents, flags); + _iommu_sync_mmu_pc(false); + } + + if (mapped != size) { + KGSL_CORE_ERR("iommu_map_sg(%p, %016llX, %lld, %x) err: %zd\n", + iommu_pt->domain, addr, size, + flags, mapped); + return -ENODEV; + } + + ret = _iommu_add_guard_page(pt, memdesc, addr + size, flags); + if (ret) { + /* cleanup the partial mapping */ + _iommu_sync_mmu_pc(true); + iommu_unmap(iommu_pt->domain, addr, size); + _iommu_sync_mmu_pc(false); + } + + return ret; +} + +/* This function must be called with context bank attached */ +static void kgsl_iommu_clear_fsr(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + unsigned int sctlr_val; + + if (ctx->default_pt != NULL) { + kgsl_iommu_enable_clk(mmu); + KGSL_IOMMU_SET_CTX_REG(ctx, FSR, 0xffffffff); + /* + * Re-enable context fault interrupts after clearing + * FSR to prevent the interrupt from firing repeatedly + */ + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT); + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + /* + * Make sure the above register writes + * are not reordered across the barrier + * as we use writel_relaxed to write them + */ + wmb(); + kgsl_iommu_disable_clk(mmu); + } +} + +static void kgsl_iommu_pagefault_resume(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + + if (ctx->default_pt != NULL && ctx->fault) { + /* + * Write 1 to RESUME.TnR to terminate the + * 
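The flag derivation at the top of kgsl_iommu_map() above is a pure function of the memdesc flags: start from read/write/no-execute, drop write for read-only buffers, and add the privileged bit for privileged ones. A hedged standalone sketch of that mapping; the constant values below are stand-ins chosen for illustration, while the real driver uses the IOMMU_* bits from linux/iommu.h and the KGSL_* flags from kgsl.h:

/* Illustrative stand-ins for the kernel's IOMMU prot bits and KGSL flags. */
#define IOMMU_READ   (1 << 0)
#define IOMMU_WRITE  (1 << 1)
#define IOMMU_NOEXEC (1 << 3)
#define IOMMU_PRIV   (1 << 5)

#define KGSL_MEMFLAGS_GPUREADONLY (1 << 0)
#define KGSL_MEMDESC_PRIVILEGED   (1 << 1)

static unsigned int map_protflags(unsigned long memflags, unsigned long memdesc_priv)
{
        unsigned int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC;

        if (memflags & KGSL_MEMFLAGS_GPUREADONLY)
                flags &= ~IOMMU_WRITE;      /* GPU may read but never write */
        if (memdesc_priv & KGSL_MEMDESC_PRIVILEGED)
                flags |= IOMMU_PRIV;        /* privileged-only access */
        return flags;
}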
stalled transaction. + */ + KGSL_IOMMU_SET_CTX_REG(ctx, RESUME, 1); + /* + * Make sure the above register writes + * are not reordered across the barrier + * as we use writel_relaxed to write them + */ + wmb(); + ctx->fault = 0; + } +} + +static void kgsl_iommu_stop(struct kgsl_mmu *mmu) +{ + int i; + struct kgsl_iommu *iommu = mmu->priv; + + /* + * If the iommu supports retention, we don't need + * to detach when stopping. + */ + if (!MMU_FEATURE(mmu, KGSL_MMU_RETENTION)) { + for (i = 0; i < KGSL_IOMMU_CONTEXT_MAX; i++) + _detach_context(&iommu->ctx[i]); + } +} + +static int kgsl_iommu_close(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + int i; + + for (i = 0; i < KGSL_IOMMU_CONTEXT_MAX; i++) + _detach_context(&iommu->ctx[i]); + + kgsl_mmu_putpagetable(mmu->defaultpagetable); + mmu->defaultpagetable = NULL; + + + kgsl_mmu_putpagetable(mmu->securepagetable); + mmu->securepagetable = NULL; + + if (iommu->regbase != NULL) + iounmap(iommu->regbase); + + kgsl_sharedmem_free(&kgsl_secure_guard_page_memdesc); + + if (kgsl_guard_page != NULL) { + __free_page(kgsl_guard_page); + kgsl_guard_page = NULL; + } + + return 0; +} + +static u64 +kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + u64 val; + struct kgsl_iommu *iommu = mmu->priv; + /* + * We cannot enable or disable the clocks in interrupt context, this + * function is called from interrupt context if there is an axi error + */ + if (in_interrupt()) + return 0; + + kgsl_iommu_enable_clk(mmu); + val = KGSL_IOMMU_GET_CTX_REG_Q(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER], + TTBR0); + kgsl_iommu_disable_clk(mmu); + return val; +} + +/* + * kgsl_iommu_set_pt - Change the IOMMU pagetable of the primary context bank + * @mmu - Pointer to mmu structure + * @pt - Pagetable to switch to + * + * Set the new pagetable for the IOMMU by doing direct register writes + * to the IOMMU registers through the cpu + * + * Return - void + */ +static int kgsl_iommu_set_pt(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pt) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + int ret = 0; + uint64_t ttbr0, temp; + unsigned int contextidr; + unsigned long wait_for_flush; + + /* + * If using a global pagetable, we can skip all this + * because the pagetable will be set up by the iommu + * driver and never changed at runtime. + */ + if (!kgsl_mmu_is_perprocess(mmu)) + return 0; + + kgsl_iommu_enable_clk(mmu); + + ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt); + contextidr = kgsl_mmu_pagetable_get_contextidr(pt); + + /* + * Taking the liberty to spin idle since this codepath + * is invoked when we can spin safely for it to be idle + */ + ret = adreno_spin_idle(mmu->device, ADRENO_IDLE_TIMEOUT); + if (ret) + return ret; + + KGSL_IOMMU_SET_CTX_REG_Q(ctx, TTBR0, ttbr0); + KGSL_IOMMU_SET_CTX_REG(ctx, CONTEXTIDR, contextidr); + + mb(); + temp = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0); + + KGSL_IOMMU_SET_CTX_REG(ctx, TLBIALL, 1); + /* make sure the TBLI write completes before we wait */ + mb(); + /* + * Wait for flush to complete by polling the flush + * status bit of TLBSTATUS register for not more than + * 2 s. After 2s just exit, at that point the SMMU h/w + * may be stuck and will eventually cause GPU to hang + * or bring the system down. 
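The wait described in that comment is a bounded poll on the TLB status register. A simplified userspace model of the same pattern, with a wall-clock deadline standing in for jiffies and a hypothetical stub standing in for the register read:

#include <stdio.h>
#include <time.h>

#define TLBSTATUS_SACTIVE 0x1    /* mirrors KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE */

/* Hypothetical stub standing in for KGSL_IOMMU_GET_CTX_REG(ctx, TLBSTATUS). */
static unsigned int read_tlbstatus(void) { return 0; }

/* Poll the flush-status bit for at most two seconds, then give up with a warning. */
static void wait_for_tlb_flush(void)
{
        time_t deadline = time(NULL) + 2;

        while (read_tlbstatus() & TLBSTATUS_SACTIVE) {
                if (time(NULL) > deadline) {
                        fprintf(stderr, "Wait limit reached for IOMMU tlb flush\n");
                        break;
                }
        }
}

The driver version additionally writes TLBSYNC before polling and calls cpu_relax() inside the loop, as shown in the code that follows.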
+ */ + wait_for_flush = jiffies + msecs_to_jiffies(2000); + KGSL_IOMMU_SET_CTX_REG(ctx, TLBSYNC, 0); + while (KGSL_IOMMU_GET_CTX_REG(ctx, TLBSTATUS) & + (KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE)) { + if (time_after(jiffies, wait_for_flush)) { + KGSL_DRV_WARN(mmu->device, + "Wait limit reached for IOMMU tlb flush\n"); + break; + } + cpu_relax(); + } + + /* Disable smmu clock */ + kgsl_iommu_disable_clk(mmu); + + return ret; +} + +/* + * kgsl_iommu_set_pf_policy() - Set the pagefault policy for IOMMU + * @mmu: Pointer to mmu structure + * @pf_policy: The pagefault polict to set + * + * Check if the new policy indicated by pf_policy is same as current + * policy, if same then return else set the policy + */ +static int kgsl_iommu_set_pf_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + struct kgsl_iommu *iommu = mmu->priv; + struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER]; + struct adreno_device *adreno_dev = ADRENO_DEVICE(mmu->device); + int ret = 0; + unsigned int sctlr_val; + + if ((adreno_dev->ft_pf_policy & + BIT(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)) == + (pf_policy & BIT(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE))) + return 0; + + /* If not attached, policy will be updated during the next attach */ + if (ctx->default_pt != NULL) { + /* Need to idle device before changing options */ + ret = mmu->device->ftbl->idle(mmu->device); + if (ret) + return ret; + + kgsl_iommu_enable_clk(mmu); + + sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR); + + if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy)) { + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } else { + sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT); + sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT); + } + + KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val); + + kgsl_iommu_disable_clk(mmu); + } + + return ret; +} + +static struct kgsl_protected_registers * +kgsl_iommu_get_prot_regs(struct kgsl_mmu *mmu) +{ + struct kgsl_iommu *iommu = mmu->priv; + + return &iommu->protect; +} + +static struct kgsl_iommu_addr_entry *_find_gpuaddr( + struct kgsl_pagetable *pagetable, uint64_t gpuaddr) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node = pt->rbtree.rb_node; + + while (node != NULL) { + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + if (gpuaddr < entry->base) + node = node->rb_left; + else if (gpuaddr > entry->base) + node = node->rb_right; + else + return entry; + } + + return NULL; +} + +static int _remove_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct kgsl_iommu_addr_entry *entry; + + entry = _find_gpuaddr(pagetable, gpuaddr); + + if (entry != NULL) { + rb_erase(&entry->node, &pt->rbtree); + kmem_cache_free(addr_entry_cache, entry); + return 0; + } + + return -ENOMEM; +} + +static int _insert_gpuaddr(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node **node, *parent = NULL; + struct kgsl_iommu_addr_entry *new = + kmem_cache_alloc(addr_entry_cache, GFP_ATOMIC); + + if (new == NULL) + return -ENOMEM; + + new->base = gpuaddr; + new->size = size; + + node = &pt->rbtree.rb_node; + + while (*node != NULL) { + struct kgsl_iommu_addr_entry *this; + + parent = *node; + this = rb_entry(parent, struct kgsl_iommu_addr_entry, node); + + if (new->base < this->base) + node = &parent->rb_left; + else if (new->base > this->base) + node = 
&parent->rb_right; + else + BUG(); + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, &pt->rbtree); + + return 0; +} + +static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node = rb_first(&pt->rbtree); + uint64_t start; + + bottom = ALIGN(bottom, align); + start = bottom; + + while (node != NULL) { + uint64_t gap; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + /* + * Skip any entries that are outside of the range, but make sure + * to account for some that might straddle the lower bound + */ + if (entry->base < bottom) { + if (entry->base + entry->size > bottom) + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + continue; + } + + /* Stop if we went over the top */ + if (entry->base >= top) + break; + + /* Make sure there is a gap to consider */ + if (start < entry->base) { + gap = entry->base - start; + + if (gap >= size) + return start; + } + + /* Stop if there is no more room in the region */ + if (entry->base + entry->size >= top) + return (uint64_t) -ENOMEM; + + /* Start the next cycle at the end of the current entry */ + start = ALIGN(entry->base + entry->size, align); + node = rb_next(node); + } + + if (start + size <= top) + return start; + + return (uint64_t) -ENOMEM; +} + +static uint64_t _get_unmapped_area_topdown(struct kgsl_pagetable *pagetable, + uint64_t bottom, uint64_t top, uint64_t size, + uint64_t align) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node = rb_last(&pt->rbtree); + uint64_t end = top; + uint64_t mask = ~(align - 1); + struct kgsl_iommu_addr_entry *entry; + + /* Make sure that the bottom is correctly aligned */ + bottom = ALIGN(bottom, align); + + /* Make sure the requested size will fit in the range */ + if (size > (top - bottom)) + return -ENOMEM; + + /* Walk back through the list to find the highest entry in the range */ + for (node = rb_last(&pt->rbtree); node != NULL; node = rb_prev(node)) { + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + if (entry->base < top) + break; + } + + while (node != NULL) { + uint64_t offset; + + entry = rb_entry(node, struct kgsl_iommu_addr_entry, node); + + /* If the entire entry is below the range the search is over */ + if ((entry->base + entry->size) < bottom) + break; + + /* Get the top of the entry properly aligned */ + offset = ALIGN(entry->base + entry->size, align); + + /* + * Try to allocate the memory from the top of the gap, + * making sure that it fits between the top of this entry and + * the bottom of the previous one + */ + + if (offset < end) { + uint64_t chunk = (end - size) & mask; + + if (chunk >= offset) + return chunk; + } + + /* + * If we get here and the current entry is outside of the range + * then we are officially out of room + */ + + if (entry->base < bottom) + return (uint64_t) -ENOMEM; + + /* Set the top of the gap to the current entry->base */ + end = entry->base; + + /* And move on to the next lower entry */ + node = rb_prev(node); + } + + /* If we get here then there are no more entries in the region */ + if ((end > size) && (((end - size) & mask) >= bottom)) + return (end - size) & mask; + + return (uint64_t) -ENOMEM; +} + +static uint64_t kgsl_iommu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment) +{ + uint64_t addr; + + /* Avoid black 
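Both _get_unmapped_area() variants above walk the address-sorted entries looking for the first hole of at least 'size' bytes at the requested alignment. A compact sketch of the bottom-up search, with a plain sorted array standing in for the rbtree; the helper returns 0 instead of -ENOMEM when nothing fits, and ALIGN is the usual power-of-two round-up:

#include <stddef.h>
#include <stdint.h>

#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

struct va_entry { uint64_t base, size; };   /* sorted by base, non-overlapping */

/* First-fit search inside [bottom, top); returns 0 when nothing fits. */
static uint64_t find_gap(const struct va_entry *e, size_t n,
                         uint64_t bottom, uint64_t top,
                         uint64_t size, uint64_t align)
{
        uint64_t start = ALIGN(bottom, align);
        size_t i;

        for (i = 0; i < n; i++) {
                if (e[i].base >= top)
                        break;                 /* rest of the list is out of range */
                if (e[i].base + e[i].size <= start)
                        continue;              /* entirely below the current cursor */
                if (start + size <= e[i].base)
                        return start;          /* the gap before this entry fits */
                start = ALIGN(e[i].base + e[i].size, align);
        }
        return (start + size <= top) ? start : 0;
}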
holes */ + BUG_ON(end <= start); + + spin_lock(&pagetable->lock); + addr = _get_unmapped_area_topdown(pagetable, + start, end, size, alignment); + spin_unlock(&pagetable->lock); + return addr; +} + +#define ADDR_IN_GLOBAL(_a) \ + (((_a) >= KGSL_MMU_GLOBAL_MEM_BASE) && \ + ((_a) < (KGSL_MMU_GLOBAL_MEM_BASE + KGSL_MMU_GLOBAL_MEM_SIZE))) + +static int kgsl_iommu_set_svm_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size) +{ + int ret = -ENOMEM; + struct kgsl_iommu_pt *pt = pagetable->priv; + struct rb_node *node; + + /* Make sure the requested address doesn't fall in the global range */ + if (ADDR_IN_GLOBAL(gpuaddr) || ADDR_IN_GLOBAL(gpuaddr + size)) + return -ENOMEM; + + spin_lock(&pagetable->lock); + node = pt->rbtree.rb_node; + + while (node != NULL) { + uint64_t start, end; + struct kgsl_iommu_addr_entry *entry = rb_entry(node, + struct kgsl_iommu_addr_entry, node); + + start = entry->base; + end = entry->base + entry->size; + + if (gpuaddr + size <= start) + node = node->rb_left; + else if (end <= gpuaddr) + node = node->rb_right; + else + goto out; + } + + ret = _insert_gpuaddr(pagetable, gpuaddr, size); +out: + spin_unlock(&pagetable->lock); + return ret; +} + + +static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + int ret = 0; + uint64_t addr, start, end; + uint64_t size = memdesc->size; + unsigned int align; + + BUG_ON(kgsl_memdesc_use_cpu_map(memdesc)); + + if (memdesc->flags & KGSL_MEMFLAGS_SECURE && + pagetable->name != KGSL_MMU_SECURE_PT) + return -EINVAL; + + if (kgsl_memdesc_has_guard_page(memdesc)) + size += kgsl_memdesc_guard_page_size(pagetable->mmu, memdesc); + + align = 1 << kgsl_memdesc_get_align(memdesc); + + if (memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT) { + start = pt->compat_va_start; + end = pt->compat_va_end; + } else { + start = pt->va_start; + end = pt->va_end; + } + + spin_lock(&pagetable->lock); + + addr = _get_unmapped_area(pagetable, start, end, size, align); + + if (addr == (uint64_t) -ENOMEM) { + ret = -ENOMEM; + goto out; + } + + ret = _insert_gpuaddr(pagetable, addr, size); + if (ret == 0) + memdesc->gpuaddr = addr; + +out: + spin_unlock(&pagetable->lock); + return ret; +} + +static void kgsl_iommu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + spin_lock(&pagetable->lock); + + if (_remove_gpuaddr(pagetable, memdesc->gpuaddr)) + BUG(); + + spin_unlock(&pagetable->lock); +} + +static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + bool gpu_compat = (memflags & KGSL_MEMFLAGS_FORCE_32BIT) != 0; + + if (lo != NULL) + *lo = gpu_compat ? pt->compat_va_start : pt->svm_start; + if (hi != NULL) + *hi = gpu_compat ? 
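kgsl_iommu_set_svm_region() above first refuses any request whose start or end lands in the global window, via ADDR_IN_GLOBAL(). The same check as a standalone helper, with the base and size values taken from kgsl_mmu.h later in this patch:

#include <stdbool.h>
#include <stdint.h>

#define KGSL_MMU_GLOBAL_MEM_BASE 0xf8000000ULL
#define KGSL_MMU_GLOBAL_MEM_SIZE (8ULL << 20)   /* SZ_8M */

static bool addr_in_global(uint64_t a)
{
        return a >= KGSL_MMU_GLOBAL_MEM_BASE &&
               a < (KGSL_MMU_GLOBAL_MEM_BASE + KGSL_MMU_GLOBAL_MEM_SIZE);
}

/* An SVM request is refused if either end lands in the global window. */
static bool svm_request_ok(uint64_t gpuaddr, uint64_t size)
{
        return !addr_in_global(gpuaddr) && !addr_in_global(gpuaddr + size);
}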
pt->compat_va_end : pt->svm_end; + + return 0; +} + +static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + struct kgsl_iommu_pt *pt = pagetable->priv; + + if (gpuaddr == 0) + return false; + + if (gpuaddr >= pt->va_start && gpuaddr < pt->va_end) + return true; + + if (gpuaddr >= pt->compat_va_start && gpuaddr < pt->compat_va_end) + return true; + + if (gpuaddr >= pt->svm_start && gpuaddr < pt->svm_end) + return true; + + return false; +} + +struct kgsl_mmu_ops kgsl_iommu_ops = { + .mmu_init = kgsl_iommu_init, + .mmu_close = kgsl_iommu_close, + .mmu_start = kgsl_iommu_start, + .mmu_stop = kgsl_iommu_stop, + .mmu_set_pt = kgsl_iommu_set_pt, + .mmu_clear_fsr = kgsl_iommu_clear_fsr, + .mmu_get_current_ttbr0 = kgsl_iommu_get_current_ttbr0, + .mmu_enable_clk = kgsl_iommu_enable_clk, + .mmu_disable_clk = kgsl_iommu_disable_clk, + .mmu_get_reg_ahbaddr = kgsl_iommu_get_reg_ahbaddr, + .mmu_set_pf_policy = kgsl_iommu_set_pf_policy, + .mmu_pagefault_resume = kgsl_iommu_pagefault_resume, + .mmu_get_prot_regs = kgsl_iommu_get_prot_regs, + .mmu_init_pt = kgsl_iommu_init_pt, +}; + +static struct kgsl_mmu_pt_ops iommu_pt_ops = { + .mmu_map = kgsl_iommu_map, + .mmu_unmap = kgsl_iommu_unmap, + .mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable, + .get_ttbr0 = kgsl_iommu_get_ttbr0, + .get_contextidr = kgsl_iommu_get_contextidr, + .get_gpuaddr = kgsl_iommu_get_gpuaddr, + .put_gpuaddr = kgsl_iommu_put_gpuaddr, + .set_svm_region = kgsl_iommu_set_svm_region, + .find_svm_region = kgsl_iommu_find_svm_region, + .svm_range = kgsl_iommu_svm_range, + .addr_in_range = kgsl_iommu_addr_in_range, +}; diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h new file mode 100644 index 000000000000..738c8296642a --- /dev/null +++ b/drivers/gpu/msm/kgsl_iommu.h @@ -0,0 +1,210 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_IOMMU_H +#define __KGSL_IOMMU_H + +#ifdef CONFIG_MSM_IOMMU +#include <linux/qcom_iommu.h> +#endif +#include <linux/of.h> +#include "kgsl.h" + +#define KGSL_IOMMU_SECURE_SIZE SZ_256M +#define KGSL_IOMMU_SECURE_END KGSL_MMU_GLOBAL_MEM_BASE +#define KGSL_IOMMU_SECURE_BASE \ + (KGSL_MMU_GLOBAL_MEM_BASE - KGSL_IOMMU_SECURE_SIZE) + +#define KGSL_IOMMU_SVM_BASE32 0x300000 +#define KGSL_IOMMU_SVM_END32 (0xC0000000 - SZ_16M) + +#define KGSL_IOMMU_VA_BASE64 0x500000000ULL +#define KGSL_IOMMU_VA_END64 0x600000000ULL +/* + * Note: currently we only support 36 bit addresses, + * but the CPU supports 39. Eventually this range + * should change to high part of the 39 bit address + * space just like the CPU. 
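To make the layout encoded in these defines concrete: the secure window is the 256 MB immediately below the global region, and the 32-bit SVM window stops 16 MB short of 3 GB. A small runnable sketch that derives and prints those boundaries; the SZ_* constants are expanded by hand and the output format is illustrative:

#include <inttypes.h>
#include <stdio.h>

#define SZ_16M  (16ULL << 20)
#define SZ_256M (256ULL << 20)

#define KGSL_MMU_GLOBAL_MEM_BASE 0xf8000000ULL
#define KGSL_IOMMU_SECURE_SIZE   SZ_256M
#define KGSL_IOMMU_SECURE_END    KGSL_MMU_GLOBAL_MEM_BASE
#define KGSL_IOMMU_SECURE_BASE   (KGSL_MMU_GLOBAL_MEM_BASE - KGSL_IOMMU_SECURE_SIZE)
#define KGSL_IOMMU_SVM_BASE32    0x300000ULL
#define KGSL_IOMMU_SVM_END32     (0xC0000000ULL - SZ_16M)

int main(void)
{
        printf("secure: [%#" PRIx64 ", %#" PRIx64 ")\n",
               (uint64_t)KGSL_IOMMU_SECURE_BASE, (uint64_t)KGSL_IOMMU_SECURE_END);
        printf("svm32 : [%#" PRIx64 ", %#" PRIx64 ")\n",
               (uint64_t)KGSL_IOMMU_SVM_BASE32, (uint64_t)KGSL_IOMMU_SVM_END32);
        return 0;
}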
+ */ +#define KGSL_IOMMU_SVM_BASE64 0x700000000ULL +#define KGSL_IOMMU_SVM_END64 0x800000000ULL + +/* Pagetable virtual base */ +#define KGSL_IOMMU_CTX_OFFSET_V1 0x8000 +#define KGSL_IOMMU_CTX_OFFSET_V2 0x9000 +#define KGSL_IOMMU_CTX_OFFSET_V2_A530 0x8000 +#define KGSL_IOMMU_CTX_OFFSET_A405V2 0x8000 +#define KGSL_IOMMU_CTX_SHIFT 12 + +/* FSYNR1 V0 fields */ +#define KGSL_IOMMU_FSYNR1_AWRITE_MASK 0x00000001 +#define KGSL_IOMMU_FSYNR1_AWRITE_SHIFT 8 +/* FSYNR0 V1 fields */ +#define KGSL_IOMMU_V1_FSYNR0_WNR_MASK 0x00000001 +#define KGSL_IOMMU_V1_FSYNR0_WNR_SHIFT 4 + +/* TLBSTATUS register fields */ +#define KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE BIT(0) + +/* IMPLDEF_MICRO_MMU_CTRL register fields */ +#define KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT 0x00000004 +#define KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE 0x00000008 + +/* SCTLR fields */ +#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT 8 +#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT 7 +#define KGSL_IOMMU_SCTLR_CFIE_SHIFT 6 + +enum kgsl_iommu_reg_map { + KGSL_IOMMU_CTX_SCTLR = 0, + KGSL_IOMMU_CTX_TTBR0, + KGSL_IOMMU_CTX_CONTEXTIDR, + KGSL_IOMMU_CTX_FSR, + KGSL_IOMMU_CTX_FAR, + KGSL_IOMMU_CTX_TLBIALL, + KGSL_IOMMU_CTX_RESUME, + KGSL_IOMMU_CTX_FSYNR0, + KGSL_IOMMU_CTX_FSYNR1, + KGSL_IOMMU_CTX_TLBSYNC, + KGSL_IOMMU_CTX_TLBSTATUS, + KGSL_IOMMU_REG_MAX +}; + +/* Max number of iommu clks per IOMMU unit */ +#define KGSL_IOMMU_MAX_CLKS 5 + +enum kgsl_iommu_context_id { + KGSL_IOMMU_CONTEXT_USER = 0, + KGSL_IOMMU_CONTEXT_SECURE = 1, + KGSL_IOMMU_CONTEXT_MAX, +}; + +/* offset at which a nop command is placed in setstate_memory */ +#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024 + +/* + * struct kgsl_iommu_context - Structure holding data about an iommu context + * bank + * @dev: pointer to the iommu context's device + * @name: context name + * @id: The id of the context, used for deciding how it is used. + * @cb_num: The hardware context bank number, used for calculating register + * offsets. + * @kgsldev: The kgsl device that uses this context. + * @fault: Flag when set indicates that this iommu device has caused a page + * fault + * @gpu_offset: Offset of this context bank in the GPU register space + * @default_pt: The default pagetable for this context, + * it may be changed by self programming. + */ +struct kgsl_iommu_context { + struct device *dev; + const char *name; + enum kgsl_iommu_context_id id; + unsigned int cb_num; + struct kgsl_device *kgsldev; + int fault; + void __iomem *regbase; + unsigned int gpu_offset; + struct kgsl_pagetable *default_pt; +}; + +/* + * struct kgsl_iommu - Structure holding iommu data for kgsl driver + * @ctx: Array of kgsl_iommu_context structs + * @regbase: Virtual address of the IOMMU register base + * @regstart: Physical address of the iommu registers + * @regsize: Length of the iommu register region. + * @clk_enable_count: The ref count of clock enable calls + * @clks: Array of pointers to IOMMU clocks + * @micro_mmu_ctrl: GPU register offset of this glob al register + * @smmu_info: smmu info used in a5xx preemption + * @protect: register protection settings for the iommu. 
+ */ +struct kgsl_iommu { + struct kgsl_iommu_context ctx[KGSL_IOMMU_CONTEXT_MAX]; + void __iomem *regbase; + unsigned long regstart; + unsigned int regsize; + atomic_t clk_enable_count; + struct clk *clks[KGSL_IOMMU_MAX_CLKS]; + unsigned int micro_mmu_ctrl; + struct kgsl_memdesc smmu_info; + unsigned int version; + struct kgsl_protected_registers protect; +}; + +/* + * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver + * @domain: Pointer to the iommu domain that contains the iommu pagetable + * @ttbr0: register value to set when using this pagetable + * @contextidr: register value to set when using this pagetable + * @attached: is the pagetable attached? + * @rbtree: all buffers mapped into the pagetable, indexed by gpuaddr + * @va_start: Start of virtual range used in this pagetable. + * @va_end: End of virtual range. + * @svm_start: Start of shared virtual memory range. Addresses in this + * range are also valid in the process's CPU address space. + * @svm_end: End of the shared virtual memory range. + * @svm_start: 32 bit compatible range, for old clients who lack bits + * @svm_end: end of 32 bit compatible range + */ +struct kgsl_iommu_pt { + struct iommu_domain *domain; + u64 ttbr0; + u32 contextidr; + bool attached; + + struct rb_root rbtree; + + uint64_t va_start; + uint64_t va_end; + uint64_t svm_start; + uint64_t svm_end; + uint64_t compat_va_start; + uint64_t compat_va_end; +}; + +/* + * offset of context bank 0 from the start of the SMMU register space. + */ +#define KGSL_IOMMU_CB0_OFFSET 0x8000 +/* size of each context bank's register space */ +#define KGSL_IOMMU_CB_SHIFT 12 + +/* Macros to read/write IOMMU registers */ +extern const unsigned int kgsl_iommu_reg_list[KGSL_IOMMU_REG_MAX]; + +static inline void __iomem * +kgsl_iommu_reg(struct kgsl_iommu_context *ctx, enum kgsl_iommu_reg_map reg) +{ + BUG_ON(ctx->regbase == NULL); + BUG_ON(reg >= KGSL_IOMMU_REG_MAX); + return ctx->regbase + kgsl_iommu_reg_list[reg]; +} + +#define KGSL_IOMMU_SET_CTX_REG_Q(_ctx, REG, val) \ + writeq_relaxed((val), \ + kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + +#define KGSL_IOMMU_GET_CTX_REG_Q(_ctx, REG) \ + readq_relaxed(kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + +#define KGSL_IOMMU_SET_CTX_REG(_ctx, REG, val) \ + writel_relaxed((val), \ + kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + +#define KGSL_IOMMU_GET_CTX_REG(_ctx, REG) \ + readl_relaxed(kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG)) + + +#endif diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h new file mode 100644 index 000000000000..70480f8e9189 --- /dev/null +++ b/drivers/gpu/msm/kgsl_log.h @@ -0,0 +1,137 @@ +/* Copyright (c) 2002,2008-2011,2013-2014 The Linux Foundation. + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_LOG_H +#define __KGSL_LOG_H + +#define KGSL_LOG_INFO(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 6) \ + dev_info(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_WARN(dev, lvl, fmt, args...) 
\ + do { \ + if ((lvl) >= 4) \ + dev_warn(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_ERR(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 3) \ + dev_err(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_CRIT(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 2) \ + dev_crit(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_LOG_FATAL(dev, lvl, fmt, args...) \ + do { \ + dev_crit(dev, "|%s| " fmt, __func__, ##args);\ + BUG(); \ + } while (0) + +#define KGSL_LOG_POSTMORTEM_WRITE(_dev, fmt, args...) \ + do { dev_crit(_dev->dev, fmt, ##args); } while (0) + +#define KGSL_LOG_DUMP(_dev, fmt, args...) dev_err(_dev->dev, fmt, ##args) + +#define KGSL_DEV_ERR_ONCE(_dev, fmt, args...) \ +({ \ + static bool kgsl_dev_err_once; \ + \ + if (!kgsl_dev_err_once) { \ + kgsl_dev_err_once = true; \ + dev_crit(_dev->dev, "|%s| " fmt, __func__, ##args); \ + } \ +}) + +#define KGSL_LOG_CRIT_RATELIMITED(dev, lvl, fmt, args...) \ + do { \ + if ((lvl) >= 2) \ + dev_crit_ratelimited(dev, "|%s| " fmt, \ + __func__, ##args);\ + } while (0) + +#define KGSL_DRV_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_CRIT_RATELIMIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT_RATELIMITED(_dev->dev, _dev->drv_log, fmt, ##args) +#define KGSL_DRV_FATAL(_dev, fmt, args...) \ +KGSL_LOG_FATAL((_dev)->dev, (_dev)->drv_log, fmt, ##args) + +#define KGSL_CMD_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->cmd_log, fmt, ##args) +#define KGSL_CMD_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->cmd_log, fmt, ##args) +#define KGSL_CMD_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->cmd_log, fmt, ##args) +#define KGSL_CMD_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->cmd_log, fmt, ##args) + +#define KGSL_CTXT_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->ctxt_log, fmt, ##args) +#define KGSL_CTXT_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->ctxt_log, fmt, ##args) +#define KGSL_CTXT_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->ctxt_log, fmt, ##args) +#define KGSL_CTXT_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->ctxt_log, fmt, ##args) + +#define KGSL_MEM_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->mem_log, fmt, ##args) +#define KGSL_MEM_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->mem_log, fmt, ##args) +#define KGSL_MEM_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->mem_log, fmt, ##args) +#define KGSL_MEM_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->mem_log, fmt, ##args) + +#define KGSL_PWR_INFO(_dev, fmt, args...) \ +KGSL_LOG_INFO(_dev->dev, _dev->pwr_log, fmt, ##args) +#define KGSL_PWR_WARN(_dev, fmt, args...) \ +KGSL_LOG_WARN(_dev->dev, _dev->pwr_log, fmt, ##args) +#define KGSL_PWR_ERR(_dev, fmt, args...) \ +KGSL_LOG_ERR(_dev->dev, _dev->pwr_log, fmt, ##args) +#define KGSL_PWR_CRIT(_dev, fmt, args...) \ +KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args) + +/* Core error messages - these are for core KGSL functions that have + no device associated with them (such as memory) */ + +#define KGSL_CORE_ERR(fmt, args...) 
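All of these wrappers gate on a per-device verbosity number: 6 and above enables info, 4 warnings, 3 errors, 2 critical, roughly mirroring syslog severities. A minimal userspace imitation of the same gating, with printf standing in for the dev_*() helpers; the macro and variable names here are invented for the example:

#include <stdio.h>

#define LOG_INFO(lvl, fmt, ...) \
        do { if ((lvl) >= 6) printf("info: |%s| " fmt, __func__, ##__VA_ARGS__); } while (0)
#define LOG_WARN(lvl, fmt, ...) \
        do { if ((lvl) >= 4) printf("warn: |%s| " fmt, __func__, ##__VA_ARGS__); } while (0)

static void example(void)
{
        int drv_log = 3;                       /* per-device verbosity knob */

        LOG_INFO(drv_log, "suppressed below level 6\n");
        LOG_WARN(drv_log, "suppressed until drv_log >= 4\n");
}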
\ +pr_err("kgsl: %s: " fmt, __func__, ##args) + +#define KGSL_CORE_ERR_ONCE(fmt, args...) \ +({ \ + static bool kgsl_core_err_once; \ + if (!kgsl_core_err_once) { \ + kgsl_core_err_once = true; \ + pr_err("kgsl: %s: " fmt, __func__, ##args); \ + } \ +}) + +#endif /* __KGSL_LOG_H */ diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c new file mode 100644 index 000000000000..9fc6310d0d36 --- /dev/null +++ b/drivers/gpu/msm/kgsl_mmu.c @@ -0,0 +1,886 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/export.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/genalloc.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/iommu.h> +#include <linux/types.h> + +#include "kgsl.h" +#include "kgsl_mmu.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" + +static enum kgsl_mmutype kgsl_mmu_type = KGSL_MMU_TYPE_NONE; + +static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable); + +/* + * There are certain memory allocations (ringbuffer, memstore, etc) that need to + * be present at the same address in every pagetable. We call these "global" + * pagetable entries. There are relatively few of these and they are mostly + * stable (defined at init time) but the actual number of globals can differ + * slight depending on the target and implementation. + * + * Here we define an array and a simple allocator to keep track of the currently + * active global entries. Each entry is assigned a unique address inside of a + * MMU implementation specific "global" region. The addresses are assigned + * sequentially and never re-used to avoid having to go back and reprogram + * existing pagetables. The entire list of active entries are mapped and + * unmapped into every new pagetable as it is created and destroyed. + * + * Because there are relatively few entries and they are defined at boot time we + * don't need to go over the top to define a dynamic allocation scheme. It will + * be less wasteful to pick a static number with a little bit of growth + * potential. + */ + +#define KGSL_MAX_GLOBAL_PT_ENTRIES 32 + +/** + * struct kgsl_global_pt_entries - Collection of global pagetable entries + * @offset - offset into the global PT space to be assigned to then next + * allocation + * @entries: Array of assigned memdesc entries + * @count: Number of currently assigned entries + * + * Maintain a list of global pagetable entries. 
Pagetables are shared between + * devices so the global pt entry list needs to be driver wide too + */ +static struct kgsl_global_pt_entries { + unsigned int offset; + struct kgsl_memdesc *entries[KGSL_MAX_GLOBAL_PT_ENTRIES]; + int count; +} kgsl_global_pt_entries; + +/** + * kgsl_search_global_pt_entries() - Check to see if the given GPU address + * belongs to any of the global PT entries + * @gpuaddr: GPU address to search for + * @size: Size of the region to search for + * + * Search all the global pagetable entries for the GPU address and size and + * return the memory descriptor + */ +struct kgsl_memdesc *kgsl_search_global_pt_entries(unsigned int gpuaddr, + unsigned int size) +{ + int i; + + for (i = 0; i < KGSL_MAX_GLOBAL_PT_ENTRIES; i++) { + struct kgsl_memdesc *memdesc = + kgsl_global_pt_entries.entries[i]; + + if (memdesc && kgsl_gpuaddr_in_memdesc(memdesc, gpuaddr, size)) + return memdesc; + } + + return NULL; +} +EXPORT_SYMBOL(kgsl_search_global_pt_entries); + +/** + * kgsl_unmap_global_pt_entries() - Unmap all global entries from the given + * pagetable + * @pagetable: Pointer to a kgsl_pagetable structure + * + * Unmap all the current active global entries from the specified pagetable + */ +static void kgsl_unmap_global_pt_entries(struct kgsl_pagetable *pagetable) +{ + int i; + unsigned long flags; + + BUG_ON(pagetable->name == KGSL_MMU_GLOBAL_PT); + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + if (pagetable->globals_mapped == false) { + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return; + } + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + for (i = 0; i < KGSL_MAX_GLOBAL_PT_ENTRIES; i++) { + struct kgsl_memdesc *entry = kgsl_global_pt_entries.entries[i]; + if (entry != NULL) + kgsl_mmu_unmap(pagetable, entry); + } + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + pagetable->globals_mapped = false; + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); +} + +/** + * kgsl_map_global_pt_entries() - Map all active global entries into the given + * pagetable + * @pagetable: Pointer to a kgsl_pagetable structure + * + * Map all the current global PT entries into the specified pagetable. 
+ */ +void kgsl_map_global_pt_entries(struct kgsl_pagetable *pagetable) +{ + int i, ret = 0; + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + if (pagetable->globals_mapped == true) { + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return; + } + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + for (i = 0; !ret && i < KGSL_MAX_GLOBAL_PT_ENTRIES; i++) { + struct kgsl_memdesc *entry = kgsl_global_pt_entries.entries[i]; + + if (entry != NULL) { + ret = kgsl_mmu_map(pagetable, entry); + BUG_ON(ret); + } + } + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + pagetable->globals_mapped = true; + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); +} +EXPORT_SYMBOL(kgsl_map_global_pt_entries); + +/** + * kgsl_remove_global_pt_entry() - Remove a memory descriptor from the global PT + * entry list + * @memdesc: Pointer to the kgsl memory descriptor to remove + * + * Remove the specified memory descriptor from the current list of global + * pagetable entries + */ +void kgsl_remove_global_pt_entry(struct kgsl_memdesc *memdesc) +{ + int i, j; + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return; + + if (memdesc->gpuaddr == 0) + return; + + for (i = 0; i < kgsl_global_pt_entries.count; i++) { + if (kgsl_global_pt_entries.entries[i] == memdesc) { + memdesc->gpuaddr = 0; + memdesc->priv &= ~KGSL_MEMDESC_GLOBAL; + for (j = i; j < kgsl_global_pt_entries.count; j++) + kgsl_global_pt_entries.entries[j] = + kgsl_global_pt_entries.entries[j + 1]; + kgsl_global_pt_entries.entries[j - 1] = NULL; + kgsl_global_pt_entries.count--; + break; + } + } +} +EXPORT_SYMBOL(kgsl_remove_global_pt_entry); + +/** + * kgsl_add_global_pt_entry() - Add a new global PT entry to the active list + * @mmu: Pointer to a kgsl_mmu structure for the active MMU implementation + * @memdesc: Pointer to the kgsl memory descriptor to add + * + * Add a memory descriptor to the list of global pagetable entries. + */ +int kgsl_add_global_pt_entry(struct kgsl_device *device, + struct kgsl_memdesc *memdesc) +{ + int i; + int index = 0; + uint64_t gaddr = KGSL_MMU_GLOBAL_MEM_BASE; + uint64_t size = ALIGN(memdesc->size, PAGE_SIZE); + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) { + memdesc->gpuaddr = (uint64_t) memdesc->physaddr; + return 0; + } + + /* do we already have a mapping? */ + if (memdesc->gpuaddr != 0) + return 0; + + if (kgsl_global_pt_entries.count == KGSL_MAX_GLOBAL_PT_ENTRIES) + return -ENOMEM; + + /* + * search for the first free slot by going through all valid entries + * and checking for overlap. 
All entries are in increasing order of + * gpuaddr + */ + for (i = 0; i < kgsl_global_pt_entries.count; i++) { + if (kgsl_addr_range_overlap(gaddr, size, + kgsl_global_pt_entries.entries[i]->gpuaddr, + kgsl_global_pt_entries.entries[i]->size)) + /* On a clash set gaddr to end of clashing entry */ + gaddr = kgsl_global_pt_entries.entries[i]->gpuaddr + + kgsl_global_pt_entries.entries[i]->size; + else + break; + } + index = i; + if ((gaddr + size) >= + (KGSL_MMU_GLOBAL_MEM_BASE + KGSL_MMU_GLOBAL_MEM_SIZE)) + return -ENOMEM; + + memdesc->gpuaddr = gaddr; + + memdesc->priv |= KGSL_MEMDESC_GLOBAL; + /* + * Move the entries from index till the last entry 1 slot right leaving + * the slot at index empty for the newcomer + */ + for (i = kgsl_global_pt_entries.count - 1; i >= index; i--) + kgsl_global_pt_entries.entries[i + 1] = + kgsl_global_pt_entries.entries[i]; + kgsl_global_pt_entries.entries[index] = memdesc; + kgsl_global_pt_entries.count++; + + return 0; +} +EXPORT_SYMBOL(kgsl_add_global_pt_entry); + +static void kgsl_destroy_pagetable(struct kref *kref) +{ + struct kgsl_pagetable *pagetable = container_of(kref, + struct kgsl_pagetable, refcount); + + kgsl_mmu_detach_pagetable(pagetable); + + kgsl_unmap_global_pt_entries(pagetable); + + pagetable->pt_ops->mmu_destroy_pagetable(pagetable); + + kfree(pagetable); +} + +static inline void kgsl_put_pagetable(struct kgsl_pagetable *pagetable) +{ + if (pagetable) + kref_put(&pagetable->refcount, kgsl_destroy_pagetable); +} + +static struct kgsl_pagetable * +kgsl_get_pagetable(unsigned long name) +{ + struct kgsl_pagetable *pt, *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (name == pt->name && kref_get_unless_zero(&pt->refcount)) { + ret = pt; + break; + } + } + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + return ret; +} + +static struct kgsl_pagetable * +_get_pt_from_kobj(struct kobject *kobj) +{ + unsigned int ptname; + + if (!kobj) + return NULL; + + if (kstrtou32(kobj->name, 0, &ptname)) + return NULL; + + return kgsl_get_pagetable(ptname); +} + +static ssize_t +sysfs_show_entries(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + unsigned int val = atomic_read(&pt->stats.entries); + + ret += snprintf(buf, PAGE_SIZE, "%d\n", val); + } + + kgsl_put_pagetable(pt); + return ret; +} + +static ssize_t +sysfs_show_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.mapped); + + ret += snprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kgsl_put_pagetable(pt); + return ret; +} + +static ssize_t +sysfs_show_max_mapped(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct kgsl_pagetable *pt; + int ret = 0; + + pt = _get_pt_from_kobj(kobj); + + if (pt) { + uint64_t val = atomic_long_read(&pt->stats.max_mapped); + + ret += snprintf(buf, PAGE_SIZE, "%llu\n", val); + } + + kgsl_put_pagetable(pt); + return ret; +} + +static struct kobj_attribute attr_entries = { + .attr = { .name = "entries", .mode = 0444 }, + .show = sysfs_show_entries, + .store = NULL, +}; + +static struct kobj_attribute attr_mapped = { + .attr = { .name = "mapped", .mode = 0444 }, + .show = sysfs_show_mapped, + .store = NULL, +}; + +static struct kobj_attribute attr_max_mapped = { + 
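Tying this back to the allocator in kgsl_add_global_pt_entry() above: the candidate address starts at the bottom of the global window, is bumped past each clashing entry in gpuaddr order, and the insert fails once the candidate would run past the 8 MB window. A simplified array-based sketch of that placement policy, where kgsl_addr_range_overlap() is modelled by a local helper and 0 stands in for the driver's -ENOMEM:

#include <stdint.h>

#define GLOBAL_BASE 0xf8000000ULL            /* KGSL_MMU_GLOBAL_MEM_BASE */
#define GLOBAL_SIZE (8ULL << 20)             /* KGSL_MMU_GLOBAL_MEM_SIZE (SZ_8M) */

struct global_entry { uint64_t gpuaddr, size; };   /* kept sorted by gpuaddr */

static int overlap(uint64_t a, uint64_t asize, uint64_t b, uint64_t bsize)
{
        return a < b + bsize && b < a + asize;
}

/* Pick a global GPU address for a new entry of 'size' bytes; 0 means no room. */
static uint64_t place_global(const struct global_entry *e, int count, uint64_t size)
{
        uint64_t gaddr = GLOBAL_BASE;
        int i;

        for (i = 0; i < count; i++) {
                if (overlap(gaddr, size, e[i].gpuaddr, e[i].size))
                        gaddr = e[i].gpuaddr + e[i].size;   /* skip past the clash */
                else
                        break;
        }
        return (gaddr + size >= GLOBAL_BASE + GLOBAL_SIZE) ? 0 : gaddr;
}

The real function additionally shifts the later entries one slot to the right so the array stays sorted by gpuaddr.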
.attr = { .name = "max_mapped", .mode = 0444 }, + .show = sysfs_show_max_mapped, + .store = NULL, +}; + +static struct attribute *pagetable_attrs[] = { + &attr_entries.attr, + &attr_mapped.attr, + &attr_max_mapped.attr, + NULL, +}; + +static struct attribute_group pagetable_attr_group = { + .attrs = pagetable_attrs, +}; + +static void +pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + if (pagetable->kobj) + sysfs_remove_group(pagetable->kobj, + &pagetable_attr_group); + + kobject_put(pagetable->kobj); + pagetable->kobj = NULL; +} + +static int +pagetable_add_sysfs_objects(struct kgsl_pagetable *pagetable) +{ + char ptname[16]; + int ret = -ENOMEM; + + snprintf(ptname, sizeof(ptname), "%d", pagetable->name); + pagetable->kobj = kobject_create_and_add(ptname, + kgsl_driver.ptkobj); + if (pagetable->kobj == NULL) + goto err; + + ret = sysfs_create_group(pagetable->kobj, &pagetable_attr_group); + +err: + if (ret) { + if (pagetable->kobj) + kobject_put(pagetable->kobj); + + pagetable->kobj = NULL; + } + + return ret; +} + +void +kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable) +{ + unsigned long flags; + + /* NOMMU has no pagetable so return early if its NULL */ + if (!pagetable) + return; + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + + if (!list_empty(&pagetable->list)) + list_del_init(&pagetable->list); + + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + pagetable_remove_sysfs_objects(pagetable); +} + +int +kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, u64 pt_base) +{ + struct kgsl_pagetable *pt; + int ptid = -1; + + if (!mmu->mmu_ops) + return KGSL_MMU_GLOBAL_PT; + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == pt_base) { + ptid = (int) pt->name; + break; + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ptid; +} +EXPORT_SYMBOL(kgsl_mmu_get_ptname_from_ptbase); + +struct kgsl_pagetable *kgsl_mmu_get_pt_from_ptname(struct kgsl_mmu *mmu, + int ptname) +{ + struct kgsl_pagetable *pt; + + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (pt->name == ptname) { + spin_unlock(&kgsl_driver.ptlock); + return pt; + } + } + spin_unlock(&kgsl_driver.ptlock); + return NULL; + +} +EXPORT_SYMBOL(kgsl_mmu_get_pt_from_ptname); + +unsigned int +kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, phys_addr_t pt_base, + unsigned int addr) +{ + struct kgsl_pagetable *pt; + unsigned int ret = 0; + + if (!mmu->mmu_ops) + return 0; + spin_lock(&kgsl_driver.ptlock); + list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) { + if (kgsl_mmu_pagetable_get_ttbr0(pt) == pt_base) { + if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) { + ret = 1; + break; + } else { + pt->fault_addr = + (addr & ~(PAGE_SIZE-1)); + ret = 0; + break; + } + } + } + spin_unlock(&kgsl_driver.ptlock); + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_log_fault_addr); + +int kgsl_mmu_init(struct kgsl_device *device, char *mmutype) +{ + int status = 0; + struct kgsl_mmu *mmu = &device->mmu; + mmu->device = device; + + if (mmutype && !strcmp(mmutype, "nommu")) + kgsl_mmu_type = KGSL_MMU_TYPE_NONE; + + /* + * Don't use kgsl_allocate_global here because we need to get the MMU + * set up before we can add the global entry but the MMU init needs the + * setstate block. 
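One more aside on kgsl_mmu_log_fault_addr() above: repeated faults on the same page are collapsed by remembering only the page-aligned address of the most recent fault. The core of that de-duplication as a tiny standalone helper; PAGE_SIZE is assumed to be 4 KB here and the function name is invented:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

/* Return true if this fault is on the same page as the one last logged;
 * otherwise record the new page and tell the caller to log it. */
static bool fault_already_logged(uint32_t *last_fault_page, uint32_t addr)
{
        uint32_t page = addr & ~(PAGE_SIZE - 1);

        if (page == *last_fault_page)
                return true;
        *last_fault_page = page;
        return false;
}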
Allocate the memory here and map it later + */ + + status = kgsl_allocate_contiguous(device, &mmu->setstate_memory, + PAGE_SIZE); + if (status) + return status; + + /* Mark the setstate memory as read only */ + mmu->setstate_memory.flags |= KGSL_MEMFLAGS_GPUREADONLY; + + kgsl_sharedmem_set(device, &mmu->setstate_memory, 0, 0, + mmu->setstate_memory.size); + + if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) { + mmu->mmu_ops = &kgsl_iommu_ops; + status = mmu->mmu_ops->mmu_init(mmu); + } + + if (status) + goto done; + + /* Add the setstate memory to the global PT entry list */ + status = kgsl_add_global_pt_entry(device, &mmu->setstate_memory); + +done: + if (status) + kgsl_sharedmem_free(&mmu->setstate_memory); + + return status; +} +EXPORT_SYMBOL(kgsl_mmu_init); + +int kgsl_mmu_start(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + int ret = 0; + + if (kgsl_mmu_type != KGSL_MMU_TYPE_NONE) + ret = mmu->mmu_ops->mmu_start(mmu); + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_start); + +static struct kgsl_pagetable * +kgsl_mmu_createpagetableobject(struct kgsl_mmu *mmu, + unsigned int name) +{ + int status = 0; + struct kgsl_pagetable *pagetable = NULL; + unsigned long flags; + + pagetable = kzalloc(sizeof(struct kgsl_pagetable), GFP_KERNEL); + if (pagetable == NULL) + return ERR_PTR(-ENOMEM); + + kref_init(&pagetable->refcount); + + spin_lock_init(&pagetable->lock); + + pagetable->mmu = mmu; + pagetable->name = name; + pagetable->fault_addr = 0xFFFFFFFF; + + atomic_set(&pagetable->stats.entries, 0); + atomic_long_set(&pagetable->stats.mapped, 0); + atomic_long_set(&pagetable->stats.max_mapped, 0); + + if (mmu->mmu_ops && mmu->mmu_ops->mmu_init_pt) { + status = mmu->mmu_ops->mmu_init_pt(mmu, pagetable); + if (status) + goto err; + } + + if (KGSL_MMU_SECURE_PT != name) + kgsl_map_global_pt_entries(pagetable); + + spin_lock_irqsave(&kgsl_driver.ptlock, flags); + list_add(&pagetable->list, &kgsl_driver.pagetable_list); + spin_unlock_irqrestore(&kgsl_driver.ptlock, flags); + + /* Create the sysfs entries */ + pagetable_add_sysfs_objects(pagetable); + + return pagetable; + +err: + if (pagetable->priv) + pagetable->pt_ops->mmu_destroy_pagetable(pagetable); + + kfree(pagetable); + + return ERR_PTR(status); +} + +struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu, + unsigned long name) +{ + struct kgsl_pagetable *pt; + + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return (void *)(-1); + + if (!kgsl_mmu_is_perprocess(mmu) && (KGSL_MMU_SECURE_PT != name)) { + name = KGSL_MMU_GLOBAL_PT; + if (mmu->defaultpagetable) + return mmu->defaultpagetable; + } + + pt = kgsl_get_pagetable(name); + + if (pt == NULL) + pt = kgsl_mmu_createpagetableobject(mmu, name); + + return pt; +} + +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable) +{ + kgsl_put_pagetable(pagetable); +} +EXPORT_SYMBOL(kgsl_mmu_putpagetable); + +static int _nommu_get_gpuaddr(struct kgsl_memdesc *memdesc) +{ + if (memdesc->sgt->nents > 1) { + KGSL_CORE_ERR( + "Attempt to map non-contiguous memory with NOMMU\n"); + return -EINVAL; + } + + memdesc->gpuaddr = (uint64_t) sg_phys(memdesc->sgt->sgl); + + if (memdesc->gpuaddr == 0) { + KGSL_CORE_ERR("Unable to get a physical address\n"); + return -EINVAL; + } + + return 0; +} + +/** + * kgsl_mmu_find_svm_region() - Find a empty spot in the SVM region + * @pagetable: KGSL pagetable to search + * @start: start of search range, must be within kgsl_mmu_svm_range() + * @end: end of search range, must be within kgsl_mmu_svm_range() + * @size: Size of the region to find + * 
@align: Desired alignment of the address + */ +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t align) +{ + BUG_ON(pagetable == NULL || pagetable->pt_ops->find_svm_region == NULL); + return pagetable->pt_ops->find_svm_region(pagetable, start, end, size, + align); +} + +/** + * kgsl_mmu_set_svm_region() - Check if a region is empty and reserve it if so + * @pagetable: KGSL pagetable to search + * @gpuaddr: GPU address to check/reserve + * @size: Size of the region to check/reserve + */ +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size) +{ + BUG_ON(pagetable == NULL || pagetable->pt_ops->set_svm_region == NULL); + return pagetable->pt_ops->set_svm_region(pagetable, gpuaddr, size); +} + +/** + * kgsl_mmu_get_gpuaddr() - Assign a GPU address to the memdesc + * @pagetable: GPU pagetable to assign the address in + * @memdesc: mem descriptor to assign the memory to + */ +int +kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return _nommu_get_gpuaddr(memdesc); + + BUG_ON(pagetable == NULL || pagetable->pt_ops->get_gpuaddr == NULL); + return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc); +} +EXPORT_SYMBOL(kgsl_mmu_get_gpuaddr); + +int +kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int ret = 0; + int size; + + if (!memdesc->gpuaddr) + return -EINVAL; + /* Only global mappings should be mapped multiple times */ + if (!kgsl_memdesc_is_global(memdesc) && + (KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) + return 0; + + /* Add space for the guard page when allocating the mmu VA. 
*/ + size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += kgsl_memdesc_guard_page_size(pagetable->mmu, memdesc); + + ret = pagetable->pt_ops->mmu_map(pagetable, memdesc); + + if (ret == 0) { + KGSL_STATS_ADD(size, &pagetable->stats.mapped, + &pagetable->stats.max_mapped); + + atomic_inc(&pagetable->stats.entries); + + memdesc->priv |= KGSL_MEMDESC_MAPPED; + } + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_map); + +/** + * kgsl_mmu_put_gpuaddr() - Remove a GPU address from a pagetable + * @pagetable: Pagetable to release the memory from + * @memdesc: Memory descriptor containing the GPU address to free + */ +int kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + if (memdesc->size == 0 || memdesc->gpuaddr == 0) + return 0; + + if (pagetable != NULL && pagetable->pt_ops->put_gpuaddr != NULL) + pagetable->pt_ops->put_gpuaddr(pagetable, memdesc); + + if (!kgsl_memdesc_is_global(memdesc)) + memdesc->gpuaddr = 0; + + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_put_gpuaddr); + +/** + * kgsl_mmu_svm_range() - Return the range for SVM (if applicable) + * @pagetable: Pagetable to query the range from + * @lo: Pointer to store the start of the SVM range + * @hi: Pointer to store the end of the SVM range + * @memflags: Flags from the buffer we are mapping + */ +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags) +{ + if (pagetable == NULL || pagetable->pt_ops->svm_range == NULL) + return -ENODEV; + + return pagetable->pt_ops->svm_range(pagetable, lo, hi, memflags); +} +EXPORT_SYMBOL(kgsl_mmu_svm_range); + +int +kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc) +{ + int size; + uint64_t start_addr = 0; + uint64_t end_addr = 0; + + if (memdesc->size == 0 || memdesc->gpuaddr == 0 || + !(KGSL_MEMDESC_MAPPED & memdesc->priv)) + return -EINVAL; + + if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) + return 0; + + /* Add space for the guard page when freeing the mmu VA. 
*/ + size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += kgsl_memdesc_guard_page_size(pagetable->mmu, memdesc); + + start_addr = memdesc->gpuaddr; + end_addr = (memdesc->gpuaddr + size); + + pagetable->pt_ops->mmu_unmap(pagetable, memdesc); + + /* If buffer is unmapped 0 fault addr */ + if ((pagetable->fault_addr >= start_addr) && + (pagetable->fault_addr < end_addr)) + pagetable->fault_addr = 0; + + /* Remove the statistics */ + atomic_dec(&pagetable->stats.entries); + atomic_long_sub(size, &pagetable->stats.mapped); + + if (!kgsl_memdesc_is_global(memdesc)) + memdesc->priv &= ~KGSL_MEMDESC_MAPPED; + + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_unmap); + +int kgsl_mmu_close(struct kgsl_device *device) +{ + struct kgsl_mmu *mmu = &device->mmu; + int ret = 0; + + kgsl_free_global(&mmu->setstate_memory); + + if (mmu->mmu_ops != NULL) + ret = mmu->mmu_ops->mmu_close(mmu); + + return ret; +} +EXPORT_SYMBOL(kgsl_mmu_close); + +int kgsl_mmu_enabled(void) +{ + if (KGSL_MMU_TYPE_NONE != kgsl_mmu_type) + return 1; + else + return 0; +} +EXPORT_SYMBOL(kgsl_mmu_enabled); + +enum kgsl_mmutype kgsl_mmu_get_mmutype(void) +{ + return kgsl_mmu_type; +} +EXPORT_SYMBOL(kgsl_mmu_get_mmutype); + +void kgsl_mmu_set_mmutype(enum kgsl_mmutype type) +{ + kgsl_mmu_type = type; +} +EXPORT_SYMBOL(kgsl_mmu_set_mmutype); + +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr) +{ + if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) + return (gpuaddr != 0); + + if (pagetable == NULL || pagetable->pt_ops->addr_in_range == NULL) + return false; + + return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr); +} +EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range); diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h new file mode 100644 index 000000000000..dc266ab9f381 --- /dev/null +++ b/drivers/gpu/msm/kgsl_mmu.h @@ -0,0 +1,364 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_MMU_H +#define __KGSL_MMU_H + +#include "kgsl_iommu.h" +/* + * These defines control the address range for allocations that + * are mapped into all pagetables. 
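+ * With the defaults below that is the 8MB window starting at GPU VA
+ * 0xf8000000 (0xf8000000 - 0xf87fffff) on targets using these values.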
+ */ +#define KGSL_MMU_GLOBAL_MEM_SIZE SZ_8M +#define KGSL_MMU_GLOBAL_MEM_BASE 0xf8000000 + +/* Identifier for the global page table */ +/* Per process page tables will probably pass in the thread group + as an identifier */ +#define KGSL_MMU_GLOBAL_PT 0 +#define KGSL_MMU_SECURE_PT 1 + +struct kgsl_device; + +enum kgsl_mmutype { + KGSL_MMU_TYPE_IOMMU = 0, + KGSL_MMU_TYPE_NONE +}; + +struct kgsl_pagetable { + spinlock_t lock; + struct kref refcount; + struct list_head list; + unsigned int name; + struct kobject *kobj; + + struct { + atomic_t entries; + atomic_long_t mapped; + atomic_long_t max_mapped; + } stats; + const struct kgsl_mmu_pt_ops *pt_ops; + unsigned int fault_addr; + void *priv; + struct kgsl_mmu *mmu; + bool globals_mapped; +}; + +struct kgsl_mmu; + +struct kgsl_mmu_ops { + int (*mmu_init) (struct kgsl_mmu *mmu); + int (*mmu_close) (struct kgsl_mmu *mmu); + int (*mmu_start) (struct kgsl_mmu *mmu); + void (*mmu_stop) (struct kgsl_mmu *mmu); + int (*mmu_set_pt) (struct kgsl_mmu *mmu, struct kgsl_pagetable *pt); + uint64_t (*mmu_get_current_ttbr0)(struct kgsl_mmu *mmu); + void (*mmu_pagefault_resume)(struct kgsl_mmu *mmu); + void (*mmu_clear_fsr)(struct kgsl_mmu *mmu); + void (*mmu_enable_clk)(struct kgsl_mmu *mmu); + void (*mmu_disable_clk)(struct kgsl_mmu *mmu); + unsigned int (*mmu_get_reg_ahbaddr)(struct kgsl_mmu *mmu, + enum kgsl_iommu_context_id ctx_id, + enum kgsl_iommu_reg_map reg); + int (*mmu_set_pf_policy)(struct kgsl_mmu *mmu, unsigned long pf_policy); + struct kgsl_protected_registers *(*mmu_get_prot_regs) + (struct kgsl_mmu *mmu); + int (*mmu_init_pt)(struct kgsl_mmu *mmu, struct kgsl_pagetable *); +}; + +struct kgsl_mmu_pt_ops { + int (*mmu_map)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + int (*mmu_unmap)(struct kgsl_pagetable *pt, + struct kgsl_memdesc *memdesc); + void (*mmu_destroy_pagetable) (struct kgsl_pagetable *); + u64 (*get_ttbr0)(struct kgsl_pagetable *); + u32 (*get_contextidr)(struct kgsl_pagetable *); + int (*get_gpuaddr)(struct kgsl_pagetable *, struct kgsl_memdesc *); + void (*put_gpuaddr)(struct kgsl_pagetable *, struct kgsl_memdesc *); + uint64_t (*find_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t, + uint64_t, uint64_t); + int (*set_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t); + int (*svm_range)(struct kgsl_pagetable *, uint64_t *, uint64_t *, + uint64_t); + bool (*addr_in_range)(struct kgsl_pagetable *pagetable, uint64_t); +}; + +/* + * MMU_FEATURE - return true if the specified feature is supported by the GPU + * MMU + */ +#define MMU_FEATURE(_mmu, _bit) \ + ((_mmu)->features & (_bit)) + +/* MMU has register retention */ +#define KGSL_MMU_RETENTION BIT(1) +/* MMU requires the TLB to be flushed on map */ +#define KGSL_MMU_FLUSH_TLB_ON_MAP BIT(2) +/* MMU uses global pagetable */ +#define KGSL_MMU_GLOBAL_PAGETABLE BIT(3) +/* MMU uses hypervisor for content protection */ +#define KGSL_MMU_HYP_SECURE_ALLOC BIT(4) +/* Force 32 bit, even if the MMU can do 64 bit */ +#define KGSL_MMU_FORCE_32BIT BIT(5) +/* 64 bit address is live */ +#define KGSL_MMU_64BIT BIT(6) +/* MMU can do coherent hardware table walks */ +#define KGSL_MMU_COHERENT_HTW BIT(7) + +struct kgsl_mmu { + uint32_t flags; + struct kgsl_device *device; + struct kgsl_memdesc setstate_memory; + /* current page table object being used by device mmu */ + struct kgsl_pagetable *defaultpagetable; + /* secure global pagetable device mmu */ + struct kgsl_pagetable *securepagetable; + const struct kgsl_mmu_ops *mmu_ops; + void *priv; + bool secured; + uint 
features; + unsigned int secure_align_mask; +}; + +extern struct kgsl_mmu_ops kgsl_iommu_ops; + +struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *, + unsigned long name); + +struct kgsl_pagetable *kgsl_mmu_getpagetable_ptbase(struct kgsl_mmu *, + u64 ptbase); + +void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable); +int kgsl_mmu_init(struct kgsl_device *device, char *mmutype); +int kgsl_mmu_start(struct kgsl_device *device); +int kgsl_mmu_close(struct kgsl_device *device); +int kgsl_mmu_map(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +int kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable, + struct kgsl_memdesc *memdesc); +unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr); +int kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, u64 pt_base); +unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, + phys_addr_t pt_base, unsigned int addr); +int kgsl_mmu_enabled(void); +void kgsl_mmu_set_mmutype(enum kgsl_mmutype type); +enum kgsl_mmutype kgsl_mmu_get_mmutype(void); +bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr); + +int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable, + uint64_t gpuaddr, uint64_t size); + +int kgsl_mmu_find_region(struct kgsl_pagetable *pagetable, + uint64_t region_start, uint64_t region_end, + uint64_t *gpuaddr, uint64_t size, unsigned int align); + +int kgsl_add_global_pt_entry(struct kgsl_device *device, + struct kgsl_memdesc *memdesc); +void kgsl_remove_global_pt_entry(struct kgsl_memdesc *memdesc); +void kgsl_map_global_pt_entries(struct kgsl_pagetable *pagetable); + +struct kgsl_memdesc *kgsl_search_global_pt_entries(unsigned int gpuaddr, + unsigned int size); +struct kgsl_pagetable *kgsl_mmu_get_pt_from_ptname(struct kgsl_mmu *mmu, + int ptname); + +uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable, + uint64_t start, uint64_t end, uint64_t size, + uint64_t alignment); + +int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr, + uint64_t size); + +void kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable); + +int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable, + uint64_t *lo, uint64_t *hi, uint64_t memflags); + +/* + * Static inline functions of MMU that simply call the SMMU specific + * function using a function pointer. 
These functions can be thought + * of as wrappers around the actual function + */ + +static inline u64 kgsl_mmu_get_current_ttbr0(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_current_ttbr0) + return mmu->mmu_ops->mmu_get_current_ttbr0(mmu); + else + return 0; +} + +static inline int kgsl_mmu_set_pt(struct kgsl_mmu *mmu, + struct kgsl_pagetable *pagetable) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_set_pt) + return mmu->mmu_ops->mmu_set_pt(mmu, pagetable); + + return 0; +} + +static inline void kgsl_mmu_stop(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_stop) + mmu->mmu_ops->mmu_stop(mmu); +} + +static inline void kgsl_mmu_enable_clk(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_enable_clk) + mmu->mmu_ops->mmu_enable_clk(mmu); + else + return; +} + +static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_disable_clk) + mmu->mmu_ops->mmu_disable_clk(mmu); +} + +/* + * kgsl_mmu_get_reg_ahbaddr() - Calls the mmu specific function pointer to + * return the address that GPU can use to access register + * @mmu: Pointer to the device mmu + * @ctx_id: The MMU HW context ID + * @reg: Register whose address is to be returned + * + * Returns the ahb address of reg else 0 + */ +static inline unsigned int kgsl_mmu_get_reg_ahbaddr(struct kgsl_mmu *mmu, + enum kgsl_iommu_context_id ctx_id, + enum kgsl_iommu_reg_map reg) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_reg_ahbaddr) + return mmu->mmu_ops->mmu_get_reg_ahbaddr(mmu, ctx_id, reg); + else + return 0; +} + +/* + * kgsl_mmu_is_perprocess() - Runtime check for per-process + * pagetables. + * @mmu: the mmu + * + * Returns true if per-process pagetables are enabled, + * false if not. + */ +static inline int kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu) +{ + return MMU_FEATURE(mmu, KGSL_MMU_GLOBAL_PAGETABLE) ? 0 : 1; +} + +/* + * kgsl_mmu_use_cpu_map() - Runtime check for matching the CPU + * address space on the GPU. + * @mmu: the mmu + * + * Returns true if supported false if not. 
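+ * In practice that means per-process pagetables are enabled and a real
+ * MMU type is in use, as the implementation below shows.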
+ */ +static inline int kgsl_mmu_use_cpu_map(struct kgsl_mmu *mmu) +{ + return kgsl_mmu_is_perprocess(mmu) && + kgsl_mmu_get_mmutype() != KGSL_MMU_TYPE_NONE; +} + +static inline int kgsl_mmu_set_pagefault_policy(struct kgsl_mmu *mmu, + unsigned long pf_policy) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_set_pf_policy) + return mmu->mmu_ops->mmu_set_pf_policy(mmu, pf_policy); + else + return 0; +} + +static inline void kgsl_mmu_pagefault_resume(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_pagefault_resume) + return mmu->mmu_ops->mmu_pagefault_resume(mmu); +} + +static inline void kgsl_mmu_clear_fsr(struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_clear_fsr) + return mmu->mmu_ops->mmu_clear_fsr(mmu); +} + +static inline struct kgsl_protected_registers *kgsl_mmu_get_prot_regs + (struct kgsl_mmu *mmu) +{ + if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_prot_regs) + return mmu->mmu_ops->mmu_get_prot_regs(mmu); + else + return NULL; +} + +static inline int kgsl_mmu_is_secured(struct kgsl_mmu *mmu) +{ + return mmu && (mmu->secured) && (mmu->securepagetable); +} + +static inline u64 +kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable) +{ + if (pagetable && pagetable->pt_ops->get_ttbr0) + return pagetable->pt_ops->get_ttbr0(pagetable); + return 0; +} + +static inline u32 +kgsl_mmu_pagetable_get_contextidr(struct kgsl_pagetable *pagetable) +{ + if (pagetable && pagetable->pt_ops->get_contextidr) + return pagetable->pt_ops->get_contextidr(pagetable); + return 0; +} + +#ifdef CONFIG_MSM_IOMMU +#include <linux/qcom_iommu.h> +static inline bool kgsl_mmu_bus_secured(struct device *dev) +{ + struct bus_type *bus = msm_iommu_get_bus(dev); + + return (bus == &msm_iommu_sec_bus_type) ? true : false; +} +static inline struct bus_type *kgsl_mmu_get_bus(struct device *dev) +{ + return msm_iommu_get_bus(dev); +} +static inline struct device *kgsl_mmu_get_ctx(const char *name) +{ + return msm_iommu_get_ctx(name); +} +#else +static inline bool kgsl_mmu_bus_secured(struct device *dev) +{ + return false; +} + +static inline struct bus_type *kgsl_mmu_get_bus(struct device *dev) +{ + return &platform_bus_type; +} +static inline struct device *kgsl_mmu_get_ctx(const char *name) +{ + return ERR_PTR(-ENODEV); +} +#endif + +#endif /* __KGSL_MMU_H */ diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c new file mode 100644 index 000000000000..54a93e62a580 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -0,0 +1,2620 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/export.h> +#include <linux/interrupt.h> +#include <asm/page.h> +#include <linux/pm_runtime.h> +#include <linux/msm-bus.h> +#include <linux/msm-bus-board.h> +#include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/msm_adreno_devfreq.h> +#include <linux/of_device.h> + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" +#include <soc/qcom/devfreq_devbw.h> + +#define KGSL_PWRFLAGS_POWER_ON 0 +#define KGSL_PWRFLAGS_CLK_ON 1 +#define KGSL_PWRFLAGS_AXI_ON 2 +#define KGSL_PWRFLAGS_IRQ_ON 3 +#define KGSL_PWRFLAGS_RETENTION_ON 4 + +#define UPDATE_BUSY_VAL 1000000 + +/* + * Expected delay for post-interrupt processing on A3xx. + * The delay may be longer, gradually increase the delay + * to compensate. If the GPU isn't done by max delay, + * it's working on something other than just the final + * command sequence so stop waiting for it to be idle. + */ +#define INIT_UDELAY 200 +#define MAX_UDELAY 2000 + +/* Number of jiffies for a full thermal cycle */ +#define TH_HZ 20 + +#define KGSL_MAX_BUSLEVELS 20 + +#define DEFAULT_BUS_P 25 +#define DEFAULT_BUS_DIV (100 / DEFAULT_BUS_P) + +/* Order deeply matters here because reasons. New entries go on the end */ +static const char * const clocks[] = { + "src_clk", + "core_clk", + "iface_clk", + "mem_clk", + "mem_iface_clk", + "alt_mem_iface_clk", + "rbbmtimer_clk", + "gtcu_clk", + "gtbu_clk", + "gtcu_iface_clk", + "alwayson_clk" +}; + +static unsigned int ib_votes[KGSL_MAX_BUSLEVELS]; +static int last_vote_buslevel; +static int max_vote_buslevel; + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, int state, + int requested_state); +static void kgsl_pwrctrl_axi(struct kgsl_device *device, int state); +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, int state); +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state); +static void kgsl_pwrctrl_request_state(struct kgsl_device *device, + unsigned int state); +static void kgsl_pwrctrl_retention_clk(struct kgsl_device *device, int state); + +/** + * _record_pwrevent() - Record the history of the new event + * @device: Pointer to the kgsl_device struct + * @t: Timestamp + * @event: Event type + * + * Finish recording the duration of the previous event. Then update the + * index, record the start of the new event, and the relevant data. 
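+ *
+ * Hypothetical caller sketch (the pwrscale/pwrctrl code is assumed to
+ * invoke this whenever the tracked quantity changes):
+ *
+ *	_record_pwrevent(device, ktime_get(), KGSL_PWREVENT_STATE);
+ *
+ * leaving each history entry with a start time, a duration and the value
+ * (device state, GPU pwrlevel or bus level) that applied for that span.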
+ */ +static void _record_pwrevent(struct kgsl_device *device, + ktime_t t, int event) { + struct kgsl_pwrscale *psc = &device->pwrscale; + struct kgsl_pwr_history *history = &psc->history[event]; + int i = history->index; + if (history->events == NULL) + return; + history->events[i].duration = ktime_us_delta(t, + history->events[i].start); + i = (i + 1) % history->size; + history->index = i; + history->events[i].start = t; + switch (event) { + case KGSL_PWREVENT_STATE: + history->events[i].data = device->state; + break; + case KGSL_PWREVENT_GPU_FREQ: + history->events[i].data = device->pwrctrl.active_pwrlevel; + break; + case KGSL_PWREVENT_BUS_FREQ: + history->events[i].data = last_vote_buslevel; + break; + default: + break; + } +} + +/** + * kgsl_get_bw() - Return latest msm bus IB vote + */ +static unsigned int kgsl_get_bw(void) +{ + return ib_votes[last_vote_buslevel]; +} + +/** + * _ab_buslevel_update() - Return latest msm bus AB vote + * @pwr: Pointer to the kgsl_pwrctrl struct + * @ab: Pointer to be updated with the calculated AB vote + */ +static void _ab_buslevel_update(struct kgsl_pwrctrl *pwr, + unsigned long *ab) +{ + unsigned int ib = ib_votes[last_vote_buslevel]; + unsigned int max_bw = ib_votes[max_vote_buslevel]; + if (!ab) + return; + if (ib == 0) + *ab = 0; + else if ((!pwr->bus_percent_ab) && (!pwr->bus_ab_mbytes)) + *ab = DEFAULT_BUS_P * ib / 100; + else if (pwr->bus_width) + *ab = pwr->bus_ab_mbytes; + else + *ab = (pwr->bus_percent_ab * max_bw) / 100; + + if (*ab > ib) + *ab = ib; +} + +/** + * _adjust_pwrlevel() - Given a requested power level do bounds checking on the + * constraints and return the nearest possible level + * @device: Pointer to the kgsl_device struct + * @level: Requested level + * @pwrc: Pointer to the power constraint to be applied + * + * Apply thermal and max/min limits first. Then force the level with a + * constraint if one exists. + */ +static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level, + struct kgsl_pwr_constraint *pwrc, + int popp) +{ + unsigned int max_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + pwr->max_pwrlevel); + unsigned int min_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel, + pwr->min_pwrlevel); + + switch (pwrc->type) { + case KGSL_CONSTRAINT_PWRLEVEL: { + switch (pwrc->sub_type) { + case KGSL_CONSTRAINT_PWR_MAX: + return max_pwrlevel; + break; + case KGSL_CONSTRAINT_PWR_MIN: + return min_pwrlevel; + break; + default: + break; + } + } + break; + } + + if (popp && (max_pwrlevel < pwr->active_pwrlevel)) + max_pwrlevel = pwr->active_pwrlevel; + + if (level < max_pwrlevel) + return max_pwrlevel; + if (level > min_pwrlevel) + return min_pwrlevel; + + return level; +} + +/** + * kgsl_pwrctrl_buslevel_update() - Recalculate the bus vote and send it + * @device: Pointer to the kgsl_device struct + * @on: true for setting and active bus vote, false to turn off the vote + */ +void kgsl_pwrctrl_buslevel_update(struct kgsl_device *device, + bool on) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int cur = pwr->pwrlevels[pwr->active_pwrlevel].bus_freq; + int buslevel = 0; + unsigned long ab; + + /* the bus should be ON to update the active frequency */ + if (on && !(test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags))) + return; + /* + * If the bus should remain on calculate our request and submit it, + * otherwise request bus level 0, off. 
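+	 * The "on" vote is the active pwrlevel's bus_freq adjusted by
+	 * bus_mod and clamped to [1, pwrlevels[0].bus_max]; with bus_mod of
+	 * zero it is simply the default bus level for the current GPU
+	 * frequency.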
+ */ + if (on) { + buslevel = min_t(int, pwr->pwrlevels[0].bus_max, + cur + pwr->bus_mod); + buslevel = max_t(int, buslevel, 1); + } else { + /* If the bus is being turned off, reset to default level */ + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + pwr->bus_ab_mbytes = 0; + } + trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel); + last_vote_buslevel = buslevel; + + /* buslevel is the IB vote, update the AB */ + _ab_buslevel_update(pwr, &ab); + + /** + * vote for ocmem if target supports ocmem scaling, + * shut down based on "on" parameter + */ + if (pwr->ocmem_pcl) + msm_bus_scale_client_update_request(pwr->ocmem_pcl, + on ? pwr->active_pwrlevel : pwr->num_pwrlevels - 1); + + /* vote for bus if gpubw-dev support is not enabled */ + if (pwr->pcl) + msm_bus_scale_client_update_request(pwr->pcl, buslevel); + + /* ask a governor to vote on behalf of us */ + if (pwr->devbw) + devfreq_vbif_update_bw(ib_votes[last_vote_buslevel], ab); +} +EXPORT_SYMBOL(kgsl_pwrctrl_buslevel_update); + +/** + * kgsl_pwrctrl_pwrlevel_change_settings() - Program h/w during powerlevel + * transitions + * @device: Pointer to the kgsl_device struct + * @post: flag to check if the call is before/after the clk_rate change + * @wake_up: flag to check if device is active or waking up + */ +static void kgsl_pwrctrl_pwrlevel_change_settings(struct kgsl_device *device, + bool post) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + unsigned int old = pwr->previous_pwrlevel; + unsigned int new = pwr->active_pwrlevel; + + if (device->state != KGSL_STATE_ACTIVE) + return; + if (old == new) + return; + if (!device->ftbl->pwrlevel_change_settings) + return; + + device->ftbl->pwrlevel_change_settings(device, old, new, post); +} + +/** + * kgsl_pwrctrl_set_thermal_cycle() - set the thermal cycle if required + * @pwr: Pointer to the kgsl_pwrctrl struct + * @new_level: the level to transition to + */ +static void kgsl_pwrctrl_set_thermal_cycle(struct kgsl_pwrctrl *pwr, + unsigned int new_level) +{ + if ((new_level != pwr->thermal_pwrlevel) || !pwr->sysfs_pwr_limit) + return; + if (pwr->thermal_pwrlevel == pwr->sysfs_pwr_limit->level) { + /* Thermal cycle for sysfs pwr limit, start cycling*/ + if (pwr->thermal_cycle == CYCLE_ENABLE) { + pwr->thermal_cycle = CYCLE_ACTIVE; + mod_timer(&pwr->thermal_timer, jiffies + + (TH_HZ - pwr->thermal_timeout)); + pwr->thermal_highlow = 1; + } + } else { + /* Non sysfs pwr limit, stop thermal cycle if active*/ + if (pwr->thermal_cycle == CYCLE_ACTIVE) { + pwr->thermal_cycle = CYCLE_ENABLE; + del_timer_sync(&pwr->thermal_timer); + } + } +} + +/** + * kgsl_pwrctrl_pwrlevel_change() - Validate and change power levels + * @device: Pointer to the kgsl_device struct + * @new_level: Requested powerlevel, an index into the pwrlevel array + * + * Check that any power level constraints are still valid. Update the + * requested level according to any thermal, max/min, or power constraints. + * If a new GPU level is going to be set, update the bus to that level's + * default value. Do not change the bus if a constraint keeps the new + * level at the current level. Set the new GPU frequency. 
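+ *
+ * For example (lower indices being the faster levels), a request for
+ * level 0 while thermal_pwrlevel is 2 is clamped to level 2 by
+ * _adjust_pwrlevel() before anything is reprogrammed.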
+ */ +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int new_level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pwrlevel; + unsigned int old_level = pwr->active_pwrlevel; + + /* If a pwr constraint is expired, remove it */ + if ((pwr->constraint.type != KGSL_CONSTRAINT_NONE) && + (time_after(jiffies, pwr->constraint.expires))) { + /* Trace the constraint being un-set by the driver */ + trace_kgsl_constraint(device, pwr->constraint.type, + old_level, 0); + /*Invalidate the constraint set */ + pwr->constraint.expires = 0; + pwr->constraint.type = KGSL_CONSTRAINT_NONE; + } + + /* + * Adjust the power level if required by thermal, max/min, + * constraints, etc + */ + new_level = _adjust_pwrlevel(pwr, new_level, &pwr->constraint, + device->pwrscale.popp_level); + + /* + * If thermal cycling is required and the new level hits the + * thermal limit, kick off the cycling. + */ + kgsl_pwrctrl_set_thermal_cycle(pwr, new_level); + + if (new_level == old_level) + return; + + /* + * Set the active and previous powerlevel first in case the clocks are + * off - if we don't do this then the pwrlevel change won't take effect + * when the clocks come back + */ + pwr->active_pwrlevel = new_level; + pwr->previous_pwrlevel = old_level; + + /* + * If the bus is running faster than its default level and the GPU + * frequency is moving down keep the DDR at a relatively high level. + */ + if (pwr->bus_mod < 0 || new_level < old_level) { + pwr->bus_mod = 0; + pwr->bus_percent_ab = 0; + } + /* + * Update the bus before the GPU clock to prevent underrun during + * frequency increases. + */ + kgsl_pwrctrl_buslevel_update(device, true); + + pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel]; + /* Change register settings if any BEFORE pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 0); + clk_set_rate(pwr->grp_clks[0], pwrlevel->gpu_freq); + trace_kgsl_pwrlevel(device, + pwr->active_pwrlevel, pwrlevel->gpu_freq, + pwr->previous_pwrlevel, + pwr->pwrlevels[old_level].gpu_freq); + /* Change register settings if any AFTER pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); + + /* Timestamp the frequency change */ + device->pwrscale.freq_change_time = ktime_to_ms(ktime_get()); +} +EXPORT_SYMBOL(kgsl_pwrctrl_pwrlevel_change); + +/** + * kgsl_pwrctrl_set_constraint() - Validate and change enforced constraint + * @device: Pointer to the kgsl_device struct + * @pwrc: Pointer to requested constraint + * @id: Context id which owns the constraint + * + * Accept the new constraint if no previous constraint existed or if the + * new constraint is faster than the previous one. If the new and previous + * constraints are equal, update the timestamp and ownership to make sure + * the constraint expires at the correct time. + */ +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, uint32_t id) +{ + unsigned int constraint; + struct kgsl_pwr_constraint *pwrc_old; + + if (device == NULL || pwrc == NULL) + return; + constraint = _adjust_pwrlevel(&device->pwrctrl, + device->pwrctrl.active_pwrlevel, pwrc, 0); + pwrc_old = &device->pwrctrl.constraint; + + /* + * If a constraint is already set, set a new constraint only + * if it is faster. If the requested constraint is the same + * as the current one, update ownership and timestamp. 
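+ *
+ * "Faster" here means a numerically lower pwrlevel index: an existing
+ * constraint is only replaced when the newly computed level is lower
+ * than the level currently stored in the constraint.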
+ */ + if ((pwrc_old->type == KGSL_CONSTRAINT_NONE) || + (constraint < pwrc_old->hint.pwrlevel.level)) { + pwrc_old->type = pwrc->type; + pwrc_old->sub_type = pwrc->sub_type; + pwrc_old->hint.pwrlevel.level = constraint; + pwrc_old->owner_id = id; + pwrc_old->expires = jiffies + device->pwrctrl.interval_timeout; + kgsl_pwrctrl_pwrlevel_change(device, constraint); + /* Trace the constraint being set by the driver */ + trace_kgsl_constraint(device, pwrc_old->type, constraint, 1); + } else if ((pwrc_old->type == pwrc->type) && + (pwrc_old->hint.pwrlevel.level == constraint)) { + pwrc_old->owner_id = id; + pwrc_old->expires = jiffies + + device->pwrctrl.interval_timeout; + } +} +EXPORT_SYMBOL(kgsl_pwrctrl_set_constraint); + +static ssize_t kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &level); + + if (ret) + return ret; + + mutex_lock(&device->mutex); + + if (level > pwr->num_pwrlevels - 2) + level = pwr->num_pwrlevels - 2; + + pwr->thermal_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_thermal_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_max_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &level); + if (ret) + return ret; + + mutex_lock(&device->mutex); + + /* You can't set a maximum power level lower than the minimum */ + if (level > pwr->min_pwrlevel) + level = pwr->min_pwrlevel; + + pwr->max_pwrlevel = level; + + /* Update the current level using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_max_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%u\n", pwr->max_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_min_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &level); + if (ret) + return ret; + + mutex_lock(&device->mutex); + if (level > pwr->num_pwrlevels - 2) + level = pwr->num_pwrlevels - 2; + + /* You can't set a minimum power level lower than the maximum */ + if (level < pwr->max_pwrlevel) + level = pwr->max_pwrlevel; + + pwr->min_pwrlevel = level; + + /* Update the current level 
using the new limit */ + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_min_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%u\n", pwr->min_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_num_pwrlevels_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels - 1); +} + +/* Given a GPU clock value, return the lowest matching powerlevel */ + +static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock) +{ + int i; + + for (i = pwr->num_pwrlevels - 1; i >= 0; i--) { + if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000) + return i; + } + + return -ERANGE; +} + +static ssize_t kgsl_pwrctrl_max_gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + unsigned int val = 0; + int level, ret; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + level = _get_nearest_pwrlevel(pwr, val); + /* If the requested power level is not supported by hw, try cycling */ + if (level < 0) { + unsigned int hfreq, diff, udiff, i; + if ((val < pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq) || + (val > pwr->pwrlevels[0].gpu_freq)) + goto err; + + /* Find the neighboring frequencies */ + for (i = 0; i < pwr->num_pwrlevels - 1; i++) { + if ((pwr->pwrlevels[i].gpu_freq > val) && + (pwr->pwrlevels[i + 1].gpu_freq < val)) { + level = i; + break; + } + } + if (i == pwr->num_pwrlevels - 1) + goto err; + hfreq = pwr->pwrlevels[i].gpu_freq; + diff = hfreq - pwr->pwrlevels[i + 1].gpu_freq; + udiff = hfreq - val; + pwr->thermal_timeout = (udiff * TH_HZ) / diff; + pwr->thermal_cycle = CYCLE_ENABLE; + } else { + pwr->thermal_cycle = CYCLE_DISABLE; + del_timer_sync(&pwr->thermal_timer); + } + mutex_unlock(&device->mutex); + + if (pwr->sysfs_pwr_limit) + kgsl_pwr_limits_set_freq(pwr->sysfs_pwr_limit, + pwr->pwrlevels[level].gpu_freq); + return count; + +err: + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t kgsl_pwrctrl_max_gpuclk_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + unsigned int freq; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + freq = pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq; + /* Calculate the effective frequency if we're cycling */ + if (pwr->thermal_cycle) { + unsigned int hfreq = freq; + unsigned int lfreq = pwr->pwrlevels[pwr-> + thermal_pwrlevel + 1].gpu_freq; + freq = pwr->thermal_timeout * (lfreq / TH_HZ) + + (TH_HZ - pwr->thermal_timeout) * (hfreq / TH_HZ); + } + + return snprintf(buf, PAGE_SIZE, "%d\n", freq); +} + +static ssize_t kgsl_pwrctrl_gpuclk_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + unsigned int val = 0; + int ret, 
level; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + level = _get_nearest_pwrlevel(pwr, val); + if (level >= 0) + kgsl_pwrctrl_pwrlevel_change(device, (unsigned int) level); + + mutex_unlock(&device->mutex); + return count; +} + +static ssize_t kgsl_pwrctrl_gpuclk_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + return snprintf(buf, PAGE_SIZE, "%ld\n", kgsl_pwrctrl_active_freq(pwr)); +} + +static ssize_t __timer_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count, + enum kgsl_pwrctrl_timer_type timer) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + /* + * We don't quite accept a maximum of 0xFFFFFFFF due to internal jiffy + * math, so make sure the value falls within the largest offset we can + * deal with + */ + + if (val > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return -EINVAL; + + mutex_lock(&device->mutex); + /* Let the timeout be requested in ms, but convert to jiffies. */ + if (timer == KGSL_PWR_IDLE_TIMER) + device->pwrctrl.interval_timeout = msecs_to_jiffies(val); + else if (timer == KGSL_PWR_DEEP_NAP_TIMER) + device->pwrctrl.deep_nap_timeout = msecs_to_jiffies(val); + + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_idle_timer_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __timer_store(dev, attr, buf, count, KGSL_PWR_IDLE_TIMER); +} + +static ssize_t kgsl_pwrctrl_idle_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + /* Show the idle_timeout converted to msec */ + return snprintf(buf, PAGE_SIZE, "%u\n", + jiffies_to_msecs(device->pwrctrl.interval_timeout)); +} + +static ssize_t kgsl_pwrctrl_deep_nap_timer_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + + return __timer_store(dev, attr, buf, count, KGSL_PWR_DEEP_NAP_TIMER); +} + +static ssize_t kgsl_pwrctrl_deep_nap_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + + if (device == NULL) + return 0; + /* Show the idle_timeout converted to msec */ + return snprintf(buf, PAGE_SIZE, "%u\n", + jiffies_to_msecs(device->pwrctrl.deep_nap_timeout)); +} + +static ssize_t kgsl_pwrctrl_pmqos_active_latency_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + device->pwrctrl.pm_qos_active_latency = val; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_pmqos_active_latency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.pm_qos_active_latency); +} + +static 
ssize_t kgsl_pwrctrl_gpubusy_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_clk_stats *stats; + + if (device == NULL) + return 0; + stats = &device->pwrctrl.clk_stats; + ret = snprintf(buf, PAGE_SIZE, "%7d %7d\n", + stats->busy_old, stats->total_old); + if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) { + stats->busy_old = 0; + stats->total_old = 0; + } + return ret; +} + +static ssize_t kgsl_pwrctrl_gpu_available_frequencies_show( + struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int index, num_chars = 0; + + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + for (index = 0; index < pwr->num_pwrlevels - 1; index++) + num_chars += snprintf(buf + num_chars, PAGE_SIZE, "%d ", + pwr->pwrlevels[index].gpu_freq); + buf[num_chars++] = '\n'; + return num_chars; +} + +static ssize_t kgsl_pwrctrl_reset_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter); +} + +static void __force_on(struct kgsl_device *device, int flag, int on) +{ + if (on) { + switch (flag) { + case KGSL_PWRFLAGS_CLK_ON: + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, + KGSL_STATE_ACTIVE); + break; + case KGSL_PWRFLAGS_AXI_ON: + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); + break; + case KGSL_PWRFLAGS_POWER_ON: + kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON); + break; + case KGSL_PWRFLAGS_RETENTION_ON: + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + break; + } + set_bit(flag, &device->pwrctrl.ctrl_flags); + } else { + clear_bit(flag, &device->pwrctrl.ctrl_flags); + } +} + +static ssize_t __force_on_show(struct device *dev, + struct device_attribute *attr, + char *buf, int flag) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + test_bit(flag, &device->pwrctrl.ctrl_flags)); +} + +static ssize_t __force_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, + int flag) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + __force_on(device, flag, val); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_force_clk_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t kgsl_pwrctrl_force_clk_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_CLK_ON); +} + +static ssize_t kgsl_pwrctrl_force_bus_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t kgsl_pwrctrl_force_bus_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_AXI_ON); +} + +static ssize_t kgsl_pwrctrl_force_rail_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ 
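+	/* Report whether the GPU power rail has been forced on via sysfs */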
+ return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t kgsl_pwrctrl_force_rail_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_POWER_ON); +} + +static ssize_t kgsl_pwrctrl_force_non_retention_on_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_RETENTION_ON); +} + +static ssize_t kgsl_pwrctrl_force_non_retention_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return __force_on_store(dev, attr, buf, count, + KGSL_PWRFLAGS_RETENTION_ON); +} + +static ssize_t kgsl_pwrctrl_bus_split_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.bus_control); +} + +static ssize_t kgsl_pwrctrl_bus_split_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + device->pwrctrl.bus_control = val ? true : false; + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_pwrctrl_default_pwrlevel_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + device->pwrctrl.default_pwrlevel); +} + +static ssize_t kgsl_pwrctrl_default_pwrlevel_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrscale *pwrscale; + int ret; + unsigned int level = 0; + + if (device == NULL) + return 0; + + pwr = &device->pwrctrl; + pwrscale = &device->pwrscale; + + ret = kgsl_sysfs_store(buf, &level); + if (ret) + return ret; + + if (level > pwr->num_pwrlevels - 2) + goto done; + + mutex_lock(&device->mutex); + pwr->default_pwrlevel = level; + pwrscale->gpu_profile.profile.initial_freq + = pwr->pwrlevels[level].gpu_freq; + + mutex_unlock(&device->mutex); +done: + return count; +} + + +static ssize_t kgsl_popp_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int val = 0; + struct kgsl_device *device = kgsl_device_from_dev(dev); + int ret; + + if (device == NULL) + return 0; + + ret = kgsl_sysfs_store(buf, &val); + if (ret) + return ret; + + mutex_lock(&device->mutex); + if (val) + set_bit(POPP_ON, &device->pwrscale.popp_state); + else + clear_bit(POPP_ON, &device->pwrscale.popp_state); + mutex_unlock(&device->mutex); + + return count; +} + +static ssize_t kgsl_popp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + if (device == NULL) + return 0; + return snprintf(buf, PAGE_SIZE, "%d\n", + test_bit(POPP_ON, &device->pwrscale.popp_state)); +} + +static DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show, + kgsl_pwrctrl_gpuclk_store); +static DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show, + kgsl_pwrctrl_max_gpuclk_store); +static DEVICE_ATTR(idle_timer, 0644, 
kgsl_pwrctrl_idle_timer_show, + kgsl_pwrctrl_idle_timer_store); +static DEVICE_ATTR(deep_nap_timer, 0644, kgsl_pwrctrl_deep_nap_timer_show, + kgsl_pwrctrl_deep_nap_timer_store); +static DEVICE_ATTR(gpubusy, 0444, kgsl_pwrctrl_gpubusy_show, + NULL); +static DEVICE_ATTR(gpu_available_frequencies, 0444, + kgsl_pwrctrl_gpu_available_frequencies_show, + NULL); +static DEVICE_ATTR(max_pwrlevel, 0644, + kgsl_pwrctrl_max_pwrlevel_show, + kgsl_pwrctrl_max_pwrlevel_store); +static DEVICE_ATTR(min_pwrlevel, 0644, + kgsl_pwrctrl_min_pwrlevel_show, + kgsl_pwrctrl_min_pwrlevel_store); +static DEVICE_ATTR(thermal_pwrlevel, 0644, + kgsl_pwrctrl_thermal_pwrlevel_show, + kgsl_pwrctrl_thermal_pwrlevel_store); +static DEVICE_ATTR(num_pwrlevels, 0444, + kgsl_pwrctrl_num_pwrlevels_show, + NULL); +static DEVICE_ATTR(pmqos_active_latency, 0644, + kgsl_pwrctrl_pmqos_active_latency_show, + kgsl_pwrctrl_pmqos_active_latency_store); +static DEVICE_ATTR(reset_count, 0444, + kgsl_pwrctrl_reset_count_show, + NULL); +static DEVICE_ATTR(force_clk_on, 0644, + kgsl_pwrctrl_force_clk_on_show, + kgsl_pwrctrl_force_clk_on_store); +static DEVICE_ATTR(force_bus_on, 0644, + kgsl_pwrctrl_force_bus_on_show, + kgsl_pwrctrl_force_bus_on_store); +static DEVICE_ATTR(force_rail_on, 0644, + kgsl_pwrctrl_force_rail_on_show, + kgsl_pwrctrl_force_rail_on_store); +static DEVICE_ATTR(bus_split, 0644, + kgsl_pwrctrl_bus_split_show, + kgsl_pwrctrl_bus_split_store); +static DEVICE_ATTR(default_pwrlevel, 0644, + kgsl_pwrctrl_default_pwrlevel_show, + kgsl_pwrctrl_default_pwrlevel_store); +static DEVICE_ATTR(popp, 0644, kgsl_popp_show, kgsl_popp_store); +static DEVICE_ATTR(force_non_retention_on, 0644, + kgsl_pwrctrl_force_non_retention_on_show, + kgsl_pwrctrl_force_non_retention_on_store); + +static const struct device_attribute *pwrctrl_attr_list[] = { + &dev_attr_gpuclk, + &dev_attr_max_gpuclk, + &dev_attr_idle_timer, + &dev_attr_deep_nap_timer, + &dev_attr_gpubusy, + &dev_attr_gpu_available_frequencies, + &dev_attr_max_pwrlevel, + &dev_attr_min_pwrlevel, + &dev_attr_thermal_pwrlevel, + &dev_attr_num_pwrlevels, + &dev_attr_pmqos_active_latency, + &dev_attr_reset_count, + &dev_attr_force_clk_on, + &dev_attr_force_bus_on, + &dev_attr_force_rail_on, + &dev_attr_force_non_retention_on, + &dev_attr_bus_split, + &dev_attr_default_pwrlevel, + &dev_attr_popp, + NULL +}; + +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device) +{ + return kgsl_create_device_sysfs_files(device->dev, pwrctrl_attr_list); +} + +void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device) +{ + kgsl_remove_device_sysfs_files(device->dev, pwrctrl_attr_list); +} + +/* Track the amount of time the gpu is on vs the total system time. * + * Regularly update the percentage of busy time displayed by sysfs. */ +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy) +{ + struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats; + stats->total += time; + stats->busy += busy; + + if (stats->total < UPDATE_BUSY_VAL) + return; + + /* Update the output regularly and reset the counters. 
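+	 * "Regularly" means once the running total crosses UPDATE_BUSY_VAL:
+	 * the busy/total pair is published for the gpubusy sysfs node and
+	 * the trace point, then the accumulators start over.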
*/ + stats->total_old = stats->total; + stats->busy_old = stats->busy; + stats->total = 0; + stats->busy = 0; + + trace_kgsl_gpubusy(device, stats->busy_old, stats->total_old); +} +EXPORT_SYMBOL(kgsl_pwrctrl_busy_time); + +static void kgsl_pwrctrl_retention_clk(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i = 0; + + if (!(pwr->gx_retention) || test_bit(KGSL_PWRFLAGS_RETENTION_ON, + &device->pwrctrl.ctrl_flags)) + return; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_RETENTION_ON, + &pwr->power_flags)) { + trace_kgsl_retention_clk(device, state); + /* prepare the mx clk to avoid RPM transactions*/ + clk_set_rate(pwr->dummy_mx_clk, + pwr->pwrlevels + [pwr->active_pwrlevel]. + gpu_freq); + clk_prepare(pwr->dummy_mx_clk); + /* + * Unprepare Gfx clocks to put Gfx rail to + * retention voltage. + */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + if (pwr->grp_clks[i]) + clk_unprepare(pwr->grp_clks[i]); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_RETENTION_ON, + &pwr->power_flags)) { + trace_kgsl_retention_clk(device, state); + /* + * Prepare Gfx clocks to put Gfx rail out + * of rentention + */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + if (pwr->grp_clks[i]) + clk_prepare(pwr->grp_clks[i]); + + /* unprepare the dummy mx clk*/ + clk_unprepare(pwr->dummy_mx_clk); + clk_set_rate(pwr->dummy_mx_clk, + pwr->pwrlevels[pwr->num_pwrlevels - 1]. + gpu_freq); + } + } +} + +static void kgsl_pwrctrl_clk(struct kgsl_device *device, int state, + int requested_state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i = 0; + + if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags)) + return; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_disable(pwr->grp_clks[i]); + /* High latency clock maintenance. */ + if ((pwr->pwrlevels[0].gpu_freq > 0) && + (requested_state != KGSL_STATE_NAP) && + (requested_state != + KGSL_STATE_DEEP_NAP)) { + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels[pwr->num_pwrlevels - 1]. + gpu_freq); + } + } else if (requested_state == KGSL_STATE_SLEEP) { + /* High latency clock maintenance. */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_unprepare(pwr->grp_clks[i]); + if ((pwr->pwrlevels[0].gpu_freq > 0)) + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels[pwr->num_pwrlevels - 1]. + gpu_freq); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON, + &pwr->power_flags)) { + trace_kgsl_clk(device, state, + kgsl_pwrctrl_active_freq(pwr)); + /* High latency clock maintenance. */ + if ((device->state != KGSL_STATE_NAP) && + (device->state != KGSL_STATE_DEEP_NAP)) { + if (pwr->pwrlevels[0].gpu_freq > 0) + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels + [pwr->active_pwrlevel]. 
+ gpu_freq); + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_prepare(pwr->grp_clks[i]); + } + /* as last step, enable grp_clk + this is to let GPU interrupt to come */ + for (i = KGSL_MAX_CLKS - 1; i > 0; i--) + clk_enable(pwr->grp_clks[i]); + } + } +} + +static void kgsl_pwrctrl_axi(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->ctrl_flags)) + return; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + kgsl_pwrctrl_buslevel_update(device, false); + + if (pwr->devbw) + devfreq_suspend_devbw(pwr->devbw); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_AXI_ON, + &pwr->power_flags)) { + trace_kgsl_bus(device, state); + kgsl_pwrctrl_buslevel_update(device, true); + + if (pwr->devbw) + devfreq_resume_devbw(pwr->devbw); + } + } +} + +static int _regulator_enable(struct kgsl_device *device, + struct kgsl_regulator *regulator) +{ + int ret; + + if (IS_ERR_OR_NULL(regulator->reg)) + return 0; + + ret = regulator_enable(regulator->reg); + if (ret) + KGSL_DRV_ERR(device, "Failed to enable regulator '%s': %d\n", + regulator->name, ret); + return ret; +} + +static void _regulator_disable(struct kgsl_regulator *regulator) +{ + if (!IS_ERR_OR_NULL(regulator->reg)) + regulator_disable(regulator->reg); +} + +static int _enable_regulators(struct kgsl_device *device, + struct kgsl_pwrctrl *pwr) +{ + int i; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) { + int ret = _regulator_enable(device, &pwr->regulators[i]); + + if (ret) { + for (i = i - 1; i >= 0; i--) + _regulator_disable(&pwr->regulators[i]); + return ret; + } + } + + return 0; +} + +static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + if (test_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->ctrl_flags)) + return 0; + + if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags)) { + trace_kgsl_rail(device, state); + device->ftbl->regulator_disable_poll(device); + } + } else if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags)) { + status = _enable_regulators(device, pwr); + + if (status) + clear_bit(KGSL_PWRFLAGS_POWER_ON, + &pwr->power_flags); + else + trace_kgsl_rail(device, state); + } + } + + return status; +} + +static void kgsl_pwrctrl_irq(struct kgsl_device *device, int state) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + + if (state == KGSL_PWRFLAGS_ON) { + if (!test_and_set_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + enable_irq(pwr->interrupt_num); + } + } else if (state == KGSL_PWRFLAGS_OFF) { + if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON, + &pwr->power_flags)) { + trace_kgsl_irq(device, state); + if (in_interrupt()) + disable_irq_nosync(pwr->interrupt_num); + else + disable_irq(pwr->interrupt_num); + } + } +} + +/** + * kgsl_thermal_cycle() - Work function for thermal timer. + * @work: The input work + * + * This function is called for work that is queued by the thermal + * timer. It cycles to the alternate thermal frequency. 
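+ *
+ * Together with kgsl_thermal_timer() this approximates a requested clock
+ * that falls between two pwrlevels: out of every TH_HZ jiffies the GPU
+ * runs thermal_timeout jiffies at thermal_pwrlevel + 1 and the remainder
+ * at thermal_pwrlevel, the same weighting kgsl_pwrctrl_max_gpuclk_show()
+ * uses to report the effective frequency.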
+ */ +static void kgsl_thermal_cycle(struct work_struct *work) +{ + struct kgsl_pwrctrl *pwr = container_of(work, struct kgsl_pwrctrl, + thermal_cycle_ws); + struct kgsl_device *device = container_of(pwr, struct kgsl_device, + pwrctrl); + + if (device == NULL) + return; + + mutex_lock(&device->mutex); + if (pwr->thermal_cycle == CYCLE_ACTIVE) { + if (pwr->thermal_highlow) + kgsl_pwrctrl_pwrlevel_change(device, + pwr->thermal_pwrlevel); + else + kgsl_pwrctrl_pwrlevel_change(device, + pwr->thermal_pwrlevel + 1); + } + mutex_unlock(&device->mutex); +} + +static void kgsl_thermal_timer(unsigned long data) +{ + struct kgsl_device *device = (struct kgsl_device *) data; + + /* Keep the timer running consistently despite processing time */ + if (device->pwrctrl.thermal_highlow) { + mod_timer(&device->pwrctrl.thermal_timer, + jiffies + + device->pwrctrl.thermal_timeout); + device->pwrctrl.thermal_highlow = 0; + } else { + mod_timer(&device->pwrctrl.thermal_timer, + jiffies + (TH_HZ - + device->pwrctrl.thermal_timeout)); + device->pwrctrl.thermal_highlow = 1; + } + /* Have work run in a non-interrupt context. */ + kgsl_schedule_work(&device->pwrctrl.thermal_cycle_ws); +} + +void kgsl_deep_nap_timer(unsigned long data) +{ + struct kgsl_device *device = (struct kgsl_device *) data; + + if (device->state == KGSL_STATE_NAP) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_DEEP_NAP); + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static int _get_regulator(struct kgsl_device *device, + struct kgsl_regulator *regulator, const char *str) +{ + regulator->reg = devm_regulator_get(&device->pdev->dev, str); + if (IS_ERR(regulator->reg)) { + KGSL_CORE_ERR("Couldn't get regulator: %s (%ld)\n", + str, PTR_ERR(regulator->reg)); + return PTR_ERR(regulator->reg); + } + + strlcpy(regulator->name, str, sizeof(regulator->name)); + return 0; +} + +static int get_legacy_regulators(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int ret; + + ret = _get_regulator(device, &pwr->regulators[0], "vdd"); + + /* Use vddcx only on targets that have it. 
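+	 * i.e. targets whose device tree provides a "vddcx-supply" property
+	 * in addition to the always-requested "vdd" regulator.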
*/ + if (ret == 0 && of_find_property(dev->of_node, "vddcx-supply", NULL)) + ret = _get_regulator(device, &pwr->regulators[1], "vddcx"); + + return ret; +} + +static int get_regulators(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int index = 0; + const char *name; + struct property *prop; + + if (!of_find_property(dev->of_node, "regulator-names", NULL)) + return get_legacy_regulators(device); + + of_property_for_each_string(dev->of_node, + "regulator-names", prop, name) { + int ret; + + if (index == KGSL_MAX_REGULATORS) { + KGSL_CORE_ERR("Too many regulators defined\n"); + return -ENOMEM; + } + + ret = _get_regulator(device, &pwr->regulators[index], name); + if (ret) + return ret; + index++; + } + + return 0; +} + +static int _get_clocks(struct kgsl_device *device) +{ + struct device *dev = &device->pdev->dev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + const char *name; + struct property *prop; + + of_property_for_each_string(dev->of_node, "clock-names", prop, name) { + int i; + + for (i = 0; i < KGSL_MAX_CLKS; i++) { + if (pwr->grp_clks[i] || strcmp(clocks[i], name)) + continue; + + pwr->grp_clks[i] = devm_clk_get(dev, name); + + if (IS_ERR(pwr->grp_clks[i])) { + int ret = PTR_ERR(pwr->grp_clks[i]); + + KGSL_CORE_ERR("Couldn't get clock: %s (%d)\n", + name, ret); + pwr->grp_clks[i] = NULL; + return ret; + } + + break; + } + } + + return 0; +} + +int kgsl_pwrctrl_init(struct kgsl_device *device) +{ + int i, k, m, n = 0, result; + struct platform_device *pdev = device->pdev; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct device_node *ocmem_bus_node; + struct msm_bus_scale_pdata *ocmem_scale_table = NULL; + struct msm_bus_scale_pdata *bus_scale_table; + struct device_node *gpubw_dev_node; + struct platform_device *p2dev; + + bus_scale_table = msm_bus_cl_get_pdata(device->pdev); + if (bus_scale_table == NULL) + return -EINVAL; + + result = _get_clocks(device); + if (result) + return result; + + /* Make sure we have a source clk for freq setting */ + if (pwr->grp_clks[0] == NULL) + pwr->grp_clks[0] = pwr->grp_clks[1]; + + if (of_property_read_u32(pdev->dev.of_node, "qcom,deep-nap-timeout", + &pwr->deep_nap_timeout)) + pwr->deep_nap_timeout = HZ/50; + + pwr->gx_retention = of_property_read_bool(pdev->dev.of_node, + "qcom,gx-retention"); + if (pwr->gx_retention) { + pwr->dummy_mx_clk = clk_get(&pdev->dev, "mx_clk"); + if (IS_ERR(pwr->dummy_mx_clk)) { + pwr->gx_retention = 0; + pwr->dummy_mx_clk = NULL; + KGSL_CORE_ERR("Couldn't get clock: mx_clk\n"); + } + } + + pwr->power_flags = BIT(KGSL_PWRFLAGS_RETENTION_ON); + + if (pwr->num_pwrlevels == 0) { + KGSL_PWR_ERR(device, "No power levels are defined\n"); + return -EINVAL; + } + + /* Initialize the user and thermal clock constraints */ + + pwr->max_pwrlevel = 0; + pwr->min_pwrlevel = pwr->num_pwrlevels - 2; + pwr->thermal_pwrlevel = 0; + + pwr->wakeup_maxpwrlevel = 0; + + for (i = 0; i < pwr->num_pwrlevels; i++) { + unsigned int freq = pwr->pwrlevels[i].gpu_freq; + + if (freq > 0) + freq = clk_round_rate(pwr->grp_clks[0], freq); + + pwr->pwrlevels[i].gpu_freq = freq; + } + + clk_set_rate(pwr->grp_clks[0], + pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq); + + clk_set_rate(pwr->grp_clks[6], + clk_round_rate(pwr->grp_clks[6], KGSL_RBBMTIMER_CLK_FREQ)); + + result = get_regulators(device); + if (result) + return result; + + pwr->power_flags = 0; + + if (kgsl_property_read_u32(device, "qcom,pm-qos-active-latency", + &pwr->pm_qos_active_latency)) + 
pwr->pm_qos_active_latency = 501; + + if (kgsl_property_read_u32(device, "qcom,pm-qos-wakeup-latency", + &pwr->pm_qos_wakeup_latency)) + pwr->pm_qos_wakeup_latency = 101; + + pm_runtime_enable(&pdev->dev); + + ocmem_bus_node = of_find_node_by_name( + device->pdev->dev.of_node, + "qcom,ocmem-bus-client"); + /* If platform has splitted ocmem bus client - use it */ + if (ocmem_bus_node) { + ocmem_scale_table = msm_bus_pdata_from_node + (device->pdev, ocmem_bus_node); + if (ocmem_scale_table) + pwr->ocmem_pcl = msm_bus_scale_register_client + (ocmem_scale_table); + + if (!pwr->ocmem_pcl) + return -EINVAL; + } + + /* Bus width in bytes, set it to zero if not found */ + if (of_property_read_u32(pdev->dev.of_node, "qcom,bus-width", + &pwr->bus_width)) + pwr->bus_width = 0; + + /* Check if gpu bandwidth vote device is defined in dts */ + if (pwr->bus_control) + /* Check if gpu bandwidth vote device is defined in dts */ + gpubw_dev_node = of_parse_phandle(pdev->dev.of_node, + "qcom,gpubw-dev", 0); + + /* + * Governor support enables the gpu bus scaling via governor + * and hence no need to register for bus scaling client + * if gpubw-dev is defined. + */ + if (gpubw_dev_node) { + p2dev = of_find_device_by_node(gpubw_dev_node); + if (p2dev) + pwr->devbw = &p2dev->dev; + } else { + /* + * Register for gpu bus scaling if governor support + * is not enabled and gpu bus voting is to be done + * from the driver. + */ + pwr->pcl = msm_bus_scale_register_client(bus_scale_table); + if (pwr->pcl == 0) + return -EINVAL; + } + + pwr->bus_ib = kzalloc(bus_scale_table->num_usecases * + sizeof(*pwr->bus_ib), GFP_KERNEL); + if (pwr->bus_ib == NULL) + return -ENOMEM; + + /* + * Pull the BW vote out of the bus table. They will be used to + * calculate the ratio between the votes. + */ + for (i = 0; i < bus_scale_table->num_usecases; i++) { + struct msm_bus_paths *usecase = + &bus_scale_table->usecase[i]; + struct msm_bus_vectors *vector = &usecase->vectors[0]; + if (vector->dst == MSM_BUS_SLAVE_EBI_CH0 && + vector->ib != 0) { + + if (i < KGSL_MAX_BUSLEVELS) { + /* Convert bytes to Mbytes. */ + ib_votes[i] = + DIV_ROUND_UP_ULL(vector->ib, 1048576) + - 1; + if (ib_votes[i] > ib_votes[max_vote_buslevel]) + max_vote_buslevel = i; + } + + /* check for duplicate values */ + for (k = 0; k < n; k++) + if (vector->ib == pwr->bus_ib[k]) + break; + + /* if this is a new ib value, save it */ + if (k == n) { + pwr->bus_ib[k] = vector->ib; + n++; + /* find which pwrlevels use this ib */ + for (m = 0; m < pwr->num_pwrlevels - 1; m++) { + if (bus_scale_table-> + usecase[pwr->pwrlevels[m]. 
+ bus_freq].vectors[0].ib + == vector->ib) + pwr->bus_index[m] = k; + } + } + } + } + + INIT_WORK(&pwr->thermal_cycle_ws, kgsl_thermal_cycle); + setup_timer(&pwr->thermal_timer, kgsl_thermal_timer, + (unsigned long) device); + + INIT_LIST_HEAD(&pwr->limits); + spin_lock_init(&pwr->limits_lock); + pwr->sysfs_pwr_limit = kgsl_pwr_limits_add(KGSL_DEVICE_3D0); + + setup_timer(&pwr->deep_nap_timer, kgsl_deep_nap_timer, + (unsigned long) device); + devfreq_vbif_register_callback(kgsl_get_bw); + + return result; +} + +void kgsl_pwrctrl_close(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int i; + + KGSL_PWR_INFO(device, "close device %d\n", device->id); + + pm_runtime_disable(&device->pdev->dev); + + if (pwr->pcl) + msm_bus_scale_unregister_client(pwr->pcl); + + pwr->pcl = 0; + + if (pwr->ocmem_pcl) + msm_bus_scale_unregister_client(pwr->ocmem_pcl); + + pwr->ocmem_pcl = 0; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) + pwr->regulators[i].reg = NULL; + + for (i = 0; i < KGSL_MAX_REGULATORS; i++) + pwr->grp_clks[i] = NULL; + + pwr->power_flags = 0; + + if (!IS_ERR_OR_NULL(pwr->sysfs_pwr_limit)) { + list_del(&pwr->sysfs_pwr_limit->node); + kfree(pwr->sysfs_pwr_limit); + pwr->sysfs_pwr_limit = NULL; + } + kfree(pwr->bus_ib); +} + +/** + * kgsl_idle_check() - Work function for GPU interrupts and idle timeouts. + * @device: The device + * + * This function is called for work that is queued by the interrupt + * handler or the idle timer. It attempts to transition to a clocks + * off state if the active_cnt is 0 and the hardware is idle. + */ +void kgsl_idle_check(struct work_struct *work) +{ + struct kgsl_device *device = container_of(work, struct kgsl_device, + idle_check_ws); + WARN_ON(device == NULL); + if (device == NULL) + return; + + mutex_lock(&device->mutex); + + if (device->state == KGSL_STATE_ACTIVE + || device->state == KGSL_STATE_NAP + || device->state == KGSL_STATE_DEEP_NAP) { + + if (!atomic_read(&device->active_cnt)) + kgsl_pwrctrl_change_state(device, + device->requested_state); + + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + if (device->state == KGSL_STATE_ACTIVE) + mod_timer(&device->idle_timer, + jiffies + + device->pwrctrl.interval_timeout); + } + if (device->state != KGSL_STATE_DEEP_NAP) + kgsl_pwrscale_update(device); + mutex_unlock(&device->mutex); +} +EXPORT_SYMBOL(kgsl_idle_check); + +void kgsl_timer(unsigned long data) +{ + struct kgsl_device *device = (struct kgsl_device *) data; + + KGSL_PWR_INFO(device, "idle timer expired device %d\n", device->id); + if (device->requested_state != KGSL_STATE_SUSPEND) { + if (device->pwrctrl.strtstp_sleepwake) + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER); + else + kgsl_pwrctrl_request_state(device, KGSL_STATE_SLEEP); + /* Have work run in a non-interrupt context. */ + kgsl_schedule_work(&device->idle_check_ws); + } +} + +static bool kgsl_pwrctrl_isenabled(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + return ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) != 0) && + (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags) != 0)); +} + +/** + * kgsl_pre_hwaccess - Enforce preconditions for touching registers + * @device: The device + * + * This function ensures that the correct lock is held and that the GPU + * clock is on immediately before a register is read or written. Note + * that this function does not check active_cnt because the registers + * must be accessed during device start and stop, when the active_cnt + * may legitimately be 0. 
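 *
 * A register accessor built on top of this check would typically look
 * like the following minimal sketch; example_read_reg() is illustrative
 * only, and reg_virt is assumed to be the device's mapped register base:
 *
 *	static unsigned int example_read_reg(struct kgsl_device *device,
 *			unsigned int offsetwords)
 *	{
 *		kgsl_pre_hwaccess(device);
 *		return readl_relaxed(device->reg_virt + (offsetwords << 2));
 *	}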
+ */ +void kgsl_pre_hwaccess(struct kgsl_device *device) +{ + /* In order to touch a register you must hold the device mutex...*/ + BUG_ON(!mutex_is_locked(&device->mutex)); + /* and have the clock on! */ + BUG_ON(!kgsl_pwrctrl_isenabled(device)); +} +EXPORT_SYMBOL(kgsl_pre_hwaccess); + +static int kgsl_pwrctrl_enable(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int level, status; + + if (pwr->wakeup_maxpwrlevel) { + level = pwr->max_pwrlevel; + pwr->wakeup_maxpwrlevel = 0; + } else if (kgsl_popp_check(device)) { + level = pwr->active_pwrlevel; + } else { + level = pwr->default_pwrlevel; + } + + kgsl_pwrctrl_pwrlevel_change(device, level); + + /* Order pwrrail/clk sequence based upon platform */ + status = kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON); + if (status) + return status; + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); + return device->ftbl->regulator_enable(device); +} + +static void kgsl_pwrctrl_disable(struct kgsl_device *device) +{ + /* Order pwrrail/clk sequence based upon platform */ + device->ftbl->regulator_disable(device); + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP); + kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_OFF); +} + +/** + * _init() - Get the GPU ready to start, but don't turn anything on + * @device - Pointer to the kgsl_device struct + */ +static int _init(struct kgsl_device *device) +{ + int status = 0; + switch (device->state) { + case KGSL_STATE_NAP: + case KGSL_STATE_DEEP_NAP: + case KGSL_STATE_SLEEP: + /* Get the device out of retention */ + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + /* Force power on to do the stop */ + status = kgsl_pwrctrl_enable(device); + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + del_timer_sync(&device->idle_timer); + device->ftbl->stop(device); + /* fall through */ + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + /* fall through */ + case KGSL_STATE_SLUMBER: + case KGSL_STATE_NONE: + kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT); + } + + return status; +} + +/** + * _wake() - Power up the GPU from a slumber/sleep state + * @device - Pointer to the kgsl_device struct + * + * Resume the GPU from a lower power state to ACTIVE. 
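 *
 * Callers normally reach this through kgsl_pwrctrl_change_state(device,
 * KGSL_STATE_ACTIVE) while holding the device mutex, as
 * kgsl_active_count_get() does further below. A minimal sketch of that
 * call pattern:
 *
 *	mutex_lock(&device->mutex);
 *	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
 *	mutex_unlock(&device->mutex);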
+ */ +static int _wake(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + int status = 0; + + switch (device->state) { + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + /* Call the GPU specific resume function */ + device->ftbl->resume(device); + /* fall through */ + case KGSL_STATE_SLUMBER: + status = device->ftbl->start(device, + device->pwrctrl.superfast); + device->pwrctrl.superfast = false; + + if (status) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + KGSL_DRV_ERR(device, "start failed %d\n", status); + break; + } + /* fall through */ + case KGSL_STATE_SLEEP: + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON); + kgsl_pwrscale_wake(device); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + /* fall through */ + case KGSL_STATE_DEEP_NAP: + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + device->pwrctrl.pm_qos_active_latency); + /* Get the device out of retention */ + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + /* fall through */ + case KGSL_STATE_NAP: + /* Turn on the core clocks */ + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE); + + /* + * No need to turn on/off irq here as it no longer affects + * power collapse + */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + + /* Change register settings if any after pwrlevel change*/ + kgsl_pwrctrl_pwrlevel_change_settings(device, 1); + /* All settings for power level transitions are complete*/ + pwr->previous_pwrlevel = pwr->active_pwrlevel; + mod_timer(&device->idle_timer, jiffies + + device->pwrctrl.interval_timeout); + del_timer_sync(&device->pwrctrl.deep_nap_timer); + + break; + case KGSL_STATE_AWARE: + /* Enable state before turning on irq */ + kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON); + mod_timer(&device->idle_timer, jiffies + + device->pwrctrl.interval_timeout); + del_timer_sync(&device->pwrctrl.deep_nap_timer); + break; + default: + KGSL_PWR_WARN(device, "unhandled state %s\n", + kgsl_pwrstate_to_str(device->state)); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + return status; +} + +/* + * _aware() - Put device into AWARE + * @device: Device pointer + * + * The GPU should be available for register reads/writes and able + * to communicate with the rest of the system. However disable all + * paths that allow a switch to an interrupt context (interrupts & + * timers). + * Return 0 on success else error code + */ +static int +_aware(struct kgsl_device *device) +{ + int status = 0; + switch (device->state) { + case KGSL_STATE_INIT: + status = kgsl_pwrctrl_enable(device); + break; + /* The following 3 cases shouldn't occur, but don't panic. */ + case KGSL_STATE_DEEP_NAP: + case KGSL_STATE_NAP: + case KGSL_STATE_SLEEP: + status = _wake(device); + case KGSL_STATE_ACTIVE: + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + del_timer_sync(&device->idle_timer); + break; + case KGSL_STATE_SLUMBER: + status = kgsl_pwrctrl_enable(device); + break; + default: + status = -EINVAL; + } + if (status) + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + else + kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE); + return status; +} + +static int +_nap(struct kgsl_device *device) +{ + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + + /* + * Read HW busy counters before going to NAP state. 
+ * The data might be used by power scale governors + * independently of the HW activity. For example + * the simple-on-demand governor will get the latest + * busy_time data even if the gpu isn't active. + */ + kgsl_pwrscale_update_stats(device); + + mod_timer(&device->pwrctrl.deep_nap_timer, jiffies + + device->pwrctrl.deep_nap_timeout); + + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_NAP); + kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP); + case KGSL_STATE_SLEEP: + case KGSL_STATE_SLUMBER: + break; + case KGSL_STATE_AWARE: + KGSL_PWR_WARN(device, + "transition AWARE -> NAP is not permitted\n"); + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_deep_nap(struct kgsl_device *device) +{ + switch (device->state) { + /* + * Device is expected to be clock gated to move to + * a deeper low power state. No other transition is permitted + */ + case KGSL_STATE_NAP: + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_OFF); + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_DEFAULT_VALUE); + kgsl_pwrctrl_set_state(device, KGSL_STATE_DEEP_NAP); + break; + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + return 0; +} + +static int +_sleep(struct kgsl_device *device) +{ + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + /* fall through */ + case KGSL_STATE_NAP: + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrscale_sleep(device); + kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLEEP); + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_DEFAULT_VALUE); + break; + case KGSL_STATE_SLUMBER: + break; + case KGSL_STATE_AWARE: + KGSL_PWR_WARN(device, + "transition AWARE -> SLEEP is not permitted\n"); + default: + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + } + + return 0; +} + +static int +_slumber(struct kgsl_device *device) +{ + int status = 0; + switch (device->state) { + case KGSL_STATE_ACTIVE: + if (!device->ftbl->is_hw_collapsible(device)) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + return -EBUSY; + } + /* fall through */ + case KGSL_STATE_NAP: + case KGSL_STATE_SLEEP: + case KGSL_STATE_DEEP_NAP: + del_timer_sync(&device->idle_timer); + if (device->pwrctrl.thermal_cycle == CYCLE_ACTIVE) { + device->pwrctrl.thermal_cycle = CYCLE_ENABLE; + del_timer_sync(&device->pwrctrl.thermal_timer); + } + del_timer_sync(&device->pwrctrl.deep_nap_timer); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + /* Get the device out of retention */ + kgsl_pwrctrl_retention_clk(device, KGSL_PWRFLAGS_ON); + /* make sure power is on to stop the device*/ + status = kgsl_pwrctrl_enable(device); + device->ftbl->suspend_context(device); + device->ftbl->stop(device); + kgsl_pwrctrl_disable(device); + kgsl_pwrscale_sleep(device); + kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, + PM_QOS_DEFAULT_VALUE); + break; + case KGSL_STATE_SUSPEND: + complete_all(&device->hwaccess_gate); + device->ftbl->resume(device); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + case KGSL_STATE_AWARE: + kgsl_pwrctrl_disable(device); + kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); + break; + default: + 
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + break; + + } + return status; +} + +/* + * _suspend() - Put device into suspend + * @device: Device pointer + * + * Return 0 on success else error code + */ +static int _suspend(struct kgsl_device *device) +{ + int ret = 0; + + if ((KGSL_STATE_NONE == device->state) || + (KGSL_STATE_INIT == device->state)) + return ret; + + /* drain to prevent from more commands being submitted */ + device->ftbl->drain(device); + /* wait for active count so device can be put in slumber */ + ret = kgsl_active_count_wait(device, 0); + if (ret) + goto err; + + ret = device->ftbl->idle(device); + if (ret) + goto err; + + ret = _slumber(device); + if (ret) + goto err; + + kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND); + return ret; + +err: + device->ftbl->resume(device); + KGSL_PWR_ERR(device, "device failed to SUSPEND %d\n", ret); + return ret; +} + +/* + * kgsl_pwrctrl_change_state() changes the GPU state to the input + * @device: Pointer to a KGSL device + * @state: desired KGSL state + * + * Caller must hold the device mutex. If the requested state change + * is valid, execute it. Otherwise return an error code explaining + * why the change has not taken place. Also print an error if an + * unexpected state change failure occurs. For example, a change to + * NAP may be rejected because the GPU is busy, this is not an error. + * A change to SUSPEND should go through no matter what, so if it + * fails an additional error message will be printed to dmesg. + */ +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state) +{ + int status = 0; + if (device->state == state) + return status; + kgsl_pwrctrl_request_state(device, state); + + /* Work through the legal state transitions */ + switch (state) { + case KGSL_STATE_INIT: + status = _init(device); + break; + case KGSL_STATE_AWARE: + status = _aware(device); + break; + case KGSL_STATE_ACTIVE: + status = _wake(device); + break; + case KGSL_STATE_NAP: + status = _nap(device); + break; + case KGSL_STATE_SLEEP: + status = _sleep(device); + break; + case KGSL_STATE_SLUMBER: + status = _slumber(device); + break; + case KGSL_STATE_SUSPEND: + status = _suspend(device); + break; + case KGSL_STATE_DEEP_NAP: + status = _deep_nap(device); + break; + default: + KGSL_PWR_INFO(device, "bad state request 0x%x\n", state); + kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE); + status = -EINVAL; + break; + } + + /* Record the state timing info */ + if (!status) { + ktime_t t = ktime_get(); + _record_pwrevent(device, t, KGSL_PWREVENT_STATE); + } + return status; +} +EXPORT_SYMBOL(kgsl_pwrctrl_change_state); + +static void kgsl_pwrctrl_set_state(struct kgsl_device *device, + unsigned int state) +{ + trace_kgsl_pwr_set_state(device, state); + device->state = state; + device->requested_state = KGSL_STATE_NONE; +} + +static void kgsl_pwrctrl_request_state(struct kgsl_device *device, + unsigned int state) +{ + if (state != KGSL_STATE_NONE && state != device->requested_state) + trace_kgsl_pwr_request_state(device, state); + device->requested_state = state; +} + +const char *kgsl_pwrstate_to_str(unsigned int state) +{ + switch (state) { + case KGSL_STATE_NONE: + return "NONE"; + case KGSL_STATE_INIT: + return "INIT"; + case KGSL_STATE_AWARE: + return "AWARE"; + case KGSL_STATE_ACTIVE: + return "ACTIVE"; + case KGSL_STATE_NAP: + return "NAP"; + case KGSL_STATE_DEEP_NAP: + return "DEEP_NAP"; + case KGSL_STATE_SLEEP: + return "SLEEP"; + case KGSL_STATE_SUSPEND: + return "SUSPEND"; + case KGSL_STATE_SLUMBER: + return 
"SLUMBER"; + default: + break; + } + return "UNKNOWN"; +} +EXPORT_SYMBOL(kgsl_pwrstate_to_str); + + +/** + * kgsl_active_count_get() - Increase the device active count + * @device: Pointer to a KGSL device + * + * Increase the active count for the KGSL device and turn on + * clocks if this is the first reference. Code paths that need + * to touch the hardware or wait for the hardware to complete + * an operation must hold an active count reference until they + * are finished. An error code will be returned if waking the + * device fails. The device mutex must be held while *calling + * this function. + */ +int kgsl_active_count_get(struct kgsl_device *device) +{ + int ret = 0; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if ((atomic_read(&device->active_cnt) == 0) && + (device->state != KGSL_STATE_ACTIVE)) { + mutex_unlock(&device->mutex); + wait_for_completion(&device->hwaccess_gate); + mutex_lock(&device->mutex); + device->pwrctrl.superfast = true; + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); + } + if (ret == 0) + atomic_inc(&device->active_cnt); + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + return ret; +} +EXPORT_SYMBOL(kgsl_active_count_get); + +/** + * kgsl_active_count_put() - Decrease the device active count + * @device: Pointer to a KGSL device + * + * Decrease the active count for the KGSL device and turn off + * clocks if there are no remaining references. This function will + * transition the device to NAP if there are no other pending state + * changes. It also completes the suspend gate. The device mutex must + * be held while calling this function. + */ +void kgsl_active_count_put(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + BUG_ON(atomic_read(&device->active_cnt) == 0); + + if (atomic_dec_and_test(&device->active_cnt)) { + if (device->state == KGSL_STATE_ACTIVE && + device->requested_state == KGSL_STATE_NONE) { + kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP); + kgsl_schedule_work(&device->idle_check_ws); + } + + mod_timer(&device->idle_timer, + jiffies + device->pwrctrl.interval_timeout); + } + + trace_kgsl_active_count(device, + (unsigned long) __builtin_return_address(0)); + + wake_up(&device->active_cnt_wq); +} +EXPORT_SYMBOL(kgsl_active_count_put); + +static int _check_active_count(struct kgsl_device *device, int count) +{ + /* Return 0 if the active count is greater than the desired value */ + return atomic_read(&device->active_cnt) > count ? 0 : 1; +} + +/** + * kgsl_active_count_wait() - Wait for activity to finish. + * @device: Pointer to a KGSL device + * @count: Active count value to wait for + * + * Block until the active_cnt value hits the desired value + */ +int kgsl_active_count_wait(struct kgsl_device *device, int count) +{ + int result = 0; + long wait_jiffies = HZ; + + BUG_ON(!mutex_is_locked(&device->mutex)); + + while (atomic_read(&device->active_cnt) > count) { + long ret; + mutex_unlock(&device->mutex); + ret = wait_event_timeout(device->active_cnt_wq, + _check_active_count(device, count), wait_jiffies); + mutex_lock(&device->mutex); + result = ret == 0 ? 
-ETIMEDOUT : 0; + if (!result) + wait_jiffies = ret; + else + break; + } + + return result; +} +EXPORT_SYMBOL(kgsl_active_count_wait); + +/** + * _update_limits() - update the limits based on the current requests + * @limit: Pointer to the limits structure + * @reason: Reason for the update + * @level: Level if any to be set + * + * Set the thermal pwrlevel based on the current limits + */ +static void _update_limits(struct kgsl_pwr_limit *limit, unsigned int reason, + unsigned int level) +{ + struct kgsl_device *device = limit->device; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwr_limit *temp_limit; + unsigned int max_level = 0; + + spin_lock(&pwr->limits_lock); + switch (reason) { + case KGSL_PWR_ADD_LIMIT: + list_add(&limit->node, &pwr->limits); + break; + case KGSL_PWR_DEL_LIMIT: + list_del(&limit->node); + if (list_empty(&pwr->limits)) + goto done; + break; + case KGSL_PWR_SET_LIMIT: + limit->level = level; + break; + default: + break; + } + + list_for_each_entry(temp_limit, &pwr->limits, node) { + max_level = max_t(unsigned int, max_level, temp_limit->level); + } + +done: + spin_unlock(&pwr->limits_lock); + + mutex_lock(&device->mutex); + pwr->thermal_pwrlevel = max_level; + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel); + mutex_unlock(&device->mutex); +} + +/** + * kgsl_pwr_limits_add() - Add a new pwr limit + * @id: Device ID + * + * Allocate a pwr limit structure for the client, add it to the limits + * list and return the pointer to the client + */ +void *kgsl_pwr_limits_add(enum kgsl_deviceid id) +{ + struct kgsl_device *device = kgsl_get_device(id); + struct kgsl_pwr_limit *limit; + + if (IS_ERR_OR_NULL(device)) + return NULL; + + limit = kzalloc(sizeof(struct kgsl_pwr_limit), + GFP_KERNEL); + if (limit == NULL) + return ERR_PTR(-ENOMEM); + limit->device = device; + + _update_limits(limit, KGSL_PWR_ADD_LIMIT, 0); + return limit; +} +EXPORT_SYMBOL(kgsl_pwr_limits_add); + +/** + * kgsl_pwr_limits_del() - Unregister the pwr limit client and + * adjust the thermal limits + * @limit_ptr: Client handle + * + * Delete the client handle from the thermal list and adjust the + * active clocks if needed. 
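 *
 * Together with kgsl_pwr_limits_add() and kgsl_pwr_limits_set_freq()
 * this forms the external thermal-limit interface. A client typically
 * does the following (a minimal sketch; the 300 MHz cap is only an
 * example value):
 *
 *	void *handle = kgsl_pwr_limits_add(KGSL_DEVICE_3D0);
 *
 *	if (!IS_ERR_OR_NULL(handle)) {
 *		kgsl_pwr_limits_set_freq(handle, 300000000);
 *		... hold the cap while mitigation is required ...
 *		kgsl_pwr_limits_del(handle);
 *	}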
+ */ +void kgsl_pwr_limits_del(void *limit_ptr) +{ + struct kgsl_pwr_limit *limit = limit_ptr; + if (IS_ERR(limit)) + return; + + _update_limits(limit, KGSL_PWR_DEL_LIMIT, 0); + kfree(limit); +} +EXPORT_SYMBOL(kgsl_pwr_limits_del); + +/** + * kgsl_pwr_limits_set_freq() - Set the requested limit for the client + * @limit_ptr: Client handle + * @freq: Client requested frequency + * + * Set the new limit for the client and adjust the clocks + */ +int kgsl_pwr_limits_set_freq(void *limit_ptr, unsigned int freq) +{ + struct kgsl_pwrctrl *pwr; + struct kgsl_pwr_limit *limit = limit_ptr; + int level; + + if (IS_ERR(limit)) + return -EINVAL; + + pwr = &limit->device->pwrctrl; + level = _get_nearest_pwrlevel(pwr, freq); + if (level < 0) + return -EINVAL; + _update_limits(limit, KGSL_PWR_SET_LIMIT, level); + return 0; +} +EXPORT_SYMBOL(kgsl_pwr_limits_set_freq); + +/** + * kgsl_pwr_limits_set_default() - Set the default thermal limit for the client + * @limit_ptr: Client handle + * + * Set the default for the client and adjust the clocks + */ +void kgsl_pwr_limits_set_default(void *limit_ptr) +{ + struct kgsl_pwr_limit *limit = limit_ptr; + + if (IS_ERR(limit)) + return; + + _update_limits(limit, KGSL_PWR_SET_LIMIT, 0); +} +EXPORT_SYMBOL(kgsl_pwr_limits_set_default); + +/** + * kgsl_pwr_limits_get_freq() - Get the current limit + * @id: Device ID + * + * Get the current limit set for the device + */ +unsigned int kgsl_pwr_limits_get_freq(enum kgsl_deviceid id) +{ + struct kgsl_device *device = kgsl_get_device(id); + struct kgsl_pwrctrl *pwr; + unsigned int freq; + + if (IS_ERR_OR_NULL(device)) + return 0; + pwr = &device->pwrctrl; + mutex_lock(&device->mutex); + freq = pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq; + mutex_unlock(&device->mutex); + + return freq; +} +EXPORT_SYMBOL(kgsl_pwr_limits_get_freq); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h new file mode 100644 index 000000000000..5335dbfa6a58 --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -0,0 +1,241 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_PWRCTRL_H +#define __KGSL_PWRCTRL_H + +#include <linux/pm_qos.h> + +/***************************************************************************** +** power flags +*****************************************************************************/ +#define KGSL_PWRFLAGS_ON 1 +#define KGSL_PWRFLAGS_OFF 0 + +#define KGSL_PWRLEVEL_TURBO 0 +#define KGSL_PWRLEVEL_NOMINAL 1 +#define KGSL_PWRLEVEL_LAST_OFFSET 2 + +#define KGSL_PWR_ON 0xFFFF + +#define KGSL_MAX_CLKS 11 +#define KGSL_MAX_REGULATORS 2 + +#define KGSL_MAX_PWRLEVELS 10 + +/* Only two supported levels, min & max */ +#define KGSL_CONSTRAINT_PWR_MAXLEVELS 2 + +#define KGSL_RBBMTIMER_CLK_FREQ 19200000 + +/* Symbolic table for the constraint type */ +#define KGSL_CONSTRAINT_TYPES \ + { KGSL_CONSTRAINT_NONE, "None" }, \ + { KGSL_CONSTRAINT_PWRLEVEL, "Pwrlevel" } +/* Symbolic table for the constraint sub type */ +#define KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES \ + { KGSL_CONSTRAINT_PWR_MIN, "Min" }, \ + { KGSL_CONSTRAINT_PWR_MAX, "Max" } + +#define KGSL_PWR_ADD_LIMIT 0 +#define KGSL_PWR_DEL_LIMIT 1 +#define KGSL_PWR_SET_LIMIT 2 + +enum kgsl_pwrctrl_timer_type { + KGSL_PWR_IDLE_TIMER, + KGSL_PWR_DEEP_NAP_TIMER, +}; + +/* + * States for thermal cycling. _DISABLE means that no cycling has been + * requested. _ENABLE means that cycling has been requested, but GPU + * DCVS is currently recommending running at a lower frequency than the + * cycle frequency. _ACTIVE means that the frequency is actively being + * cycled. + */ +#define CYCLE_DISABLE 0 +#define CYCLE_ENABLE 1 +#define CYCLE_ACTIVE 2 + +struct platform_device; + +struct kgsl_clk_stats { + unsigned int busy; + unsigned int total; + unsigned int busy_old; + unsigned int total_old; +}; + +struct kgsl_pwr_constraint { + unsigned int type; + unsigned int sub_type; + union { + struct { + unsigned int level; + } pwrlevel; + } hint; + unsigned long expires; + uint32_t owner_id; +}; + +/** + * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained from + * from dtsi file + * @gpu_freq: GPU frequency vote in Hz + * @bus_freq: Bus bandwidth vote index + * @bus_min: Min bus index @gpu_freq + * @bus_max: Max bus index @gpu_freq + */ +struct kgsl_pwrlevel { + unsigned int gpu_freq; + unsigned int bus_freq; + unsigned int bus_min; + unsigned int bus_max; +}; + +struct kgsl_regulator { + struct regulator *reg; + char name[8]; +}; + +/** + * struct kgsl_pwrctrl - Power control settings for a KGSL device + * @interrupt_num - The interrupt number for the device + * @grp_clks - Array of clocks structures that we control + * @dummy_mx_clk - mx clock that is contolled during retention + * @power_flags - Control flags for power + * @pwrlevels - List of supported power levels + * @active_pwrlevel - The currently active power level + * @previous_pwrlevel - The power level before transition + * @thermal_pwrlevel - maximum powerlevel constraint from thermal + * @default_pwrlevel - device wake up power level + * @max_pwrlevel - maximum allowable powerlevel per the user + * @min_pwrlevel - minimum allowable powerlevel per the user + * @num_pwrlevels - number of available power levels + * @interval_timeout - timeout in jiffies to be idle before a power event + * @strtstp_sleepwake - true if the device supports low latency GPU start/stop + * @regulators - array of pointers to kgsl_regulator structs + * @pcl - bus scale identifier + * @ocmem - ocmem bus scale identifier + * @irq_name - resource name for the IRQ + * @clk_stats - structure of clock statistics + * @pm_qos_req_dma - the 
power management quality of service structure + * @pm_qos_active_latency - allowed CPU latency in microseconds when active + * @pm_qos_wakeup_latency - allowed CPU latency in microseconds during wakeup + * @bus_control - true if the bus calculation is independent + * @bus_mod - modifier from the current power level for the bus vote + * @bus_percent_ab - current percent of total possible bus usage + * @bus_width - target specific bus width in number of bytes + * @bus_ab_mbytes - AB vote in Mbytes for current bus usage + * @bus_index - default bus index into the bus_ib table + * @bus_ib - the set of unique ib requests needed for the bus calculation + * @constraint - currently active power constraint + * @superfast - Boolean flag to indicate that the GPU start should be run in the + * higher priority thread + * @thermal_cycle_ws - Work struct for scheduling thermal cycling + * @thermal_timer - Timer for thermal cycling + * @thermal_timeout - Cycling timeout for switching between frequencies + * @thermal_cycle - Is thermal cycling enabled + * @thermal_highlow - flag for swithcing between high and low frequency + * @limits - list head for limits + * @limits_lock - spin lock to protect limits list + * @sysfs_pwr_limit - pointer to the sysfs limits node + * @deep_nap_timer - Timer struct for entering deep nap + * @deep_nap_timeout - Timeout for entering deep nap + * @gx_retention - true if retention voltage is allowed + */ + +struct kgsl_pwrctrl { + int interrupt_num; + struct clk *grp_clks[KGSL_MAX_CLKS]; + struct clk *dummy_mx_clk; + unsigned long power_flags; + unsigned long ctrl_flags; + struct kgsl_pwrlevel pwrlevels[KGSL_MAX_PWRLEVELS]; + unsigned int active_pwrlevel; + unsigned int previous_pwrlevel; + unsigned int thermal_pwrlevel; + unsigned int default_pwrlevel; + unsigned int wakeup_maxpwrlevel; + unsigned int max_pwrlevel; + unsigned int min_pwrlevel; + unsigned int num_pwrlevels; + unsigned long interval_timeout; + bool strtstp_sleepwake; + struct kgsl_regulator regulators[KGSL_MAX_REGULATORS]; + uint32_t pcl; + uint32_t ocmem_pcl; + const char *irq_name; + struct kgsl_clk_stats clk_stats; + struct pm_qos_request pm_qos_req_dma; + unsigned int pm_qos_active_latency; + unsigned int pm_qos_wakeup_latency; + bool bus_control; + int bus_mod; + unsigned int bus_percent_ab; + unsigned int bus_width; + unsigned long bus_ab_mbytes; + struct device *devbw; + unsigned int bus_index[KGSL_MAX_PWRLEVELS]; + uint64_t *bus_ib; + struct kgsl_pwr_constraint constraint; + bool superfast; + struct work_struct thermal_cycle_ws; + struct timer_list thermal_timer; + uint32_t thermal_timeout; + uint32_t thermal_cycle; + uint32_t thermal_highlow; + struct list_head limits; + spinlock_t limits_lock; + struct kgsl_pwr_limit *sysfs_pwr_limit; + struct timer_list deep_nap_timer; + uint32_t deep_nap_timeout; + bool gx_retention; +}; + +int kgsl_pwrctrl_init(struct kgsl_device *device); +void kgsl_pwrctrl_close(struct kgsl_device *device); +void kgsl_timer(unsigned long data); +void kgsl_idle_check(struct work_struct *work); +void kgsl_pre_hwaccess(struct kgsl_device *device); +void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, + unsigned int level); +void kgsl_pwrctrl_buslevel_update(struct kgsl_device *device, + bool on); +int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device); +void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device); +int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state); + +static inline unsigned long kgsl_get_clkrate(struct clk *clk) +{ + return (clk != NULL) ? 
clk_get_rate(clk) : 0; +} + +/* + * kgsl_pwrctrl_active_freq - get currently configured frequency + * @pwr: kgsl_pwrctrl structure for the device + * + * Returns the currently configured frequency for the device. + */ +static inline unsigned long +kgsl_pwrctrl_active_freq(struct kgsl_pwrctrl *pwr) +{ + return pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq; +} + +int __must_check kgsl_active_count_get(struct kgsl_device *device); +void kgsl_active_count_put(struct kgsl_device *device); +int kgsl_active_count_wait(struct kgsl_device *device, int count); +void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); +void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, + struct kgsl_pwr_constraint *pwrc, uint32_t id); +#endif /* __KGSL_PWRCTRL_H */ diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c new file mode 100644 index 000000000000..c888df3cb6ed --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale.c @@ -0,0 +1,905 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/export.h> +#include <linux/kernel.h> + +#include "kgsl.h" +#include "kgsl_pwrscale.h" +#include "kgsl_device.h" +#include "kgsl_trace.h" + +#define FAST_BUS 1 +#define SLOW_BUS -1 + +/* + * "SLEEP" is generic counting both NAP & SLUMBER + * PERIODS generally won't exceed 9 for the relavent 150msec + * window, but can be significantly smaller and still POPP + * pushable in cases where SLUMBER is involved. Hence the + * additional reliance on PERCENT to make sure a reasonable + * amount of down-time actually exists. + */ +#define MIN_SLEEP_PERIODS 3 +#define MIN_SLEEP_PERCENT 5 + +static struct kgsl_popp popp_param[POPP_MAX] = { + {0, 0}, + {-5, 20}, + {-5, 0}, + {0, 0}, +}; + +static void do_devfreq_suspend(struct work_struct *work); +static void do_devfreq_resume(struct work_struct *work); +static void do_devfreq_notify(struct work_struct *work); + +/* + * These variables are used to keep the latest data + * returned by kgsl_devfreq_get_dev_status + */ +static struct xstats last_xstats; +static struct devfreq_dev_status last_status = { .private_data = &last_xstats }; + +/* + * kgsl_pwrscale_sleep - notify governor that device is going off + * @device: The device + * + * Called shortly after all pending work is completed. + */ +void kgsl_pwrscale_sleep(struct kgsl_device *device) +{ + struct kgsl_pwrscale *psc = &device->pwrscale; + BUG_ON(!mutex_is_locked(&device->mutex)); + if (!device->pwrscale.enabled) + return; + device->pwrscale.on_time = 0; + + psc->popp_level = 0; + clear_bit(POPP_PUSH, &device->pwrscale.popp_state); + + /* to call devfreq_suspend_device() from a kernel thread */ + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); +} +EXPORT_SYMBOL(kgsl_pwrscale_sleep); + +/* + * kgsl_pwrscale_wake - notify governor that device is going on + * @device: The device + * + * Called when the device is returning to an active state. 
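 *
 * The wake path calls this between re-enabling the AXI bus and the GPU
 * interrupt, with the device mutex held; the devfreq_resume_device()
 * call itself is deferred to the devfreq workqueue so that it runs from
 * a kernel thread rather than from this context.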
+ */ +void kgsl_pwrscale_wake(struct kgsl_device *device) +{ + struct kgsl_power_stats stats; + struct kgsl_pwrscale *psc = &device->pwrscale; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (!device->pwrscale.enabled) + return; + /* clear old stats before waking */ + memset(&psc->accum_stats, 0, sizeof(psc->accum_stats)); + memset(&last_xstats, 0, sizeof(last_xstats)); + + /* and any hw activity from waking up*/ + device->ftbl->power_stats(device, &stats); + + psc->time = ktime_get(); + + psc->next_governor_call = ktime_add_us(psc->time, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call devfreq_resume_device() from a kernel thread */ + queue_work(psc->devfreq_wq, &psc->devfreq_resume_ws); +} +EXPORT_SYMBOL(kgsl_pwrscale_wake); + +/* + * kgsl_pwrscale_busy - update pwrscale state for new work + * @device: The device + * + * Called when new work is submitted to the device. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_busy(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + if (!device->pwrscale.enabled) + return; + if (device->pwrscale.on_time == 0) + device->pwrscale.on_time = ktime_to_us(ktime_get()); +} +EXPORT_SYMBOL(kgsl_pwrscale_busy); + +/** + * kgsl_pwrscale_update_stats() - update device busy statistics + * @device: The device + * + * Read hardware busy counters and accumulate the results. + */ +void kgsl_pwrscale_update_stats(struct kgsl_device *device) +{ + struct kgsl_pwrscale *psc = &device->pwrscale; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (!psc->enabled) + return; + + if (device->state == KGSL_STATE_ACTIVE) { + struct kgsl_power_stats stats; + device->ftbl->power_stats(device, &stats); + if (psc->popp_level) { + u64 x = stats.busy_time; + u64 y = stats.ram_time; + do_div(x, 100); + do_div(y, 100); + x *= popp_param[psc->popp_level].gpu_x; + y *= popp_param[psc->popp_level].ddr_y; + trace_kgsl_popp_mod(device, x, y); + stats.busy_time += x; + stats.ram_time += y; + } + device->pwrscale.accum_stats.busy_time += stats.busy_time; + device->pwrscale.accum_stats.ram_time += stats.ram_time; + device->pwrscale.accum_stats.ram_wait += stats.ram_wait; + } +} +EXPORT_SYMBOL(kgsl_pwrscale_update_stats); + +/** + * kgsl_pwrscale_update() - update device busy statistics + * @device: The device + * + * If enough time has passed schedule the next call to devfreq + * get_dev_status. + */ +void kgsl_pwrscale_update(struct kgsl_device *device) +{ + ktime_t t; + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (!device->pwrscale.enabled) + return; + + t = ktime_get(); + if (ktime_compare(t, device->pwrscale.next_governor_call) < 0) + return; + + device->pwrscale.next_governor_call = ktime_add_us(t, + KGSL_GOVERNOR_CALL_INTERVAL); + + /* to call srcu_notifier_call_chain() from a kernel thread */ + if (device->state != KGSL_STATE_SLUMBER) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_notify_ws); +} +EXPORT_SYMBOL(kgsl_pwrscale_update); + +/* + * kgsl_pwrscale_disable - temporarily disable the governor + * @device: The device + * + * Temporarily disable the governor, to prevent interference + * with profiling tools that expect a fixed clock frequency. + * This function must be called with the device mutex locked. 
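 *
 * A profiling path would typically bracket its measurement like this
 * (a minimal sketch):
 *
 *	mutex_lock(&device->mutex);
 *	kgsl_pwrscale_disable(device);	(pins the GPU at KGSL_PWRLEVEL_TURBO)
 *	... sample performance counters at a fixed clock ...
 *	kgsl_pwrscale_enable(device);
 *	mutex_unlock(&device->mutex);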
+ */ +void kgsl_pwrscale_disable(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + if (device->pwrscale.devfreqptr) + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_suspend_ws); + device->pwrscale.enabled = false; + kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_TURBO); +} +EXPORT_SYMBOL(kgsl_pwrscale_disable); + +/* + * kgsl_pwrscale_enable - re-enable the governor + * @device: The device + * + * Reenable the governor after a kgsl_pwrscale_disable() call. + * This function must be called with the device mutex locked. + */ +void kgsl_pwrscale_enable(struct kgsl_device *device) +{ + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (device->pwrscale.devfreqptr) { + queue_work(device->pwrscale.devfreq_wq, + &device->pwrscale.devfreq_resume_ws); + device->pwrscale.enabled = true; + } else { + /* + * Don't enable it if devfreq is not set and let the device + * run at default level; + */ + kgsl_pwrctrl_pwrlevel_change(device, + device->pwrctrl.default_pwrlevel); + device->pwrscale.enabled = false; + } +} +EXPORT_SYMBOL(kgsl_pwrscale_enable); + +static int _thermal_adjust(struct kgsl_pwrctrl *pwr, int level) +{ + if (level < pwr->active_pwrlevel) + return pwr->active_pwrlevel; + + /* + * A lower frequency has been recommended! Stop thermal + * cycling (but keep the upper thermal limit) and switch to + * the lower frequency. + */ + pwr->thermal_cycle = CYCLE_ENABLE; + del_timer_sync(&pwr->thermal_timer); + return level; +} + +/* + * Use various metrics including level stability, NAP intervals, and + * overall GPU freq / DDR freq combination to decide if POPP should + * be activated. + */ +static bool popp_stable(struct kgsl_device *device) +{ + s64 t; + s64 nap_time = 0; + s64 go_time = 0; + int i, index; + int nap = 0; + s64 percent_nap = 0; + struct kgsl_pwr_event *e; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *psc = &device->pwrscale; + + if (!test_bit(POPP_ON, &psc->popp_state)) + return false; + + /* If already pushed or running naturally at min don't push further */ + if (test_bit(POPP_PUSH, &psc->popp_state)) + return false; + if (!psc->popp_level && + (pwr->active_pwrlevel == pwr->min_pwrlevel)) + return false; + if (psc->history[KGSL_PWREVENT_STATE].events == NULL) + return false; + + t = ktime_to_ms(ktime_get()); + /* Check for recent NAP statistics: NAPping regularly and well? */ + if (pwr->active_pwrlevel == 0) { + index = psc->history[KGSL_PWREVENT_STATE].index; + i = index > 0 ? 
(index - 1) : + (psc->history[KGSL_PWREVENT_STATE].size - 1); + while (i != index) { + e = &psc->history[KGSL_PWREVENT_STATE].events[i]; + if (e->data == KGSL_STATE_NAP || + e->data == KGSL_STATE_SLUMBER) { + if (ktime_to_ms(e->start) + STABLE_TIME > t) { + nap++; + nap_time += e->duration; + } + } else if (e->data == KGSL_STATE_ACTIVE) { + if (ktime_to_ms(e->start) + STABLE_TIME > t) + go_time += e->duration; + } + if (i == 0) + i = psc->history[KGSL_PWREVENT_STATE].size - 1; + else + i--; + } + if (nap_time && go_time) { + percent_nap = 100 * nap_time; + do_div(percent_nap, nap_time + go_time); + } + trace_kgsl_popp_nap(device, (int)nap_time / 1000, nap, + percent_nap); + /* If running high at turbo, don't push */ + if (nap < MIN_SLEEP_PERIODS || percent_nap < MIN_SLEEP_PERCENT) + return false; + } + + /* Finally check that there hasn't been a recent change */ + if ((device->pwrscale.freq_change_time + STABLE_TIME) < t) { + device->pwrscale.freq_change_time = t; + return true; + } + return false; +} + +bool kgsl_popp_check(struct kgsl_device *device) +{ + int i; + struct kgsl_pwrscale *psc = &device->pwrscale; + struct kgsl_pwr_event *e; + + if (!test_bit(POPP_ON, &psc->popp_state)) + return false; + if (!test_bit(POPP_PUSH, &psc->popp_state)) + return false; + if (psc->history[KGSL_PWREVENT_STATE].events == NULL) { + clear_bit(POPP_PUSH, &psc->popp_state); + return false; + } + + e = &psc->history[KGSL_PWREVENT_STATE]. + events[psc->history[KGSL_PWREVENT_STATE].index]; + if (e->data == KGSL_STATE_SLUMBER) + e->duration = ktime_us_delta(ktime_get(), e->start); + + /* If there's been a long SLUMBER in recent history, clear the _PUSH */ + for (i = 0; i < psc->history[KGSL_PWREVENT_STATE].size; i++) { + e = &psc->history[KGSL_PWREVENT_STATE].events[i]; + if ((e->data == KGSL_STATE_SLUMBER) && + (e->duration > POPP_RESET_TIME)) { + clear_bit(POPP_PUSH, &psc->popp_state); + return false; + } + } + return true; +} + +/* + * The GPU has been running at the current frequency for a while. Attempt + * to lower the frequency for boarderline cases. + */ +static void popp_trans1(struct kgsl_device *device) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwr->active_pwrlevel]; + struct kgsl_pwrscale *psc = &device->pwrscale; + int old_level = psc->popp_level; + + switch (old_level) { + case 0: + psc->popp_level = 2; + /* If the current level has a high default bus don't push it */ + if (pl->bus_freq == pl->bus_max) + pwr->bus_mod = 1; + kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel + 1); + break; + case 1: + case 2: + psc->popp_level++; + break; + case 3: + set_bit(POPP_PUSH, &psc->popp_state); + psc->popp_level = 0; + break; + case POPP_MAX: + default: + psc->popp_level = 0; + break; + } + + trace_kgsl_popp_level(device, old_level, psc->popp_level); +} + +/* + * The GPU DCVS algorithm recommends a level change. Apply any + * POPP restrictions and update the level accordingly + */ +static int popp_trans2(struct kgsl_device *device, int level) +{ + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct kgsl_pwrscale *psc = &device->pwrscale; + int old_level = psc->popp_level; + + if (!test_bit(POPP_ON, &psc->popp_state)) + return level; + + clear_bit(POPP_PUSH, &psc->popp_state); + /* If the governor recommends going down, do it! 
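 * A larger pwrlevel index is a lower frequency, so active_pwrlevel < level
 * means the recommendation is already a step down and POPP simply gets out
 * of the way.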
*/ + if (pwr->active_pwrlevel < level) { + psc->popp_level = 0; + trace_kgsl_popp_level(device, old_level, psc->popp_level); + return level; + } + + switch (psc->popp_level) { + case 0: + /* If the feature isn't engaged, go up immediately */ + break; + case 1: + /* Turn off mitigation, and go up a level */ + psc->popp_level = 0; + break; + case 2: + case 3: + /* Try a more aggressive mitigation */ + psc->popp_level--; + level++; + /* Update the stable timestamp */ + device->pwrscale.freq_change_time = ktime_to_ms(ktime_get()); + break; + case POPP_MAX: + default: + psc->popp_level = 0; + break; + } + + trace_kgsl_popp_level(device, old_level, psc->popp_level); + + return level; +} + +/* + * kgsl_devfreq_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrlevel *pwr_level; + int level, i; + unsigned long cur_freq; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.enabled) + return 0; + + pwr = &device->pwrctrl; + if (flags & DEVFREQ_FLAG_WAKEUP_MAXFREQ) { + /* + * The GPU is about to get suspended, + * but it needs to be at the max power level when waking up + */ + pwr->wakeup_maxpwrlevel = 1; + return 0; + } + + mutex_lock(&device->mutex); + cur_freq = kgsl_pwrctrl_active_freq(pwr); + level = pwr->active_pwrlevel; + pwr_level = &pwr->pwrlevels[level]; + + /* If the governor recommends a new frequency, update it here */ + if (*freq != cur_freq) { + level = pwr->max_pwrlevel; + for (i = pwr->min_pwrlevel; i >= pwr->max_pwrlevel; i--) + if (*freq <= pwr->pwrlevels[i].gpu_freq) { + if (pwr->thermal_cycle == CYCLE_ACTIVE) + level = _thermal_adjust(pwr, i); + else + level = popp_trans2(device, i); + break; + } + if (level != pwr->active_pwrlevel) + kgsl_pwrctrl_pwrlevel_change(device, level); + } else if (popp_stable(device)) { + popp_trans1(device); + } + + *freq = kgsl_pwrctrl_active_freq(pwr); + + mutex_unlock(&device->mutex); + return 0; +} +EXPORT_SYMBOL(kgsl_devfreq_target); + +/* + * kgsl_devfreq_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwrctrl; + struct kgsl_pwrscale *pwrscale; + ktime_t tmp; + + if (device == NULL) + return -ENODEV; + if (stat == NULL) + return -EINVAL; + + pwrscale = &device->pwrscale; + pwrctrl = &device->pwrctrl; + + mutex_lock(&device->mutex); + /* + * If the GPU clock is on grab the latest power counter + * values. Otherwise the most recent ACTIVE values will + * already be stored in accum_stats. 
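 *
 * The governor turns the two counters filled in below into a load
 * figure, roughly busy_time * 100 / total_time, so both are reported in
 * microseconds and accum_stats is cleared again after every sample.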
+ */ + kgsl_pwrscale_update_stats(device); + + tmp = ktime_get(); + stat->total_time = ktime_us_delta(tmp, pwrscale->time); + pwrscale->time = tmp; + + stat->busy_time = pwrscale->accum_stats.busy_time; + + stat->current_frequency = kgsl_pwrctrl_active_freq(&device->pwrctrl); + + /* + * keep the latest devfreq_dev_status values + * and vbif counters data + * to be (re)used by kgsl_busmon_get_dev_status() + */ + if (pwrctrl->bus_control) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + + last_status.total_time = stat->total_time; + last_status.busy_time = stat->busy_time; + last_status.current_frequency = stat->current_frequency; + + last_b->ram_time = device->pwrscale.accum_stats.ram_time; + last_b->ram_wait = device->pwrscale.accum_stats.ram_wait; + last_b->mod = device->pwrctrl.bus_mod; + } + + kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time); + trace_kgsl_pwrstats(device, stat->total_time, &pwrscale->accum_stats); + memset(&pwrscale->accum_stats, 0, sizeof(pwrscale->accum_stats)); + + mutex_unlock(&device->mutex); + + return 0; +} +EXPORT_SYMBOL(kgsl_devfreq_get_dev_status); + +/* + * kgsl_devfreq_get_cur_freq - devfreq_dev_profile.get_cur_freq callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + + mutex_lock(&device->mutex); + *freq = kgsl_pwrctrl_active_freq(&device->pwrctrl); + mutex_unlock(&device->mutex); + + return 0; +} +EXPORT_SYMBOL(kgsl_devfreq_get_cur_freq); + +/* + * kgsl_devfreq_add_notifier - add a fine grained notifier. + * @dev: The device + * @nb: Notifier block that will recieve updates. + * + * Add a notifier to recieve ADRENO_DEVFREQ_NOTIFY_* events + * from the device. + */ +int kgsl_devfreq_add_notifier(struct device *dev, + struct notifier_block *nb) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + + if (nb == NULL) + return -EINVAL; + + return srcu_notifier_chain_register(&device->pwrscale.nh, nb); +} +EXPORT_SYMBOL(kgsl_devfreq_add_notifier); + +/* + * kgsl_devfreq_del_notifier - remove a fine grained notifier. + * @dev: The device + * @nb: The notifier block. + * + * Remove a notifier registered with kgsl_devfreq_add_notifier(). + */ +int kgsl_devfreq_del_notifier(struct device *dev, struct notifier_block *nb) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + + if (device == NULL) + return -ENODEV; + + if (nb == NULL) + return -EINVAL; + + return srcu_notifier_chain_unregister(&device->pwrscale.nh, nb); +} +EXPORT_SYMBOL(kgsl_devfreq_del_notifier); + + +/* + * kgsl_busmon_get_dev_status - devfreq_dev_profile.get_dev_status callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. 
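 *
 * The bus governor samples in the same window as the GPU governor, so
 * this simply replays the snapshot that kgsl_devfreq_get_dev_status()
 * cached in last_status (including the ram_time/ram_wait VBIF data)
 * rather than touching the hardware counters again.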
+ */ +int kgsl_busmon_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct xstats *b; + stat->total_time = last_status.total_time; + stat->busy_time = last_status.busy_time; + stat->current_frequency = last_status.current_frequency; + if (stat->private_data) { + struct xstats *last_b = + (struct xstats *)last_status.private_data; + b = (struct xstats *)stat->private_data; + b->ram_time = last_b->ram_time; + b->ram_wait = last_b->ram_wait; + b->mod = last_b->mod; + } + return 0; +} + +/* + * kgsl_busmon_target - devfreq_dev_profile.target callback + * @dev: see devfreq.h + * @freq: see devfreq.h + * @flags: see devfreq.h + * + * This function expects the device mutex to be unlocked. + */ +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags) +{ + struct kgsl_device *device = dev_get_drvdata(dev); + struct kgsl_pwrctrl *pwr; + struct kgsl_pwrlevel *pwr_level; + int level, b; + u32 bus_flag; + unsigned long ab_mbytes; + + if (device == NULL) + return -ENODEV; + if (freq == NULL) + return -EINVAL; + if (!device->pwrscale.enabled) + return 0; + + pwr = &device->pwrctrl; + + if (!pwr->bus_control) + return 0; + + mutex_lock(&device->mutex); + level = pwr->active_pwrlevel; + pwr_level = &pwr->pwrlevels[level]; + bus_flag = device->pwrscale.bus_profile.flag; + device->pwrscale.bus_profile.flag = 0; + ab_mbytes = device->pwrscale.bus_profile.ab_mbytes; + + /* + * Bus devfreq governor has calculated its recomendations + * when gpu was running with *freq frequency. + * If the gpu frequency is different now it's better to + * ignore the call + */ + if (pwr_level->gpu_freq != *freq) { + mutex_unlock(&device->mutex); + return 0; + } + + b = pwr->bus_mod; + if ((bus_flag & DEVFREQ_FLAG_FAST_HINT) && + ((pwr_level->bus_freq + pwr->bus_mod) < pwr_level->bus_max)) + pwr->bus_mod++; + else if ((bus_flag & DEVFREQ_FLAG_SLOW_HINT) && + ((pwr_level->bus_freq + pwr->bus_mod) > pwr_level->bus_min)) + pwr->bus_mod--; + + /* Update bus vote if AB or IB is modified */ + if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) { + pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab; + pwr->bus_ab_mbytes = ab_mbytes; + kgsl_pwrctrl_buslevel_update(device, true); + } + + mutex_unlock(&device->mutex); + return 0; +} + +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq) +{ + return 0; +} + + +/* + * kgsl_pwrscale_init - Initialize pwrscale. + * @dev: The device + * @governor: The initial governor to use. + * + * Initialize devfreq and any non-constant profile data. + */ +int kgsl_pwrscale_init(struct device *dev, const char *governor) +{ + struct kgsl_device *device; + struct kgsl_pwrscale *pwrscale; + struct kgsl_pwrctrl *pwr; + struct devfreq *devfreq; + struct devfreq *bus_devfreq; + struct msm_adreno_extended_profile *gpu_profile; + struct devfreq_dev_profile *profile; + struct devfreq_msm_adreno_tz_data *data; + int i, out = 0; + int ret; + + device = dev_get_drvdata(dev); + if (device == NULL) + return -ENODEV; + + pwrscale = &device->pwrscale; + pwr = &device->pwrctrl; + gpu_profile = &pwrscale->gpu_profile; + profile = &pwrscale->gpu_profile.profile; + + srcu_init_notifier_head(&pwrscale->nh); + + profile->initial_freq = + pwr->pwrlevels[pwr->default_pwrlevel].gpu_freq; + /* Let's start with 10 ms and tune in later */ + profile->polling_ms = 10; + + /* do not include the 'off' level or duplicate freq. 
levels */ + for (i = 0; i < (pwr->num_pwrlevels - 1); i++) + pwrscale->freq_table[out++] = pwr->pwrlevels[i].gpu_freq; + + /* + * Max_state is the number of valid power levels. + * The valid power levels range from 0 - (max_state - 1) + */ + profile->max_state = pwr->num_pwrlevels - 1; + /* link storage array to the devfreq profile pointer */ + profile->freq_table = pwrscale->freq_table; + + /* if there is only 1 freq, no point in running a governor */ + if (profile->max_state == 1) + governor = "performance"; + + /* initialize msm-adreno-tz governor specific data here */ + data = gpu_profile->private_data; + /* + * If there is a separate GX power rail, allow + * independent modification to its voltage through + * the bus bandwidth vote. + */ + if (pwr->bus_control) { + out = 0; + while (pwr->bus_ib[out] && out <= pwr->pwrlevels[0].bus_max) { + pwr->bus_ib[out] = + pwr->bus_ib[out] >> 20; + out++; + } + data->bus.num = out; + data->bus.ib = &pwr->bus_ib[0]; + data->bus.index = &pwr->bus_index[0]; + data->bus.width = pwr->bus_width; + } else + data->bus.num = 0; + + devfreq = devfreq_add_device(dev, &pwrscale->gpu_profile.profile, + governor, pwrscale->gpu_profile.private_data); + if (IS_ERR(devfreq)) { + device->pwrscale.enabled = false; + return PTR_ERR(devfreq); + } + + pwrscale->devfreqptr = devfreq; + + pwrscale->gpu_profile.bus_devfreq = NULL; + if (data->bus.num) { + pwrscale->bus_profile.profile.max_state + = pwr->num_pwrlevels - 1; + pwrscale->bus_profile.profile.freq_table + = pwrscale->freq_table; + + bus_devfreq = devfreq_add_device(device->busmondev, + &pwrscale->bus_profile.profile, "gpubw_mon", NULL); + if (!IS_ERR(bus_devfreq)) + pwrscale->gpu_profile.bus_devfreq = bus_devfreq; + } + + ret = sysfs_create_link(&device->dev->kobj, + &devfreq->dev.kobj, "devfreq"); + + pwrscale->devfreq_wq = create_freezable_workqueue("kgsl_devfreq_wq"); + INIT_WORK(&pwrscale->devfreq_suspend_ws, do_devfreq_suspend); + INIT_WORK(&pwrscale->devfreq_resume_ws, do_devfreq_resume); + INIT_WORK(&pwrscale->devfreq_notify_ws, do_devfreq_notify); + + pwrscale->next_governor_call = ktime_add_us(ktime_get(), + KGSL_GOVERNOR_CALL_INTERVAL); + + /* history tracking */ + for (i = 0; i < KGSL_PWREVENT_MAX; i++) { + pwrscale->history[i].events = kzalloc( + pwrscale->history[i].size * + sizeof(struct kgsl_pwr_event), GFP_KERNEL); + pwrscale->history[i].type = i; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_pwrscale_init); + +/* + * kgsl_pwrscale_close - clean up pwrscale + * @device: the device + * + * This function should be called with the device mutex locked. 
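 *
 * Pairs with kgsl_pwrscale_init(): the devfreq device, the devfreq
 * workqueue, the notifier head and the per-event history buffers
 * allocated there are all torn down here.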
+ */ +void kgsl_pwrscale_close(struct kgsl_device *device) +{ + int i; + struct kgsl_pwrscale *pwrscale; + + BUG_ON(!mutex_is_locked(&device->mutex)); + + pwrscale = &device->pwrscale; + if (!pwrscale->devfreqptr) + return; + flush_workqueue(pwrscale->devfreq_wq); + destroy_workqueue(pwrscale->devfreq_wq); + devfreq_remove_device(device->pwrscale.devfreqptr); + device->pwrscale.devfreqptr = NULL; + srcu_cleanup_notifier_head(&device->pwrscale.nh); + for (i = 0; i < KGSL_PWREVENT_MAX; i++) + kfree(pwrscale->history[i].events); +} +EXPORT_SYMBOL(kgsl_pwrscale_close); + +static void do_devfreq_suspend(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_suspend_ws); + struct devfreq *devfreq = pwrscale->devfreqptr; + + devfreq_suspend_device(devfreq); +} + +static void do_devfreq_resume(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_resume_ws); + struct devfreq *devfreq = pwrscale->devfreqptr; + + devfreq_resume_device(devfreq); +} + +static void do_devfreq_notify(struct work_struct *work) +{ + struct kgsl_pwrscale *pwrscale = container_of(work, + struct kgsl_pwrscale, devfreq_notify_ws); + struct devfreq *devfreq = pwrscale->devfreqptr; + srcu_notifier_call_chain(&pwrscale->nh, + ADRENO_DEVFREQ_NOTIFY_RETIRE, + devfreq); +} diff --git a/drivers/gpu/msm/kgsl_pwrscale.h b/drivers/gpu/msm/kgsl_pwrscale.h new file mode 100644 index 000000000000..c85317869f1d --- /dev/null +++ b/drivers/gpu/msm/kgsl_pwrscale.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef __KGSL_PWRSCALE_H +#define __KGSL_PWRSCALE_H + +#include <linux/devfreq.h> +#include <linux/msm_adreno_devfreq.h> +#include "kgsl_pwrctrl.h" + +/* devfreq governor call window in usec */ +#define KGSL_GOVERNOR_CALL_INTERVAL 10000 + +/* Power events to be tracked with history */ +#define KGSL_PWREVENT_STATE 0 +#define KGSL_PWREVENT_GPU_FREQ 1 +#define KGSL_PWREVENT_BUS_FREQ 2 +#define KGSL_PWREVENT_POPP 3 +#define KGSL_PWREVENT_MAX 4 + +/** + * Amount of time running at a level to be considered + * "stable" in msec + */ +#define STABLE_TIME 150 + +/* Amount of idle time needed to re-set stability in usec */ +#define POPP_RESET_TIME 1000000 + +/* Number of POPP levels */ +#define POPP_MAX 4 + +/* POPP state bits */ +#define POPP_ON BIT(0) +#define POPP_PUSH BIT(1) + +struct kgsl_popp { + int gpu_x; + int ddr_y; +}; + +struct kgsl_power_stats { + u64 busy_time; + u64 ram_time; + u64 ram_wait; +}; + +struct kgsl_pwr_event { + unsigned int data; + ktime_t start; + s64 duration; +}; + +struct kgsl_pwr_history { + struct kgsl_pwr_event *events; + unsigned int type; + unsigned int index; + unsigned int size; +}; + +/** + * struct kgsl_pwrscale - Power scaling settings for a KGSL device + * @devfreqptr - Pointer to the devfreq device + * @gpu_profile - GPU profile data for the devfreq device + * @bus_profile - Bus specific data for the bus devfreq device + * @freq_table - GPU frequencies for the DCVS algorithm + * @last_governor - Prior devfreq governor + * @accum_stats - Accumulated statistics for various frequency calculations + * @enabled - Whether or not power scaling is enabled + * @time - Last submitted sample timestamp + * @on_time - Timestamp when gpu busy begins + * @freq_change_time - Timestamp of last freq change or popp update + * @nh - Notifier for the partner devfreq bus device + * @devfreq_wq - Main devfreq workqueue + * @devfreq_suspend_ws - Pass device suspension to devfreq + * @devfreq_resume_ws - Pass device resume to devfreq + * @devfreq_notify_ws - Notify devfreq to update sampling + * @next_governor_call - Timestamp after which the governor may be notified of + * a new sample + * @history - History of power events with timestamps and durations + * @popp_level - Current level of POPP mitigation + * @popp_state - Control state for POPP, on/off, recently pushed, etc + */ +struct kgsl_pwrscale { + struct devfreq *devfreqptr; + struct msm_adreno_extended_profile gpu_profile; + struct msm_busmon_extended_profile bus_profile; + unsigned int freq_table[KGSL_MAX_PWRLEVELS]; + char last_governor[DEVFREQ_NAME_LEN]; + struct kgsl_power_stats accum_stats; + bool enabled; + ktime_t time; + s64 on_time; + s64 freq_change_time; + struct srcu_notifier_head nh; + struct workqueue_struct *devfreq_wq; + struct work_struct devfreq_suspend_ws; + struct work_struct devfreq_resume_ws; + struct work_struct devfreq_notify_ws; + ktime_t next_governor_call; + struct kgsl_pwr_history history[KGSL_PWREVENT_MAX]; + int popp_level; + unsigned long popp_state; +}; + +int kgsl_pwrscale_init(struct device *dev, const char *governor); +void kgsl_pwrscale_close(struct kgsl_device *device); + +void kgsl_pwrscale_update(struct kgsl_device *device); +void kgsl_pwrscale_update_stats(struct kgsl_device *device); +void kgsl_pwrscale_busy(struct kgsl_device *device); +void kgsl_pwrscale_sleep(struct kgsl_device *device); +void kgsl_pwrscale_wake(struct kgsl_device *device); + +void kgsl_pwrscale_enable(struct kgsl_device *device); +void kgsl_pwrscale_disable(struct kgsl_device *device); + +int 
kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_devfreq_get_dev_status(struct device *, struct devfreq_dev_status *); +int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq); + +int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags); +int kgsl_busmon_get_dev_status(struct device *, struct devfreq_dev_status *); +int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq); + +bool kgsl_popp_check(struct kgsl_device *device); + + +#define KGSL_PWRSCALE_INIT(_priv_data) { \ + .enabled = true, \ + .gpu_profile = { \ + .private_data = _priv_data, \ + .profile = { \ + .target = kgsl_devfreq_target, \ + .get_dev_status = kgsl_devfreq_get_dev_status, \ + .get_cur_freq = kgsl_devfreq_get_cur_freq, \ + } }, \ + .bus_profile = { \ + .private_data = _priv_data, \ + .profile = { \ + .target = kgsl_busmon_target, \ + .get_dev_status = kgsl_busmon_get_dev_status, \ + .get_cur_freq = kgsl_busmon_get_cur_freq, \ + } }, \ + .history[KGSL_PWREVENT_STATE].size = 20, \ + .history[KGSL_PWREVENT_GPU_FREQ].size = 3, \ + .history[KGSL_PWREVENT_BUS_FREQ].size = 5, \ + .history[KGSL_PWREVENT_POPP].size = 5, \ + } +#endif diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c new file mode 100644 index 000000000000..53dd3270c75b --- /dev/null +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -0,0 +1,1258 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/export.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> +#include <linux/slab.h> +#include <linux/kmemleak.h> +#include <linux/highmem.h> +#include <linux/scatterlist.h> +#include <soc/qcom/scm.h> +#include <soc/qcom/secure_buffer.h> + +#include "kgsl.h" +#include "kgsl_sharedmem.h" +#include "kgsl_cffdump.h" +#include "kgsl_device.h" +#include "kgsl_log.h" + +/* + * The user can set this from debugfs to force failed memory allocations to + * fail without trying OOM first. 
This is a debug setting useful for + * stress applications that want to test failure cases without pushing the + * system into unrecoverable OOM panics + */ + +static bool sharedmem_noretry_flag; + +static DEFINE_MUTEX(kernel_map_global_lock); + +struct cp2_mem_chunks { + unsigned int chunk_list; + unsigned int chunk_list_size; + unsigned int chunk_size; +} __attribute__ ((__packed__)); + +struct cp2_lock_req { + struct cp2_mem_chunks chunks; + unsigned int mem_usage; + unsigned int lock; +} __attribute__ ((__packed__)); + +#define MEM_PROTECT_LOCK_ID2 0x0A +#define MEM_PROTECT_LOCK_ID2_FLAT 0x11 + +/* An attribute for showing per-process memory statistics */ +struct kgsl_mem_entry_attribute { + struct attribute attr; + int memtype; + ssize_t (*show)(struct kgsl_process_private *priv, + int type, char *buf); +}; + +#define to_mem_entry_attr(a) \ +container_of(a, struct kgsl_mem_entry_attribute, attr) + +#define __MEM_ENTRY_ATTR(_type, _name, _show) \ +{ \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .memtype = _type, \ + .show = _show, \ +} + +/* + * A structure to hold the attributes for a particular memory type. + * For each memory type in each process we store the current and maximum + * memory usage and display the counts in sysfs. This structure and + * the following macro allow us to simplify the definition for those + * adding new memory types + */ + +struct mem_entry_stats { + int memtype; + struct kgsl_mem_entry_attribute attr; + struct kgsl_mem_entry_attribute max_attr; +}; + + +#define MEM_ENTRY_STAT(_type, _name) \ +{ \ + .memtype = _type, \ + .attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \ + .max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \ + mem_entry_max_show), \ +} + +static void kgsl_cma_unlock_secure(struct kgsl_memdesc *memdesc); + +/** + * Show the current amount of memory allocated for the given memtype + */ + +static ssize_t +mem_entry_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].cur); +} + +/** + * Show the maximum memory allocated for the given memtype through the life of + * the process + */ + +static ssize_t +mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].max); +} + +static ssize_t mem_entry_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr); + struct kgsl_process_private *priv; + ssize_t ret; + + /* + * 1. sysfs_remove_file waits for reads to complete before the node + * is deleted. + * 2. kgsl_process_init_sysfs takes a refcount to the process_private, + * which is put at the end of kgsl_process_uninit_sysfs. + * These two conditions imply that priv will not be freed until this + * function completes, and no further locking is needed. + */ + priv = kobj ? 
container_of(kobj, struct kgsl_process_private, kobj) : + NULL; + + if (priv && pattr->show) + ret = pattr->show(priv, pattr->memtype, buf); + else + ret = -EIO; + + return ret; +} + +static const struct sysfs_ops mem_entry_sysfs_ops = { + .show = mem_entry_sysfs_show, +}; + +static struct kobj_type ktype_mem_entry = { + .sysfs_ops = &mem_entry_sysfs_ops, +}; + +static struct mem_entry_stats mem_stats[] = { + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel), + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user), +#ifdef CONFIG_ION + MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion), +#endif +}; + +void +kgsl_process_uninit_sysfs(struct kgsl_process_private *private) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr); + sysfs_remove_file(&private->kobj, + &mem_stats[i].max_attr.attr); + } + + kobject_put(&private->kobj); + /* Put the refcount we got in kgsl_process_init_sysfs */ + kgsl_process_private_put(private); +} + +/** + * kgsl_process_init_sysfs() - Initialize and create sysfs files for a process + * + * @device: Pointer to kgsl device struct + * @private: Pointer to the structure for the process + * + * kgsl_process_init_sysfs() is called at the time of creating the + * process struct when a process opens the kgsl device for the first time. + * This function creates the sysfs files for the process. + */ +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private) +{ + unsigned char name[16]; + int i; + + /* Keep private valid until the sysfs enries are removed. */ + kgsl_process_private_get(private); + + snprintf(name, sizeof(name), "%d", private->pid); + + if (kobject_init_and_add(&private->kobj, &ktype_mem_entry, + kgsl_driver.prockobj, name)) { + WARN(1, "Unable to add sysfs dir '%s'\n", name); + return; + } + + for (i = 0; i < ARRAY_SIZE(mem_stats); i++) { + if (sysfs_create_file(&private->kobj, + &mem_stats[i].attr.attr)) + WARN(1, "Couldn't create sysfs file '%s'\n", + mem_stats[i].attr.attr.name); + + if (sysfs_create_file(&private->kobj, + &mem_stats[i].max_attr.attr)) + WARN(1, "Couldn't create sysfs file '%s'\n", + mem_stats[i].max_attr.attr.name); + + } +} + +static ssize_t kgsl_drv_memstat_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + uint64_t val = 0; + + if (!strcmp(attr->attr.name, "vmalloc")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc); + else if (!strcmp(attr->attr.name, "vmalloc_max")) + val = atomic_long_read(&kgsl_driver.stats.vmalloc_max); + else if (!strcmp(attr->attr.name, "page_alloc")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc); + else if (!strcmp(attr->attr.name, "page_alloc_max")) + val = atomic_long_read(&kgsl_driver.stats.page_alloc_max); + else if (!strcmp(attr->attr.name, "coherent")) + val = atomic_long_read(&kgsl_driver.stats.coherent); + else if (!strcmp(attr->attr.name, "coherent_max")) + val = atomic_long_read(&kgsl_driver.stats.coherent_max); + else if (!strcmp(attr->attr.name, "secure")) + val = atomic_long_read(&kgsl_driver.stats.secure); + else if (!strcmp(attr->attr.name, "secure_max")) + val = atomic_long_read(&kgsl_driver.stats.secure_max); + else if (!strcmp(attr->attr.name, "mapped")) + val = atomic_long_read(&kgsl_driver.stats.mapped); + else if (!strcmp(attr->attr.name, "mapped_max")) + val = atomic_long_read(&kgsl_driver.stats.mapped_max); + + return snprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t kgsl_drv_full_cache_threshold_store(struct device *dev, + struct device_attribute *attr, + 
const char *buf, size_t count) +{ + int ret; + unsigned int thresh = 0; + + ret = kgsl_sysfs_store(buf, &thresh); + if (ret) + return ret; + + kgsl_driver.full_cache_threshold = thresh; + return count; +} + +static ssize_t kgsl_drv_full_cache_threshold_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", + kgsl_driver.full_cache_threshold); +} + +static DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(secure, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(secure_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(mapped_max, 0444, kgsl_drv_memstat_show, NULL); +static DEVICE_ATTR(full_cache_threshold, 0644, + kgsl_drv_full_cache_threshold_show, + kgsl_drv_full_cache_threshold_store); + +static const struct device_attribute *drv_attr_list[] = { + &dev_attr_vmalloc, + &dev_attr_vmalloc_max, + &dev_attr_page_alloc, + &dev_attr_page_alloc_max, + &dev_attr_coherent, + &dev_attr_coherent_max, + &dev_attr_secure, + &dev_attr_secure_max, + &dev_attr_mapped, + &dev_attr_mapped_max, + &dev_attr_full_cache_threshold, + NULL +}; + +void +kgsl_sharedmem_uninit_sysfs(void) +{ + kgsl_remove_device_sysfs_files(&kgsl_driver.virtdev, drv_attr_list); +} + +int +kgsl_sharedmem_init_sysfs(void) +{ + return kgsl_create_device_sysfs_files(&kgsl_driver.virtdev, + drv_attr_list); +} + +static int kgsl_allocate_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size) { + int ret; + + if (MMU_FEATURE(&device->mmu, KGSL_MMU_HYP_SECURE_ALLOC)) + ret = kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size); + else + ret = kgsl_cma_alloc_secure(device, memdesc, size); + + return ret; +} + +int kgsl_allocate_user(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size, uint64_t mmapsize, uint64_t flags) +{ + int ret; + + if (size == 0) + return -EINVAL; + + memdesc->flags = flags; + + if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) + ret = kgsl_cma_alloc_coherent(device, memdesc, pagetable, size); + else if (flags & KGSL_MEMFLAGS_SECURE) + ret = kgsl_allocate_secure(device, memdesc, pagetable, size); + else + ret = kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size); + + return ret; +} + +static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + int i, pgoff; + struct scatterlist *s = memdesc->sgt->sgl; + unsigned int offset; + + offset = ((unsigned long) vmf->virtual_address - vma->vm_start); + + if (offset >= memdesc->size) + return VM_FAULT_SIGBUS; + + pgoff = offset >> PAGE_SHIFT; + + /* + * The sglist might be comprised of mixed blocks of memory depending + * on how many 64K pages were allocated. 
This means we have to do math + * to find the actual 4K page to map in user space + */ + + for (i = 0; i < memdesc->sgt->nents; i++) { + int npages = s->length >> PAGE_SHIFT; + + if (pgoff < npages) { + struct page *page = sg_page(s); + + page = nth_page(page, pgoff); + + get_page(page); + vmf->page = page; + + return 0; + } + + pgoff -= npages; + s = sg_next(s); + } + + return VM_FAULT_SIGBUS; +} + +/* + * kgsl_page_alloc_unmap_kernel() - Unmap the memory in memdesc + * + * @memdesc: The memory descriptor which contains information about the memory + * + * Unmaps the memory mapped into kernel address space + */ +static void kgsl_page_alloc_unmap_kernel(struct kgsl_memdesc *memdesc) +{ + mutex_lock(&kernel_map_global_lock); + if (!memdesc->hostptr) { + BUG_ON(memdesc->hostptr_count); + goto done; + } + memdesc->hostptr_count--; + if (memdesc->hostptr_count) + goto done; + vunmap(memdesc->hostptr); + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.vmalloc); + memdesc->hostptr = NULL; +done: + mutex_unlock(&kernel_map_global_lock); +} + +static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) +{ + unsigned int i = 0; + struct scatterlist *sg; + + kgsl_page_alloc_unmap_kernel(memdesc); + /* we certainly do not expect the hostptr to still be mapped */ + BUG_ON(memdesc->hostptr); + + /* Secure buffers need to be unlocked before being freed */ + if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) { + int ret; + int dest_perms = PERM_READ | PERM_WRITE | PERM_EXEC; + int source_vm = VMID_CP_PIXEL; + int dest_vm = VMID_HLOS; + + ret = hyp_assign_table(memdesc->sgt, &source_vm, 1, + &dest_vm, &dest_perms, 1); + if (ret) { + pr_err("Secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n", + memdesc->gpuaddr, memdesc->size, ret); + BUG(); + } + + atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure); + } else { + atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc); + } + + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) { + /* + * sg_alloc_table_from_pages() will collapse any physically + * adjacent pages into a single scatterlist entry. We cannot + * just call __free_pages() on the entire set since we cannot + * ensure that the size is a whole order. Instead, free each + * page or compound page group individually. 
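As an illustrative example (assuming 4 KB base pages): a 128 KB scatterlist entry built from two 64 KB compound allocations is released with two __free_pages() calls of order 4, advancing 16 pages at a time rather than touching every 4 KB page.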
+ */ + struct page *p = sg_page(sg), *next; + unsigned int j = 0, count; + while (j < (sg->length/PAGE_SIZE)) { + if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) + ClearPagePrivate(p); + + count = 1 << compound_order(p); + next = nth_page(p, count); + __free_pages(p, compound_order(p)); + p = next; + j += count; + + } + } +} + +/* + * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address + * space + * + * @memdesc - The memory descriptor which contains information about the memory + * + * Return: 0 on success else error code + */ +static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) +{ + int ret = 0; + + /* Sanity check - don't map more than we could possibly chew */ + if (memdesc->size > ULONG_MAX) + return -ENOMEM; + + mutex_lock(&kernel_map_global_lock); + if (!memdesc->hostptr) { + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + struct page **pages = NULL; + struct scatterlist *sg; + int npages = PAGE_ALIGN(memdesc->size) >> PAGE_SHIFT; + int sglen = memdesc->sgt->nents; + int i, count = 0; + + /* create a list of pages to call vmap */ + pages = kgsl_malloc(npages * sizeof(struct page *)); + if (pages == NULL) { + ret = -ENOMEM; + goto done; + } + + for_each_sg(memdesc->sgt->sgl, sg, sglen, i) { + struct page *page = sg_page(sg); + int j; + + for (j = 0; j < sg->length >> PAGE_SHIFT; j++) + pages[count++] = page++; + } + + + memdesc->hostptr = vmap(pages, count, + VM_IOREMAP, page_prot); + if (memdesc->hostptr) + KGSL_STATS_ADD(memdesc->size, + &kgsl_driver.stats.vmalloc, + &kgsl_driver.stats.vmalloc_max); + else + ret = -ENOMEM; + kgsl_free(pages); + } + if (memdesc->hostptr) + memdesc->hostptr_count++; +done: + mutex_unlock(&kernel_map_global_lock); + + return ret; +} + +static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc, + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + unsigned long offset, pfn; + int ret; + + offset = ((unsigned long) vmf->virtual_address - vma->vm_start) >> + PAGE_SHIFT; + + pfn = (memdesc->physaddr >> PAGE_SHIFT) + offset; + ret = vm_insert_pfn(vma, (unsigned long) vmf->virtual_address, pfn); + + if (ret == -ENOMEM || ret == -EAGAIN) + return VM_FAULT_OOM; + else if (ret == -EFAULT) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static void kgsl_cma_coherent_free(struct kgsl_memdesc *memdesc) +{ + struct dma_attrs *attrs = NULL; + + if (memdesc->hostptr) { + if (memdesc->priv & KGSL_MEMDESC_SECURE) { + atomic_long_sub(memdesc->size, + &kgsl_driver.stats.secure); + + kgsl_cma_unlock_secure(memdesc); + attrs = &memdesc->attrs; + } else + atomic_long_sub(memdesc->size, + &kgsl_driver.stats.coherent); + + dma_free_attrs(memdesc->dev, (size_t) memdesc->size, + memdesc->hostptr, memdesc->physaddr, attrs); + } +} + +/* Global */ +static struct kgsl_memdesc_ops kgsl_page_alloc_ops = { + .free = kgsl_page_alloc_free, + .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY, + .vmfault = kgsl_page_alloc_vmfault, + .map_kernel = kgsl_page_alloc_map_kernel, + .unmap_kernel = kgsl_page_alloc_unmap_kernel, +}; + +/* CMA ops - used during NOMMU mode */ +static struct kgsl_memdesc_ops kgsl_cma_ops = { + .free = kgsl_cma_coherent_free, + .vmflags = VM_DONTDUMP | VM_PFNMAP | VM_DONTEXPAND | VM_DONTCOPY, + .vmfault = kgsl_contiguous_vmfault, +}; + +#ifdef CONFIG_ARM64 +/* + * For security reasons, ARMv8 doesn't allow invalidate only on read-only + * mapping. It would be performance prohibitive to read the permissions on + * the buffer before the operation. 
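A hypothetical caller, shown only to illustrate the effect on such targets (kgsl_cache_range_op() and KGSL_CACHE_OP_INV come from the driver; the helper itself is made up):

	/* Hypothetical helper: on ARMv8 the invalidate below is executed as a flush */
	static int example_sync_for_cpu(struct kgsl_memdesc *memdesc, uint64_t len)
	{
		return kgsl_cache_range_op(memdesc, 0, len, KGSL_CACHE_OP_INV);
	}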
Every use case that we have found does not + * assume that an invalidate operation is invalidate only, so we feel + * comfortable turning invalidates into flushes for these targets + */ +static inline unsigned int _fixup_cache_range_op(unsigned int op) +{ + if (op == KGSL_CACHE_OP_INV) + return KGSL_CACHE_OP_FLUSH; + return op; +} +#else +static inline unsigned int _fixup_cache_range_op(unsigned int op) +{ + return op; +} +#endif + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, uint64_t offset, + uint64_t size, unsigned int op) +{ + /* + * If the buffer is mapped in the kernel operate on that address + * otherwise use the user address + */ + + void *addr = (memdesc->hostptr) ? + memdesc->hostptr : (void *) memdesc->useraddr; + + /* Make sure that size is non-zero */ + if (!size) + return -EINVAL; + + /* Make sure that the offset + size isn't bigger than we can handle */ + if ((offset + size) > ULONG_MAX) + return -ERANGE; + + /* Make sure the offset + size do not overflow the address */ + if (addr + ((size_t) offset + (size_t) size) < addr) + return -ERANGE; + + /* Check that offset+length does not exceed memdesc->size */ + if (offset + size > memdesc->size) + return -ERANGE; + + /* Return quietly if the buffer isn't mapped on the CPU */ + if (addr == NULL) + return 0; + + addr = addr + offset; + + /* + * The dmac_xxx_range functions handle addresses and sizes that + * are not aligned to the cacheline size correctly. + */ + + switch (_fixup_cache_range_op(op)) { + case KGSL_CACHE_OP_FLUSH: + dmac_flush_range(addr, addr + (size_t) size); + break; + case KGSL_CACHE_OP_CLEAN: + dmac_clean_range(addr, addr + (size_t) size); + break; + case KGSL_CACHE_OP_INV: + dmac_inv_range(addr, addr + (size_t) size); + break; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_cache_range_op); + +#ifndef CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS +static inline int get_page_size(size_t size, unsigned int align) +{ + return (align >= ilog2(SZ_64K) && size >= SZ_64K) + ? SZ_64K : PAGE_SIZE; +} +#else +static inline int get_page_size(size_t size, unsigned int align) +{ + return PAGE_SIZE; +} +#endif + +static int +_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size) +{ + int ret = 0; + unsigned int j, pcount = 0, page_size, len_alloc; + size_t len; + struct page **pages = NULL; + pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); + void *ptr; + unsigned int align; + unsigned int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT; + + align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT; + + page_size = get_page_size(size, align); + + /* + * The alignment cannot be less than the intended page size - it can be + * larger however to accomodate hardware quirks + */ + + if (align < ilog2(page_size)) + kgsl_memdesc_set_align(memdesc, ilog2(page_size)); + + if (size > SIZE_MAX) + return -EINVAL; + + /* + * There needs to be enough room in the page array to be able to + * service the allocation entirely with PAGE_SIZE sized chunks + */ + + len_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT; + + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_page_alloc_ops; + + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) + return -ENOMEM; + + /* + * Allocate space to store the list of pages to send to vmap. 
This is an + * array of pointers so we can track 1024 pages per page of allocation + */ + + pages = kgsl_malloc(len_alloc * sizeof(struct page *)); + + if (pages == NULL) { + ret = -ENOMEM; + goto done; + } + + len = size; + + while (len > 0) { + struct page *page; + gfp_t gfp_mask = __GFP_HIGHMEM; + int j; + + /* don't waste space at the end of the allocation*/ + if (len < page_size) + page_size = PAGE_SIZE; + + /* + * Don't do some of the more aggressive memory recovery + * techniques for large order allocations + */ + if (page_size != PAGE_SIZE) + gfp_mask |= __GFP_COMP | __GFP_NORETRY | + __GFP_NO_KSWAPD | __GFP_NOWARN; + else + gfp_mask |= GFP_KERNEL; + + if (sharedmem_noretry_flag == true) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + + page = alloc_pages(gfp_mask, get_order(page_size)); + + if (page == NULL) { + if (page_size != PAGE_SIZE) { + page_size = PAGE_SIZE; + continue; + } + + /* + * Update sglen and memdesc size,as requested allocation + * not served fully. So that they can be correctly freed + * in kgsl_sharedmem_free(). + */ + memdesc->size = (size - len); + + if (sharedmem_noretry_flag != true) + KGSL_CORE_ERR( + "Out of memory: only allocated %lldKB of %lldKB requested\n", + (size - len) >> 10, size >> 10); + + ret = -ENOMEM; + goto done; + } + + for (j = 0; j < page_size >> PAGE_SHIFT; j++) + pages[pcount++] = nth_page(page, j); + + len -= page_size; + memdesc->size += page_size; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, pages, pcount, 0, + memdesc->size, GFP_KERNEL); + if (ret) + goto done; + + /* Call to the hypervisor to lock any secure buffer allocations */ + if (memdesc->flags & KGSL_MEMFLAGS_SECURE) { + unsigned int i; + struct scatterlist *sg; + int dest_perms = PERM_READ | PERM_WRITE; + int source_vm = VMID_HLOS; + int dest_vm = VMID_CP_PIXEL; + + ret = hyp_assign_table(memdesc->sgt, &source_vm, 1, + &dest_vm, &dest_perms, 1); + if (ret) + goto done; + + /* Set private bit for each sg to indicate that its secured */ + for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) + SetPagePrivate(sg_page(sg)); + + memdesc->priv |= KGSL_MEMDESC_TZ_LOCKED; + + /* Record statistics */ + KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.secure, + &kgsl_driver.stats.secure_max); + + /* Don't map and zero the locked secure buffer */ + goto done; + } + + /* + * All memory that goes to the user has to be zeroed out before it gets + * exposed to userspace. This means that the memory has to be mapped in + * the kernel, zeroed (memset) and then unmapped. This also means that + * the dcache has to be flushed to ensure coherency between the kernel + * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped + * zeroed and unmaped each individual page, and then we had to turn + * around and call flush_dcache_page() on that page to clear the caches. + * This was killing us for performance. Instead, we found it is much + * faster to allocate the pages without GFP_ZERO, map a chunk of the + * range ('step' pages), memset it, flush it and then unmap + * - this results in a factor of 4 improvement for speed for large + * buffers. There is a small decrease in speed for small buffers, + * but only on the order of a few microseconds at best. The 'step' + * size is based on a guess at the amount of free vmalloc space, but + * will scale down if there's not enough free space. 
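To put rough numbers on it (hypothetical 240 MB vmalloc window, 4 KB pages): the initial step is (240 MB / 8) >> PAGE_SHIFT = 7680 pages, so each vmap/memset/flush pass covers roughly 30 MB, and the step halves every time vmap() fails.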
+ */ + for (j = 0; j < pcount; j += step) { + step = min(step, pcount - j); + + ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot); + + if (ptr != NULL) { + memset(ptr, 0, step * PAGE_SIZE); + dmac_flush_range(ptr, ptr + step * PAGE_SIZE); + vunmap(ptr); + } else { + int k; + /* Very, very, very slow path */ + + for (k = j; k < j + step; k++) { + ptr = kmap_atomic(pages[k]); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr); + } + /* scale down the step size to avoid this path */ + if (step > 1) + step >>= 1; + } + } + + KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc, + &kgsl_driver.stats.page_alloc_max); + +done: + if (ret) { + unsigned int count = 1; + for (j = 0; j < pcount; j += count) { + count = 1 << compound_order(pages[j]); + __free_pages(pages[j], compound_order(pages[j])); + } + + kfree(memdesc->sgt); + memset(memdesc, 0, sizeof(*memdesc)); + } + kgsl_free(pages); + + return ret; +} + +int +kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size) +{ + size = PAGE_ALIGN(size); + if (size == 0) + return -EINVAL; + + return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size); +} +EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user); + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) +{ + if (memdesc == NULL || memdesc->size == 0) + return; + + if (memdesc->gpuaddr) { + kgsl_mmu_unmap(memdesc->pagetable, memdesc); + kgsl_mmu_put_gpuaddr(memdesc->pagetable, memdesc); + } + + if (memdesc->ops && memdesc->ops->free) + memdesc->ops->free(memdesc); + + if (memdesc->sgt) { + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + } + + memset(memdesc, 0, sizeof(*memdesc)); +} +EXPORT_SYMBOL(kgsl_sharedmem_free); + +int +kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes) +{ + uint32_t *src; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + + rmb(); + src = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_readl); + +int +kgsl_sharedmem_writel(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src) +{ + uint32_t *dst; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_write(device, + memdesc->gpuaddr + offsetbytes, + src); + dst = (uint32_t *)(memdesc->hostptr + offsetbytes); + *dst = src; + + wmb(); + + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_writel); + +int +kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes) +{ + uint64_t *src; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL || dst == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + + /* + * We are reading shared memory between CPU and GPU. 
+ * Make sure reads before this are complete + */ + rmb(); + src = (uint64_t *)(memdesc->hostptr + offsetbytes); + *dst = *src; + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_readq); + +int +kgsl_sharedmem_writeq(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src) +{ + uint64_t *dst; + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); + WARN_ON(offsetbytes % sizeof(uint32_t) != 0); + if (offsetbytes % sizeof(uint32_t) != 0) + return -EINVAL; + + WARN_ON(offsetbytes + sizeof(uint32_t) > memdesc->size); + if (offsetbytes + sizeof(uint32_t) > memdesc->size) + return -ERANGE; + kgsl_cffdump_write(device, + lower_32_bits(memdesc->gpuaddr + offsetbytes), src); + kgsl_cffdump_write(device, + upper_32_bits(memdesc->gpuaddr + offsetbytes), src); + dst = (uint64_t *)(memdesc->hostptr + offsetbytes); + *dst = src; + + /* + * We are writing to shared memory between CPU and GPU. + * Make sure write above is posted immediately + */ + wmb(); + + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_writeq); + +int +kgsl_sharedmem_set(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, uint64_t offsetbytes, + unsigned int value, uint64_t sizebytes) +{ + BUG_ON(memdesc == NULL || memdesc->hostptr == NULL); + BUG_ON(offsetbytes + sizebytes > memdesc->size); + + kgsl_cffdump_memset(device, + memdesc->gpuaddr + offsetbytes, value, + sizebytes); + memset(memdesc->hostptr + offsetbytes, value, sizebytes); + return 0; +} +EXPORT_SYMBOL(kgsl_sharedmem_set); + +static const char * const memtype_str[] = { + [KGSL_MEMTYPE_OBJECTANY] = "any(0)", + [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer", + [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer", + [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer", + [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer", + [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer", + [KGSL_MEMTYPE_TEXTURE] = "texture", + [KGSL_MEMTYPE_SURFACE] = "surface", + [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface", + [KGSL_MEMTYPE_GL] = "gl", + [KGSL_MEMTYPE_CL] = "cl", + [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map", + [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap", + [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map", + [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap", + [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack", + [KGSL_MEMTYPE_COMMAND] = "command", + [KGSL_MEMTYPE_2D] = "2d", + [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image", + [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow", + [KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample", + /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */ +}; + +void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags) +{ + unsigned int type = MEMFLAGS(memflags, KGSL_MEMTYPE_MASK, + KGSL_MEMTYPE_SHIFT); + + if (type == KGSL_MEMTYPE_KERNEL) + strlcpy(name, "kernel", name_size); + else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL) + strlcpy(name, memtype_str[type], name_size); + else + snprintf(name, name_size, "unknown(%3d)", type); +} +EXPORT_SYMBOL(kgsl_get_memory_usage); + +int kgsl_cma_alloc_coherent(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, uint64_t size) +{ + int result = 0; + + size = ALIGN(size, PAGE_SIZE); + + if (size == 0 || size > SIZE_MAX) + return -EINVAL; + + memdesc->size = size; + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_cma_ops; + memdesc->dev = device->dev->parent; + + memdesc->hostptr = dma_alloc_attrs(memdesc->dev, (size_t) size, + &memdesc->physaddr, GFP_KERNEL, NULL); + + if (memdesc->hostptr == NULL) { + 
result = -ENOMEM; + goto err; + } + + result = memdesc_sg_dma(memdesc, memdesc->physaddr, size); + if (result) + goto err; + + /* Record statistics */ + + KGSL_STATS_ADD(size, &kgsl_driver.stats.coherent, + &kgsl_driver.stats.coherent_max); + +err: + if (result) + kgsl_sharedmem_free(memdesc); + + return result; +} +EXPORT_SYMBOL(kgsl_cma_alloc_coherent); + +static int scm_lock_chunk(struct kgsl_memdesc *memdesc, int lock) +{ + struct cp2_lock_req request; + unsigned int resp; + unsigned int *chunk_list; + struct scm_desc desc = {0}; + int result; + + /* + * Flush the virt addr range before sending the memory to the + * secure environment to ensure the data is actually present + * in RAM + * + * Chunk_list holds the physical address of secure memory. + * Pass in the virtual address of chunk_list to flush. + * Chunk_list size is 1 because secure memory is physically + * contiguous. + */ + chunk_list = kzalloc(sizeof(unsigned int), GFP_KERNEL); + if (!chunk_list) + return -ENOMEM; + + chunk_list[0] = memdesc->physaddr; + dmac_flush_range((void *)chunk_list, (void *)chunk_list + 1); + + request.chunks.chunk_list = virt_to_phys(chunk_list); + /* + * virt_to_phys(chunk_list) may be an address > 4GB. It is guaranteed + * that when using scm_call (the older interface), the phys addresses + * will be restricted to below 4GB. + */ + desc.args[0] = virt_to_phys(chunk_list); + desc.args[1] = request.chunks.chunk_list_size = 1; + desc.args[2] = request.chunks.chunk_size = (unsigned int) memdesc->size; + desc.args[3] = request.mem_usage = 0; + desc.args[4] = request.lock = lock; + desc.args[5] = 0; + desc.arginfo = SCM_ARGS(6, SCM_RW, SCM_VAL, SCM_VAL, SCM_VAL, SCM_VAL, + SCM_VAL); + kmap_flush_unused(); + kmap_atomic_flush_unused(); + if (!is_scm_armv8()) { + result = scm_call(SCM_SVC_MP, MEM_PROTECT_LOCK_ID2, + &request, sizeof(request), &resp, sizeof(resp)); + } else { + result = scm_call2(SCM_SIP_FNID(SCM_SVC_MP, + MEM_PROTECT_LOCK_ID2_FLAT), &desc); + resp = desc.ret[0]; + } + + kfree(chunk_list); + return result; +} + +int kgsl_cma_alloc_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t size) +{ + struct kgsl_iommu *iommu = device->mmu.priv; + int result = 0; + struct kgsl_pagetable *pagetable = device->mmu.securepagetable; + size_t aligned; + + if (size == 0) + return -EINVAL; + + /* Align size to 1M boundaries */ + aligned = ALIGN(size, SZ_1M); + + /* The SCM call uses an unsigned int for the size */ + if (aligned > UINT_MAX) + return -EINVAL; + + /* + * If there is more than a page gap between the requested size and the + * aligned size we don't need to add more memory for a guard page. Yay! 
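For example (illustrative sizes): a request of 1 MB + 4 KB aligns up to 2 MB, leaving a 1020 KB gap, so the explicit guard page is dropped; a request of exactly 1 MB aligns to itself, leaves no gap, and keeps it.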
+ */ + + if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) + if (aligned - size >= SZ_4K) + memdesc->priv &= ~KGSL_MEMDESC_GUARD_PAGE; + + memdesc->size = aligned; + memdesc->pagetable = pagetable; + memdesc->ops = &kgsl_cma_ops; + memdesc->dev = iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE].dev; + + init_dma_attrs(&memdesc->attrs); + dma_set_attr(DMA_ATTR_STRONGLY_ORDERED, &memdesc->attrs); + + memdesc->hostptr = dma_alloc_attrs(memdesc->dev, aligned, + &memdesc->physaddr, GFP_KERNEL, &memdesc->attrs); + + if (memdesc->hostptr == NULL) { + result = -ENOMEM; + goto err; + } + + result = memdesc_sg_dma(memdesc, memdesc->physaddr, aligned); + if (result) + goto err; + + result = scm_lock_chunk(memdesc, 1); + + if (result != 0) + goto err; + + /* Set the private bit to indicate that we've secured this */ + SetPagePrivate(sg_page(memdesc->sgt->sgl)); + + memdesc->priv |= KGSL_MEMDESC_TZ_LOCKED; + + /* Record statistics */ + KGSL_STATS_ADD(aligned, &kgsl_driver.stats.secure, + &kgsl_driver.stats.secure_max); +err: + if (result) + kgsl_sharedmem_free(memdesc); + + return result; +} +EXPORT_SYMBOL(kgsl_cma_alloc_secure); + +/** + * kgsl_cma_unlock_secure() - Unlock secure memory by calling TZ + * @memdesc: memory descriptor + */ +static void kgsl_cma_unlock_secure(struct kgsl_memdesc *memdesc) +{ + if (memdesc->size == 0 || !(memdesc->priv & KGSL_MEMDESC_TZ_LOCKED)) + return; + + if (!scm_lock_chunk(memdesc, 0)) + ClearPagePrivate(sg_page(memdesc->sgt->sgl)); +} + +void kgsl_sharedmem_set_noretry(bool val) +{ + sharedmem_noretry_flag = val; +} + +bool kgsl_sharedmem_get_noretry(void) +{ + return sharedmem_noretry_flag; +} diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h new file mode 100644 index 000000000000..9e5651d18df8 --- /dev/null +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -0,0 +1,331 @@ +/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef __KGSL_SHAREDMEM_H +#define __KGSL_SHAREDMEM_H + +#include <linux/slab.h> +#include <linux/dma-mapping.h> +#include "kgsl_mmu.h" +#include <linux/slab.h> +#include <linux/kmemleak.h> +#include <linux/iommu.h> + +#include "kgsl_mmu.h" +#include "kgsl_log.h" + +struct kgsl_device; +struct kgsl_process_private; + +#define KGSL_CACHE_OP_INV 0x01 +#define KGSL_CACHE_OP_FLUSH 0x02 +#define KGSL_CACHE_OP_CLEAN 0x03 + +int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size); + +int kgsl_cma_alloc_coherent(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, uint64_t size); + +int kgsl_cma_alloc_secure(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t size); + +void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc); + +int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc, + uint32_t *dst, + uint64_t offsetbytes); + +int kgsl_sharedmem_writel(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint32_t src); + +int kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc, + uint64_t *dst, + uint64_t offsetbytes); + +int kgsl_sharedmem_writeq(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, + uint64_t src); + +int kgsl_sharedmem_set(struct kgsl_device *device, + const struct kgsl_memdesc *memdesc, + uint64_t offsetbytes, unsigned int value, + uint64_t sizebytes); + +int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, + uint64_t offset, uint64_t size, + unsigned int op); + +void kgsl_process_init_sysfs(struct kgsl_device *device, + struct kgsl_process_private *private); +void kgsl_process_uninit_sysfs(struct kgsl_process_private *private); + +int kgsl_sharedmem_init_sysfs(void); +void kgsl_sharedmem_uninit_sysfs(void); + +int kgsl_allocate_user(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, + struct kgsl_pagetable *pagetable, + uint64_t size, uint64_t mmapsize, uint64_t flags); + +#define MEMFLAGS(_flags, _mask, _shift) \ + ((unsigned int) (((_flags) & (_mask)) >> (_shift))) + +/* + * kgsl_memdesc_get_align - Get alignment flags from a memdesc + * @memdesc - the memdesc + * + * Returns the alignment requested, as power of 2 exponent. + */ +static inline int +kgsl_memdesc_get_align(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_MEMALIGN_MASK, + KGSL_MEMALIGN_SHIFT); +} + +/* + * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc + * @memdesc: the memdesc + * + * Returns a KGSL_CACHEMODE* value. + */ +static inline int +kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_CACHEMODE_MASK, + KGSL_CACHEMODE_SHIFT); +} + +static inline unsigned int +kgsl_memdesc_get_memtype(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_MEMTYPE_MASK, + KGSL_MEMTYPE_SHIFT); +} +/* + * kgsl_memdesc_set_align - Set alignment flags of a memdesc + * @memdesc - the memdesc + * @align - alignment requested, as a power of 2 exponent. 
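Usage sketch (illustrative; memdesc is assumed to be a struct kgsl_memdesc * owned by the caller): alignment is passed as a power-of-two exponent, so a 64 KB alignment request looks like

	kgsl_memdesc_set_align(memdesc, ilog2(SZ_64K));	/* exponent 16 */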
+ */ +static inline int +kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc, unsigned int align) +{ + if (align > 32) { + KGSL_CORE_ERR("Alignment too big, restricting to 2^32\n"); + align = 32; + } + + memdesc->flags &= ~KGSL_MEMALIGN_MASK; + memdesc->flags |= (align << KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK; + return 0; +} + +/** + * kgsl_memdesc_usermem_type - return buffer type + * @memdesc - the memdesc + * + * Returns a KGSL_MEM_ENTRY_* value for this buffer, which + * identifies if was allocated by us, or imported from + * another allocator. + */ +static inline unsigned int +kgsl_memdesc_usermem_type(const struct kgsl_memdesc *memdesc) +{ + return MEMFLAGS(memdesc->flags, KGSL_MEMFLAGS_USERMEM_MASK, + KGSL_MEMFLAGS_USERMEM_SHIFT); +} + +/** + * memdesg_sg_dma() - Turn a dma_addr (from CMA) into a sg table + * @memdesc: Pointer to the memdesc structure + * @addr: Physical address from the dma_alloc function + * @size: Size of the chunk + * + * Create a sg table for the contigious chunk specified by addr and size. + */ +static inline int +memdesc_sg_dma(struct kgsl_memdesc *memdesc, + phys_addr_t addr, uint64_t size) +{ + int ret; + struct page *page = phys_to_page(addr); + + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) + return -ENOMEM; + + ret = sg_alloc_table(memdesc->sgt, 1, GFP_KERNEL); + if (ret) { + kfree(memdesc->sgt); + memdesc->sgt = NULL; + return ret; + } + + sg_set_page(memdesc->sgt->sgl, page, (size_t) size, 0); + return 0; +} + +/* + * kgsl_memdesc_is_global - is this a globally mapped buffer? + * @memdesc: the memdesc + * + * Returns nonzero if this is a global mapping, 0 otherwise + */ +static inline int kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc) +{ + return (memdesc->priv & KGSL_MEMDESC_GLOBAL) != 0; +} + +/* + * kgsl_memdesc_is_secured - is this a secure buffer? + * @memdesc: the memdesc + * + * Returns true if this is a secure mapping, false otherwise + */ +static inline bool kgsl_memdesc_is_secured(const struct kgsl_memdesc *memdesc) +{ + return memdesc && (memdesc->priv & KGSL_MEMDESC_SECURE); +} + +/* + * kgsl_memdesc_has_guard_page - is the last page a guard page? + * @memdesc - the memdesc + * + * Returns nonzero if there is a guard page, 0 otherwise + */ +static inline int +kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc) +{ + return (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) != 0; +} + +/* + * kgsl_memdesc_guard_page_size - returns guard page size + * @memdesc - the memdesc + * + * Returns guard page size + */ +static inline int +kgsl_memdesc_guard_page_size(const struct kgsl_mmu *mmu, + const struct kgsl_memdesc *memdesc) +{ + return kgsl_memdesc_is_secured(memdesc) ? mmu->secure_align_mask + 1 : + PAGE_SIZE; +} + +/* + * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU? + * @memdesc - the memdesc + */ +static inline int +kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc) +{ + return (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP) != 0; +} + +/* + * kgsl_memdesc_mmapsize - get the size of the mmap region + * @memdesc - the memdesc + * + * The entire memdesc must be mapped. Additionally if the + * CPU mapping is going to be mirrored, there must be room + * for the guard page to be mapped so that the address spaces + * match up. 
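For example, a 64 KB allocation carrying a guard page reports a 68 KB mmap size (size + SZ_4K); without the guard page it reports 64 KB.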
+ */ +static inline uint64_t +kgsl_memdesc_mmapsize(const struct kgsl_memdesc *memdesc) +{ + uint64_t size = memdesc->size; + if (kgsl_memdesc_has_guard_page(memdesc)) + size += SZ_4K; + return size; +} + +static inline int +kgsl_allocate_contiguous(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, size_t size) +{ + int ret; + + size = ALIGN(size, PAGE_SIZE); + + ret = kgsl_cma_alloc_coherent(device, memdesc, NULL, size); + if (!ret && (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)) + memdesc->gpuaddr = memdesc->physaddr; + + return ret; +} + +/* + * kgsl_allocate_global() - Allocate GPU accessible memory that will be global + * across all processes + * @device: The device pointer to which the memdesc belongs + * @memdesc: Pointer to a KGSL memory descriptor for the memory allocation + * @size: size of the allocation + * @flags: Allocation flags that control how the memory is mapped + * @priv: Priv flags that controls memory attributes + * + * Allocate contiguous memory for internal use and add the allocation to the + * list of global pagetable entries that will be mapped at the same address in + * all pagetables. This is for use for device wide GPU allocations such as + * ringbuffers. + */ +static inline int kgsl_allocate_global(struct kgsl_device *device, + struct kgsl_memdesc *memdesc, uint64_t size, uint64_t flags, + unsigned int priv) +{ + int ret; + + BUG_ON(size > SIZE_MAX); + + if (size == 0) + return -EINVAL; + + memdesc->flags = flags; + memdesc->priv = priv; + + ret = kgsl_allocate_contiguous(device, memdesc, (size_t) size); + + if (!ret) { + ret = kgsl_add_global_pt_entry(device, memdesc); + if (ret) + kgsl_sharedmem_free(memdesc); + } + + return ret; +} + +/** + * kgsl_free_global() - Free a device wide GPU allocation and remove it from the + * global pagetable entry list + * + * @memdesc: Pointer to the GPU memory descriptor to free + * + * Remove the specific memory descriptor from the global pagetable entry list + * and free it + */ +static inline void kgsl_free_global(struct kgsl_memdesc *memdesc) +{ + kgsl_remove_global_pt_entry(memdesc); + kgsl_sharedmem_free(memdesc); +} + +void kgsl_sharedmem_set_noretry(bool val); +bool kgsl_sharedmem_get_noretry(void); + +#endif /* __KGSL_SHAREDMEM_H */ diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c new file mode 100644 index 000000000000..8116ccaa96bd --- /dev/null +++ b/drivers/gpu/msm/kgsl_snapshot.c @@ -0,0 +1,1051 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/export.h> +#include <linux/time.h> +#include <linux/sysfs.h> +#include <linux/utsname.h> +#include <linux/sched.h> +#include <linux/idr.h> + +#include "kgsl.h" +#include "kgsl_log.h" +#include "kgsl_device.h" +#include "kgsl_sharedmem.h" +#include "kgsl_snapshot.h" +#include "adreno_cp_parser.h" + +/* Placeholder for list of ib objects that contain all objects in that IB */ + +struct kgsl_snapshot_cp_obj { + struct adreno_ib_object_list *ib_obj_list; + struct list_head node; +}; + +struct snapshot_obj_itr { + u8 *buf; /* Buffer pointer to write to */ + int pos; /* Current position in the sequence */ + loff_t offset; /* file offset to start writing from */ + size_t remain; /* Bytes remaining in buffer */ + size_t write; /* Bytes written so far */ +}; + +static void obj_itr_init(struct snapshot_obj_itr *itr, u8 *buf, + loff_t offset, size_t remain) +{ + itr->buf = buf; + itr->offset = offset; + itr->remain = remain; + itr->pos = 0; + itr->write = 0; +} + +static int obj_itr_out(struct snapshot_obj_itr *itr, void *src, int size) +{ + if (itr->remain == 0) + return 0; + + if ((itr->pos + size) <= itr->offset) + goto done; + + /* Handle the case that offset is in the middle of the buffer */ + + if (itr->offset > itr->pos) { + src += (itr->offset - itr->pos); + size -= (itr->offset - itr->pos); + + /* Advance pos to the offset start */ + itr->pos = itr->offset; + } + + if (size > itr->remain) + size = itr->remain; + + memcpy(itr->buf, src, size); + + itr->buf += size; + itr->write += size; + itr->remain -= size; + +done: + itr->pos += size; + return size; +} + +/* idr_for_each function to count the number of contexts */ + +static int snapshot_context_count(int id, void *ptr, void *data) +{ + int *count = data; + *count = *count + 1; + + return 0; +} + +/* + * To simplify the iterator loop use a global pointer instead of trying + * to pass around double star references to the snapshot data + */ + +static u8 *_ctxtptr; + +static int snapshot_context_info(int id, void *ptr, void *data) +{ + struct kgsl_snapshot_linux_context *header = + (struct kgsl_snapshot_linux_context *)_ctxtptr; + struct kgsl_context *context = ptr; + struct kgsl_device *device; + + device = context->device; + + header->id = id; + + /* Future-proof for per-context timestamps - for now, just + * return the global timestamp for all contexts + */ + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, + &header->timestamp_queued); + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, + &header->timestamp_retired); + + _ctxtptr += sizeof(struct kgsl_snapshot_linux_context); + + return 0; +} + +/* Snapshot the Linux specific information */ +static size_t snapshot_os(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_linux *header = (struct kgsl_snapshot_linux *)buf; + struct kgsl_pwrctrl *pwr = &device->pwrctrl; + struct task_struct *task; + pid_t pid; + int ctxtcount = 0; + size_t size = sizeof(*header); + u64 temp_ptbase; + + /* Figure out how many active contexts there are - these will + * be appended on the end of the structure */ + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount); + read_unlock(&device->context_lock); + + size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context); + + /* Make sure there is enough room for the data */ + if (remain < size) { + SNAPSHOT_ERR_NOMEM(device, "OS"); + return 0; + } + + memset(header, 0, sizeof(*header)); + + header->osid = 
KGSL_SNAPSHOT_OS_LINUX; + + header->state = SNAPSHOT_STATE_HUNG; + + /* Get the kernel build information */ + strlcpy(header->release, utsname()->release, sizeof(header->release)); + strlcpy(header->version, utsname()->version, sizeof(header->version)); + + /* Get the Unix time for the timestamp */ + header->seconds = get_seconds(); + + /* Remember the power information */ + header->power_flags = pwr->power_flags; + header->power_level = pwr->active_pwrlevel; + header->power_interval_timeout = pwr->interval_timeout; + header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]); + + /* + * Save the last active context from global index since its more + * reliable than currrent RB index + */ + kgsl_sharedmem_readl(&device->memstore, &header->current_context, + KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context)); + + + /* Get the current PT base */ + temp_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu); + /* Truncate to 32 bits in case LPAE is used */ + header->ptbase = (__u32)temp_ptbase; + /* And the PID for the task leader */ + pid = header->pid = kgsl_mmu_get_ptname_from_ptbase(&device->mmu, + temp_ptbase); + + task = find_task_by_vpid(pid); + + if (task) + get_task_comm(header->comm, task); + + header->ctxtcount = ctxtcount; + + _ctxtptr = buf + sizeof(*header); + /* append information for each context */ + + read_lock(&device->context_lock); + idr_for_each(&device->context_idr, snapshot_context_info, NULL); + read_unlock(&device->context_lock); + + /* Return the size of the data segment */ + return size; +} + +static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj) +{ + list_del(&obj->node); + + obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN; + kgsl_mem_entry_put(obj->entry); + + kfree(obj); +} + +/** + * kgsl_snapshot_have_object() - return 1 if the object has been processed + * @snapshot: the snapshot data + * @process: The process that owns the the object to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * + * Return 1 if the object is already in the list - this can save us from + * having to parse the same thing over again. 
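Illustrative addresses: if an object at gpuaddr 0x1000 with size 0x2000 is already tracked, a query for gpuaddr 0x1800 with size 0x400 falls entirely inside it and returns 1.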
There are 2 lists that are + * tracking objects so check for the object in both lists +*/ +int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, + uint64_t gpuaddr, uint64_t size) +{ + struct kgsl_snapshot_object *obj; + struct kgsl_snapshot_cp_obj *obj_cp; + struct adreno_ib_object *ib_obj; + int i; + + /* Check whether the object is tracked already in ib list */ + list_for_each_entry(obj_cp, &snapshot->cp_list, node) { + if (obj_cp->ib_obj_list == NULL + || obj_cp->ib_obj_list->num_objs == 0) + continue; + + ib_obj = &(obj_cp->ib_obj_list->obj_list[0]); + if (ib_obj->entry == NULL || ib_obj->entry->priv != process) + continue; + + for (i = 0; i < obj_cp->ib_obj_list->num_objs; i++) { + ib_obj = &(obj_cp->ib_obj_list->obj_list[i]); + if ((gpuaddr >= ib_obj->gpuaddr) && + ((gpuaddr + size) <= + (ib_obj->gpuaddr + ib_obj->size))) + return 1; + } + } + + list_for_each_entry(obj, &snapshot->obj_list, node) { + if (obj->entry == NULL || obj->entry->priv != process) + continue; + + if ((gpuaddr >= obj->gpuaddr) && + ((gpuaddr + size) <= (obj->gpuaddr + obj->size))) + return 1; + } + + return 0; +} +EXPORT_SYMBOL(kgsl_snapshot_have_object); + +/** + * kgsl_snapshot_get_object() - Mark a GPU buffer to be frozen + * @snapshot: The snapshot data + * @process: The process that owns the object we want to freeze + * @gpuaddr: The gpu address of the object to freeze + * @size: the size of the object (may not always be the size of the region) + * @type: the type of object being saved (shader, vbo, etc) + * + * Mark and freeze a GPU buffer object. This will prevent it from being + * freed until it can be copied out as part of the snapshot dump. Returns the + * size of the object being frozen + */ +int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot, + struct kgsl_process_private *process, uint64_t gpuaddr, + uint64_t size, unsigned int type) +{ + struct kgsl_mem_entry *entry; + struct kgsl_snapshot_object *obj; + uint64_t offset; + int ret = -EINVAL; + unsigned int mem_type; + + if (!gpuaddr) + return 0; + + entry = kgsl_sharedmem_find(process, gpuaddr); + + if (entry == NULL) { + KGSL_CORE_ERR("Unable to find GPU buffer 0x%016llX\n", gpuaddr); + return -EINVAL; + } + + /* We can't freeze external memory, because we don't own it */ + if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_MASK) + goto err_put; + /* + * Do not save texture and render targets in snapshot, + * they can be just too big + */ + + mem_type = kgsl_memdesc_get_memtype(&entry->memdesc); + if (KGSL_MEMTYPE_TEXTURE == mem_type || + KGSL_MEMTYPE_EGL_SURFACE == mem_type || + KGSL_MEMTYPE_EGL_IMAGE == mem_type) { + ret = 0; + goto err_put; + } + + /* + * size indicates the number of bytes in the region to save. This might + * not always be the entire size of the region because some buffers are + * sub-allocated from a larger region. 
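 * As a rough example: if a mem entry spans [memdesc.gpuaddr,
 * memdesc.gpuaddr + memdesc.size) and the caller asks for (gpuaddr, size)
 * inside it, the object is recorded with
 *
 *   offset = gpuaddr - memdesc.gpuaddr
 *
 * and rejected if offset + size would run past memdesc.size.
 *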
However, if size 0 was passed + * thats a flag that the caller wants to capture the entire buffer + */ + + if (size == 0) { + size = entry->memdesc.size; + offset = 0; + + /* Adjust the gpuaddr to the start of the object */ + gpuaddr = entry->memdesc.gpuaddr; + } else { + offset = gpuaddr - entry->memdesc.gpuaddr; + } + + if (size + offset > entry->memdesc.size) { + KGSL_CORE_ERR("Invalid size for GPU buffer 0x%016llX\n", + gpuaddr); + goto err_put; + } + + /* If the buffer is already on the list, skip it */ + list_for_each_entry(obj, &snapshot->obj_list, node) { + /* combine the range with existing object if they overlap */ + if (obj->entry->priv == process && obj->type == type && + kgsl_addr_range_overlap(obj->gpuaddr, obj->size, + gpuaddr, size)) { + uint64_t end1 = obj->gpuaddr + obj->size; + uint64_t end2 = gpuaddr + size; + if (obj->gpuaddr > gpuaddr) + obj->gpuaddr = gpuaddr; + if (end1 > end2) + obj->size = end1 - obj->gpuaddr; + else + obj->size = end2 - obj->gpuaddr; + obj->offset = obj->gpuaddr - entry->memdesc.gpuaddr; + ret = 0; + goto err_put; + } + } + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + + if (obj == NULL) + goto err_put; + + obj->type = type; + obj->entry = entry; + obj->gpuaddr = gpuaddr; + obj->size = size; + obj->offset = offset; + + list_add(&obj->node, &snapshot->obj_list); + + /* + * Return the size of the entire mem entry that was frozen - this gets + * used for tracking how much memory is frozen for a hang. Also, mark + * the memory entry as frozen. If the entry was already marked as + * frozen, then another buffer already got to it. In that case, return + * 0 so it doesn't get counted twice + */ + + ret = (entry->memdesc.priv & KGSL_MEMDESC_FROZEN) ? 0 + : entry->memdesc.size; + + entry->memdesc.priv |= KGSL_MEMDESC_FROZEN; + + return ret; +err_put: + kgsl_mem_entry_put(entry); + return ret; +} +EXPORT_SYMBOL(kgsl_snapshot_get_object); + +/** + * kgsl_snapshot_dump_registers - helper function to dump device registers + * @device - the device to dump registers from + * @snapshot - pointer to the start of the region of memory for the snapshot + * @remain - a pointer to the number of bytes remaining in the snapshot + * @priv - A pointer to the kgsl_snapshot_registers data + * + * Given an array of register ranges pairs (start,end [inclusive]), dump the + * registers into a snapshot register section. The snapshot region stores a + * part of dwords for each register - the word address of the register, and + * the value. 
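 * (i.e. a pair of dwords per register). For a purely hypothetical ranges
 * array such as
 *
 *   regs->regs  = { 0x0000, 0x000f, 0x0400, 0x0403 };
 *   regs->count = 2;
 *
 * the section body would hold 16 + 4 = 20 address/value pairs at 8 bytes
 * each, which is exactly what the remain check below accounts for.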
+ */ +size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf, + size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + struct kgsl_snapshot_registers *regs = priv; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, j, k; + + /* Figure out how many registers we are going to dump */ + + for (j = 0; j < regs->count; j++) { + int start = regs->regs[j * 2]; + int end = regs->regs[j * 2 + 1]; + + count += (end - start + 1); + } + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (j = 0; j < regs->count; j++) { + unsigned int start = regs->regs[j * 2]; + unsigned int end = regs->regs[j * 2 + 1]; + + for (k = start; k <= end; k++) { + unsigned int val; + + kgsl_regread(device, k, &val); + *data++ = k; + *data++ = val; + } + } + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} +EXPORT_SYMBOL(kgsl_snapshot_dump_registers); + +struct kgsl_snapshot_indexed_registers { + unsigned int index; + unsigned int data; + unsigned int start; + unsigned int count; +}; + +static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_indexed_registers *iregs = priv; + struct kgsl_snapshot_indexed_regs *header = + (struct kgsl_snapshot_indexed_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int i; + + BUG_ON(!mutex_is_locked(&device->mutex)); + + if (remain < (iregs->count * 4) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS"); + return 0; + } + + header->index_reg = iregs->index; + header->data_reg = iregs->data; + header->count = iregs->count; + header->start = iregs->start; + + for (i = 0; i < iregs->count; i++) { + kgsl_regwrite(device, iregs->index, iregs->start + i); + kgsl_regread(device, iregs->data, &data[i]); + } + + return (iregs->count * 4) + sizeof(*header); +} + +/** + * kgsl_snapshot_indexed_registers - Add a set of indexed registers to the + * snapshot + * @device: Pointer to the KGSL device being snapshotted + * @snapshot: Snapshot instance + * @index: Offset for the index register + * @data: Offset for the data register + * @start: Index to start reading + * @count: Number of entries to read + * + * Dump the values from an indexed register group into the snapshot + */ +void kgsl_snapshot_indexed_registers(struct kgsl_device *device, + struct kgsl_snapshot *snapshot, + unsigned int index, unsigned int data, + unsigned int start, + unsigned int count) +{ + struct kgsl_snapshot_indexed_registers iregs; + iregs.index = index; + iregs.data = data; + iregs.start = start; + iregs.count = count; + + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS, + snapshot, kgsl_snapshot_dump_indexed_regs, &iregs); +} +EXPORT_SYMBOL(kgsl_snapshot_indexed_registers); + +/** + * kgsl_snapshot_add_section() - Add a new section to the GPU snapshot + * @device: the KGSL device being snapshotted + * @id: the section id + * @snapshot: pointer to the snapshot instance + * @func: Function pointer to fill the section + * @priv: Private pointer to pass to the function + * + * Set up a KGSL snapshot header by filling the memory with the callback + * function and adding the standard section header + */ +void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id, + struct kgsl_snapshot *snapshot, + size_t (*func)(struct kgsl_device *, u8 *, size_t, void *), + void 
*priv) +{ + struct kgsl_snapshot_section_header *header = + (struct kgsl_snapshot_section_header *)snapshot->ptr; + u8 *data = snapshot->ptr + sizeof(*header); + size_t ret = 0; + + /* + * Sanity check to make sure there is enough for the header. The + * callback will check to make sure there is enough for the rest + * of the data. If there isn't enough room then don't advance the + * pointer. + */ + + if (snapshot->remain < sizeof(*header)) + return; + + /* It is legal to have no function (i.e. - make an empty section) */ + if (func) { + ret = func(device, data, snapshot->remain - sizeof(*header), + priv); + + /* + * If there wasn't enough room for the data then don't bother + * setting up the header. + */ + + if (ret == 0) + return; + } + + header->magic = SNAPSHOT_SECTION_MAGIC; + header->id = id; + header->size = ret + sizeof(*header); + + snapshot->ptr += header->size; + snapshot->remain -= header->size; + snapshot->size += header->size; +} + +/** + * kgsl_snapshot() - construct a device snapshot + * @device: device to snapshot + * @context: the context that is hung, might be NULL if unknown. + * + * Given a device, construct a binary snapshot dump of the current device state + * and store it in the device snapshot memory. + */ +void kgsl_device_snapshot(struct kgsl_device *device, + struct kgsl_context *context) +{ + struct kgsl_snapshot_header *header = device->snapshot_memory.ptr; + struct kgsl_snapshot *snapshot; + struct timespec boot; + phys_addr_t pa; + + if (device->snapshot_memory.ptr == NULL) { + KGSL_DRV_ERR(device, + "snapshot: no snapshot memory available\n"); + return; + } + + BUG_ON(!kgsl_state_is_awake(device)); + /* increment the hang count for good book keeping */ + device->snapshot_faultcount++; + + /* + * The first hang is always the one we are interested in. 
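 * Later faults still increment snapshot_faultcount above, but the data
 * captured for that first fault is preserved until user space reads and
 * releases it through the snapshot sysfs node.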
Don't capture + * a new snapshot instance if the old one hasn't been grabbed yet + */ + if (device->snapshot != NULL) + return; + + /* Allocate memory for the snapshot instance */ + snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL); + if (snapshot == NULL) + return; + + init_completion(&snapshot->dump_gate); + INIT_LIST_HEAD(&snapshot->obj_list); + INIT_LIST_HEAD(&snapshot->cp_list); + INIT_WORK(&snapshot->work, kgsl_snapshot_save_frozen_objs); + + snapshot->start = device->snapshot_memory.ptr; + snapshot->ptr = device->snapshot_memory.ptr; + snapshot->remain = device->snapshot_memory.size; + + header = (struct kgsl_snapshot_header *) snapshot->ptr; + + header->magic = SNAPSHOT_MAGIC; + header->gpuid = kgsl_gpuid(device, &header->chipid); + + snapshot->ptr += sizeof(*header); + snapshot->remain -= sizeof(*header); + snapshot->size += sizeof(*header); + + /* Build the Linux specific header */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS, + snapshot, snapshot_os, NULL); + + /* Get the device specific sections */ + if (device->ftbl->snapshot) + device->ftbl->snapshot(device, snapshot, context); + + /* + * The timestamp is the seconds since boot so it is easier to match to + * the kernel log + */ + + getboottime(&boot); + snapshot->timestamp = get_seconds() - boot.tv_sec; + + /* Store the instance in the device until it gets dumped */ + device->snapshot = snapshot; + + /* log buffer info to aid in ramdump fault tolerance */ + pa = __pa(device->snapshot_memory.ptr); + KGSL_DRV_ERR(device, "snapshot created at pa %pa size %zd\n", + &pa, snapshot->size); + + sysfs_notify(&device->snapshot_kobj, NULL, "timestamp"); + + /* + * Queue a work item that will save the IB data in snapshot into + * static memory to prevent loss of data due to overwriting of + * memory. 
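 * The sysfs read side waits on snapshot->dump_gate, which the worker
 * (kgsl_snapshot_save_frozen_objs) completes once every frozen object has
 * been copied into the mempool, so a reader never sees half-saved data.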
+ * + */ + kgsl_schedule_work(&snapshot->work); +} +EXPORT_SYMBOL(kgsl_device_snapshot); + +/* An attribute for showing snapshot details */ +struct kgsl_snapshot_attribute { + struct attribute attr; + ssize_t (*show)(struct kgsl_device *device, char *buf); + ssize_t (*store)(struct kgsl_device *device, const char *buf, + size_t count); +}; + +/** + * kgsl_snapshot_process_ib_obj_list() - Go through the list of IB's which need + * to be dumped for snapshot and move them to the global snapshot list so + * they will get dumped when the global list is dumped + * @device: device being snapshotted + */ +static void kgsl_snapshot_process_ib_obj_list(struct kgsl_snapshot *snapshot) +{ + struct kgsl_snapshot_cp_obj *obj, *obj_temp; + struct adreno_ib_object *ib_obj; + int i; + + list_for_each_entry_safe(obj, obj_temp, &snapshot->cp_list, + node) { + for (i = 0; i < obj->ib_obj_list->num_objs; i++) { + ib_obj = &(obj->ib_obj_list->obj_list[i]); + kgsl_snapshot_get_object(snapshot, ib_obj->entry->priv, + ib_obj->gpuaddr, ib_obj->size, + ib_obj->snapshot_obj_type); + } + list_del(&obj->node); + adreno_ib_destroy_obj_list(obj->ib_obj_list); + kfree(obj); + } +} + +#define to_snapshot_attr(a) \ +container_of(a, struct kgsl_snapshot_attribute, attr) + +#define kobj_to_device(a) \ +container_of(a, struct kgsl_device, snapshot_kobj) + +/* Dump the sysfs binary data to the user */ +static ssize_t snapshot_show(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct kgsl_device *device = kobj_to_device(kobj); + struct kgsl_snapshot *snapshot; + struct kgsl_snapshot_object *obj, *tmp; + struct kgsl_snapshot_section_header head; + struct snapshot_obj_itr itr; + int ret; + + if (device == NULL) + return 0; + + mutex_lock(&device->mutex); + snapshot = device->snapshot; + mutex_unlock(&device->mutex); + + /* Return nothing if we haven't taken a snapshot yet */ + if (snapshot == NULL) + return 0; + + /* + * Wait for the dump worker to finish. This is interruptible + * to allow userspace to bail if things go horribly wrong. + */ + ret = wait_for_completion_interruptible(&snapshot->dump_gate); + if (ret) + return ret; + + obj_itr_init(&itr, buf, off, count); + + ret = obj_itr_out(&itr, snapshot->start, snapshot->size); + if (ret == 0) + goto done; + + /* Dump the memory pool if it exists */ + if (device->snapshot->mempool) { + ret = obj_itr_out(&itr, snapshot->mempool, + snapshot->mempool_size); + if (ret == 0) + goto done; + } + + { + head.magic = SNAPSHOT_SECTION_MAGIC; + head.id = KGSL_SNAPSHOT_SECTION_END; + head.size = sizeof(head); + + obj_itr_out(&itr, &head, sizeof(head)); + } + + /* + * Make sure everything has been written out before destroying things. 
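 * User space typically reads the dump node in chunks, so this function runs
 * once per read() with a new offset; a final read that lands past the end of
 * the data copies nothing and returns 0.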
+ * The best way to confirm this is to go all the way through without + * writing any bytes - so only release if we get this far and + * itr->write is 0 + */ + + if (itr.write == 0) { + mutex_lock(&device->mutex); + device->snapshot = NULL; + mutex_unlock(&device->mutex); + + list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) + kgsl_snapshot_put_object(obj); + + if (snapshot->mempool) + vfree(snapshot->mempool); + + kfree(snapshot); + KGSL_CORE_ERR("snapshot: objects released\n"); + } + +done: + + return itr.write; +} + +/* Show the total number of hangs since device boot */ +static ssize_t faultcount_show(struct kgsl_device *device, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_faultcount); +} + +/* Reset the total number of hangs since device boot */ +static ssize_t faultcount_store(struct kgsl_device *device, const char *buf, + size_t count) +{ + if (device && count > 0) + device->snapshot_faultcount = 0; + + return count; +} + +/* Show the timestamp of the last collected snapshot */ +static ssize_t timestamp_show(struct kgsl_device *device, char *buf) +{ + unsigned long timestamp = + device->snapshot ? device->snapshot->timestamp : 0; + + return snprintf(buf, PAGE_SIZE, "%lu\n", timestamp); +} + +static struct bin_attribute snapshot_attr = { + .attr.name = "dump", + .attr.mode = 0444, + .size = 0, + .read = snapshot_show +}; + +#define SNAPSHOT_ATTR(_name, _mode, _show, _store) \ +struct kgsl_snapshot_attribute attr_##_name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +static SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL); +static SNAPSHOT_ATTR(faultcount, 0644, faultcount_show, faultcount_store); + +static ssize_t snapshot_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret; + + if (device && pattr->show) + ret = pattr->show(device, buf); + else + ret = -EIO; + + return ret; +} + +static ssize_t snapshot_sysfs_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t count) +{ + struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr); + struct kgsl_device *device = kobj_to_device(kobj); + ssize_t ret; + + if (device && pattr->store) + ret = pattr->store(device, buf, count); + else + ret = -EIO; + + return ret; +} + +static const struct sysfs_ops snapshot_sysfs_ops = { + .show = snapshot_sysfs_show, + .store = snapshot_sysfs_store, +}; + +static struct kobj_type ktype_snapshot = { + .sysfs_ops = &snapshot_sysfs_ops, +}; + +/** + * kgsl_device_snapshot_init() - add resources for the device GPU snapshot + * @device: The device to initalize + * + * Allocate memory for a GPU snapshot for the specified device, + * and create the sysfs files to manage it + */ +int kgsl_device_snapshot_init(struct kgsl_device *device) +{ + int ret; + + if (kgsl_property_read_u32(device, "qcom,snapshot-size", + (unsigned int *) &(device->snapshot_memory.size))) + device->snapshot_memory.size = KGSL_SNAPSHOT_MEMSIZE; + + /* + * Choosing a memory size of 0 is essentially the same as disabling + * snapshotting + */ + if (device->snapshot_memory.size == 0) + return 0; + + /* + * I'm not sure why anybody would choose to do so but make sure + * that we can at least fit the snapshot header in the requested + * region + */ + if (device->snapshot_memory.size < sizeof(struct kgsl_snapshot_header)) + device->snapshot_memory.size = + 
sizeof(struct kgsl_snapshot_header); + + device->snapshot_memory.ptr = kzalloc(device->snapshot_memory.size, + GFP_KERNEL); + + if (device->snapshot_memory.ptr == NULL) + return -ENOMEM; + + device->snapshot = NULL; + device->snapshot_faultcount = 0; + + ret = kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot, + &device->dev->kobj, "snapshot"); + if (ret) + goto done; + + ret = sysfs_create_bin_file(&device->snapshot_kobj, &snapshot_attr); + if (ret) + goto done; + + ret = sysfs_create_file(&device->snapshot_kobj, &attr_timestamp.attr); + if (ret) + goto done; + + ret = sysfs_create_file(&device->snapshot_kobj, &attr_faultcount.attr); + +done: + return ret; +} +EXPORT_SYMBOL(kgsl_device_snapshot_init); + +/** + * kgsl_device_snapshot_close() - take down snapshot memory for a device + * @device: Pointer to the kgsl_device + * + * Remove the sysfs files and free the memory allocated for the GPU + * snapshot + */ +void kgsl_device_snapshot_close(struct kgsl_device *device) +{ + sysfs_remove_bin_file(&device->snapshot_kobj, &snapshot_attr); + sysfs_remove_file(&device->snapshot_kobj, &attr_timestamp.attr); + + kobject_put(&device->snapshot_kobj); + + kfree(device->snapshot_memory.ptr); + + device->snapshot_memory.ptr = NULL; + device->snapshot_memory.size = 0; + device->snapshot_faultcount = 0; +} +EXPORT_SYMBOL(kgsl_device_snapshot_close); + +/** + * kgsl_snapshot_add_ib_obj_list() - Add a IB object list to the snapshot + * object list + * @device: the device that is being snapshotted + * @ib_obj_list: The IB list that has objects required to execute an IB + * @num_objs: Number of IB objects + * @ptbase: The pagetable base in which the IB is mapped + * + * Adds a new IB to the list of IB objects maintained when getting snapshot + * Returns 0 on success else -ENOMEM on error + */ +int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot, + struct adreno_ib_object_list *ib_obj_list) +{ + struct kgsl_snapshot_cp_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + obj->ib_obj_list = ib_obj_list; + list_add(&obj->node, &snapshot->cp_list); + return 0; +} + +static size_t _mempool_add_object(u8 *data, struct kgsl_snapshot_object *obj) +{ + struct kgsl_snapshot_section_header *section = + (struct kgsl_snapshot_section_header *)data; + struct kgsl_snapshot_gpu_object_v2 *header = + (struct kgsl_snapshot_gpu_object_v2 *)(data + sizeof(*section)); + u8 *dest = data + sizeof(*section) + sizeof(*header); + uint64_t size; + + size = obj->size; + + if (!kgsl_memdesc_map(&obj->entry->memdesc)) { + KGSL_CORE_ERR("snapshot: failed to map GPU object\n"); + return 0; + } + + section->magic = SNAPSHOT_SECTION_MAGIC; + section->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2; + section->size = size + sizeof(*header) + sizeof(*section); + + header->size = size >> 2; + header->gpuaddr = obj->gpuaddr; + header->ptbase = + kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable); + header->type = obj->type; + + memcpy(dest, obj->entry->memdesc.hostptr + obj->offset, size); + kgsl_memdesc_unmap(&obj->entry->memdesc); + + return section->size; +} + +/** + * kgsl_snapshot_save_frozen_objs() - Save the objects frozen in snapshot into + * memory so that the data reported in these objects is correct when snapshot + * is taken + * @work: The work item that scheduled this work + */ +void kgsl_snapshot_save_frozen_objs(struct work_struct *work) +{ + struct kgsl_snapshot *snapshot = container_of(work, + struct kgsl_snapshot, work); + struct kgsl_snapshot_object *obj, *tmp; + 
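	/*
	 * Sketch of what follows: walk obj_list, total up each object's
	 * (4-byte aligned) size plus its per-object and section headers,
	 * vmalloc a mempool of that size, then copy every frozen object
	 * into it as a GPU_OBJECT_V2 section so the data survives until
	 * the snapshot is finally read from sysfs.
	 */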
size_t size = 0; + void *ptr; + + kgsl_snapshot_process_ib_obj_list(snapshot); + + list_for_each_entry(obj, &snapshot->obj_list, node) { + obj->size = ALIGN(obj->size, 4); + + size += ((size_t) obj->size + + sizeof(struct kgsl_snapshot_gpu_object) + + sizeof(struct kgsl_snapshot_section_header)); + } + + if (size == 0) + goto done; + + snapshot->mempool = vmalloc(size); + if (snapshot->mempool != NULL) + KGSL_CORE_ERR("snapshot: mempool address %p, size %zx\n", + snapshot->mempool, size); + + ptr = snapshot->mempool; + snapshot->mempool_size = 0; + + /* even if vmalloc fails, make sure we clean up the obj_list */ + list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) { + if (snapshot->mempool) { + size_t ret = _mempool_add_object(ptr, obj); + ptr += ret; + snapshot->mempool_size += ret; + } + + kgsl_snapshot_put_object(obj); + } +done: + /* + * Get rid of the process struct here, so that it doesn't sit + * around until someone bothers to read the snapshot file. + */ + kgsl_process_private_put(snapshot->process); + snapshot->process = NULL; + + complete_all(&snapshot->dump_gate); + return; +} diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h new file mode 100644 index 000000000000..8167ff83a18b --- /dev/null +++ b/drivers/gpu/msm/kgsl_snapshot.h @@ -0,0 +1,238 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _KGSL_SNAPSHOT_H_ +#define _KGSL_SNAPSHOT_H_ + +#include <linux/types.h> + +/* Snapshot header */ + +/* High word is static, low word is snapshot version ID */ +#define SNAPSHOT_MAGIC 0x504D0002 + +/* GPU ID scheme: + * [16:31] - core identifer (0x0002 for 2D or 0x0003 for 3D) + * [00:16] - GPU specific identifier + */ + +struct kgsl_snapshot_header { + __u32 magic; /* Magic identifier */ + __u32 gpuid; /* GPU ID - see above */ + /* Added in snapshot version 2 */ + __u32 chipid; /* Chip ID from the GPU */ +} __packed; + +/* Section header */ +#define SNAPSHOT_SECTION_MAGIC 0xABCD + +struct kgsl_snapshot_section_header { + __u16 magic; /* Magic identifier */ + __u16 id; /* Type of section */ + __u32 size; /* Size of the section including this header */ +} __packed; + +/* Section identifiers */ +#define KGSL_SNAPSHOT_SECTION_OS 0x0101 +#define KGSL_SNAPSHOT_SECTION_REGS 0x0201 +#define KGSL_SNAPSHOT_SECTION_RB 0x0301 +#define KGSL_SNAPSHOT_SECTION_RB_V2 0x0302 +#define KGSL_SNAPSHOT_SECTION_IB 0x0401 +#define KGSL_SNAPSHOT_SECTION_IB_V2 0x0402 +#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501 +#define KGSL_SNAPSHOT_SECTION_ISTORE 0x0801 +#define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901 +#define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT 0x0B01 +#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02 +#define KGSL_SNAPSHOT_SECTION_MEMLIST 0x0E01 +#define KGSL_SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 +#define KGSL_SNAPSHOT_SECTION_SHADER 0x1201 + +#define KGSL_SNAPSHOT_SECTION_END 0xFFFF + +/* OS sub-section header */ +#define KGSL_SNAPSHOT_OS_LINUX 0x0001 + +/* Linux OS specific information */ + +#define SNAPSHOT_STATE_HUNG 0 +#define SNAPSHOT_STATE_RUNNING 1 + +struct kgsl_snapshot_linux { + int osid; /* subsection OS identifier */ + int state; /* 1 if the thread is running, 0 for hung */ + __u32 seconds; /* Unix timestamp for the snapshot */ + __u32 power_flags; /* Current power flags */ + __u32 power_level; /* Current power level */ + __u32 power_interval_timeout; /* Power interval timeout */ + __u32 grpclk; /* Current GP clock value */ + __u32 busclk; /* Current busclk value */ + __u32 ptbase; /* Current ptbase */ + __u32 pid; /* PID of the process that owns the PT */ + __u32 current_context; /* ID of the current context */ + __u32 ctxtcount; /* Number of contexts appended to section */ + unsigned char release[32]; /* kernel release */ + unsigned char version[32]; /* kernel version */ + unsigned char comm[16]; /* Name of the process that owns the PT */ +} __packed; + +/* + * This structure contains a record of an active context. 
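 * One record exists for every context counted into ctxtcount; for
 * illustration, record i then begins
 * sizeof(struct kgsl_snapshot_linux) +
 * i * sizeof(struct kgsl_snapshot_linux_context) bytes into the OS section
 * payload.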
+ * These are appended one after another in the OS section below + * the header above + */ + +struct kgsl_snapshot_linux_context { + __u32 id; /* The context ID */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +}; + +/* Ringbuffer sub-section header */ +struct kgsl_snapshot_rb { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ +} __packed; + +struct kgsl_snapshot_rb_v2 { + int start; /* dword at the start of the dump */ + int end; /* dword at the end of the dump */ + int rbsize; /* Size (in dwords) of the ringbuffer */ + int wptr; /* Current index of the CPU write pointer */ + int rptr; /* Current index of the GPU read pointer */ + int count; /* Number of dwords in the dump */ + __u32 timestamp_queued; /* The last queued timestamp */ + __u32 timestamp_retired; /* The last timestamp retired by HW */ + __u64 gpuaddr; /* The GPU address of the ringbuffer */ + __u32 id; /* Ringbuffer identifier */ +} __packed; + + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_replay_mem_list { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u32 ptbase; +} __packed; + +/* Replay or Memory list section, both sections have same header */ +struct kgsl_snapshot_mem_list_v2 { + /* + * Number of IBs to replay for replay section or + * number of memory list entries for mem list section + */ + int num_entries; + /* Pagetable base to which the replay IBs or memory entries belong */ + __u64 ptbase; +} __packed; + + +/* Indirect buffer sub-section header */ +struct kgsl_snapshot_ib { + __u32 gpuaddr; /* GPU address of the the IB */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the IB */ +} __packed; + +/* Indirect buffer sub-section header (v2) */ +struct kgsl_snapshot_ib_v2 { + __u64 gpuaddr; /* GPU address of the the IB */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the IB */ +} __packed; + + +/* Register sub-section header */ +struct kgsl_snapshot_regs { + __u32 count; /* Number of register pairs in the section */ +} __packed; + +/* Indexed register sub-section header */ +struct kgsl_snapshot_indexed_regs { + __u32 index_reg; /* Offset of the index register for this section */ + __u32 data_reg; /* Offset of the data register for this section */ + int start; /* Starting index */ + int count; /* Number of dwords in the data */ +} __packed; + +/* Istore sub-section header */ +struct kgsl_snapshot_istore { + int count; /* Number of instructions in the istore */ +} __packed; + +/* Debug data sub-section header */ + +/* A2XX debug sections */ +#define SNAPSHOT_DEBUG_SX 1 +#define SNAPSHOT_DEBUG_CP 2 +#define SNAPSHOT_DEBUG_SQ 3 +#define SNAPSHOT_DEBUG_SQTHREAD 4 +#define SNAPSHOT_DEBUG_MIU 5 + +/* A3XX debug sections */ +#define SNAPSHOT_DEBUG_VPC_MEMORY 6 +#define SNAPSHOT_DEBUG_CP_MEQ 7 +#define SNAPSHOT_DEBUG_CP_PM4_RAM 8 +#define SNAPSHOT_DEBUG_CP_PFP_RAM 9 
+#define SNAPSHOT_DEBUG_CP_ROQ 10 +#define SNAPSHOT_DEBUG_SHADER_MEMORY 11 +#define SNAPSHOT_DEBUG_CP_MERCIU 12 + +struct kgsl_snapshot_debug { + int type; /* Type identifier for the attached tata */ + int size; /* Size of the section in dwords */ +} __packed; + +struct kgsl_snapshot_debugbus { + int id; /* Debug bus ID */ + int count; /* Number of dwords in the dump */ +} __packed; + +struct kgsl_snapshot_shader { + int type; /* SP/TP statetype */ + int index; /* SP/TP index */ + int size; /* Number of dwords in the dump */ +} __packed; + +#define SNAPSHOT_GPU_OBJECT_SHADER 1 +#define SNAPSHOT_GPU_OBJECT_IB 2 +#define SNAPSHOT_GPU_OBJECT_GENERIC 3 +#define SNAPSHOT_GPU_OBJECT_DRAW 4 +#define SNAPSHOT_GPU_OBJECT_GLOBAL 5 + +struct kgsl_snapshot_gpu_object { + int type; /* Type of GPU object */ + __u32 gpuaddr; /* GPU address of the the object */ + __u32 ptbase; /* Base for the pagetable the GPU address is valid in */ + int size; /* Size of the object (in dwords) */ +}; + +struct kgsl_snapshot_gpu_object_v2 { + int type; /* Type of GPU object */ + __u64 gpuaddr; /* GPU address of the the object */ + __u64 ptbase; /* Base for the pagetable the GPU address is valid in */ + __u64 size; /* Size of the object (in dwords) */ +} __packed; + +#endif diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c new file mode 100644 index 000000000000..401a6661f5a9 --- /dev/null +++ b/drivers/gpu/msm/kgsl_sync.c @@ -0,0 +1,646 @@ +/* Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/err.h> +#include <linux/file.h> +#include <linux/oneshot_sync.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> + +#include <asm/current.h> + +#include "kgsl_sync.h" + +static void kgsl_sync_timeline_signal(struct sync_timeline *timeline, + unsigned int timestamp); + +static struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline, + struct kgsl_context *context, unsigned int timestamp) +{ + struct sync_pt *pt; + pt = sync_pt_create(timeline, (int) sizeof(struct kgsl_sync_pt)); + if (pt) { + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt; + kpt->context = context; + kpt->timestamp = timestamp; + } + return pt; +} + +/* + * This should only be called on sync_pts which have been created but + * not added to a fence. 
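 * Once sync_fence_create() succeeds the fence owns the pt and will free it
 * when the fence itself is released, so a typical error path looks roughly
 * like:
 *
 *   pt = kgsl_sync_pt_create(timeline, context, timestamp);
 *   fence = sync_fence_create(name, pt);
 *   if (fence == NULL)
 *           kgsl_sync_pt_destroy(pt);
 *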
+ */ +static void kgsl_sync_pt_destroy(struct sync_pt *pt) +{ + sync_pt_free(pt); +} + +static struct sync_pt *kgsl_sync_pt_dup(struct sync_pt *pt) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt; + return kgsl_sync_pt_create(sync_pt_parent(pt), kpt->context, kpt->timestamp); +} + +static int kgsl_sync_pt_has_signaled(struct sync_pt *pt) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt; + struct kgsl_sync_timeline *ktimeline = + (struct kgsl_sync_timeline *) sync_pt_parent(pt); + unsigned int ts = kpt->timestamp; + int ret = 0; + + spin_lock(&ktimeline->lock); + ret = (timestamp_cmp(ktimeline->last_timestamp, ts) >= 0); + spin_unlock(&ktimeline->lock); + + return ret; +} + +static int kgsl_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) +{ + struct kgsl_sync_pt *kpt_a = (struct kgsl_sync_pt *) a; + struct kgsl_sync_pt *kpt_b = (struct kgsl_sync_pt *) b; + unsigned int ts_a = kpt_a->timestamp; + unsigned int ts_b = kpt_b->timestamp; + return timestamp_cmp(ts_a, ts_b); +} + +struct kgsl_fence_event_priv { + struct kgsl_context *context; + unsigned int timestamp; +}; + +/** + * kgsl_fence_event_cb - Event callback for a fence timestamp event + * @device - The KGSL device that expired the timestamp + * @context- Pointer to the context that owns the event + * @priv: Private data for the callback + * @result - Result of the event (retired or canceled) + * + * Signal a fence following the expiration of a timestamp + */ + +static void kgsl_fence_event_cb(struct kgsl_device *device, + struct kgsl_event_group *group, void *priv, int result) +{ + struct kgsl_fence_event_priv *ev = priv; + kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp); + kgsl_context_put(ev->context); + kfree(ev); +} + +static int _add_fence_event(struct kgsl_device *device, + struct kgsl_context *context, unsigned int timestamp) +{ + struct kgsl_fence_event_priv *event; + int ret; + + event = kmalloc(sizeof(*event), GFP_KERNEL); + if (event == NULL) + return -ENOMEM; + + /* + * Increase the refcount for the context to keep it through the + * callback + */ + if (!_kgsl_context_get(context)) { + kfree(event); + return -ENOENT; + } + + event->context = context; + event->timestamp = timestamp; + event->context = context; + + ret = kgsl_add_event(device, &context->events, timestamp, + kgsl_fence_event_cb, event); + + if (ret) { + kgsl_context_put(context); + kfree(event); + } + + return ret; +} + +/** + * kgsl_add_fence_event - Create a new fence event + * @device - KGSL device to create the event on + * @timestamp - Timestamp to trigger the event + * @data - Return fence fd stored in struct kgsl_timestamp_event_fence + * @len - length of the fence event + * @owner - driver instance that owns this event + * @returns 0 on success or error code on error + * + * Create a fence and register an event to signal the fence when + * the timestamp expires + */ + +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + struct kgsl_timestamp_event_fence priv; + struct kgsl_context *context; + struct sync_pt *pt; + struct sync_fence *fence = NULL; + int ret = -EINVAL; + char fence_name[sizeof(fence->name)] = {}; + unsigned int cur; + + priv.fence_fd = -1; + + if (len != sizeof(priv)) + return -EINVAL; + + context = kgsl_context_get_owner(owner, context_id); + + if (context == NULL) + return -EINVAL; + + if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv)) + goto out; + + pt = 
kgsl_sync_pt_create(context->timeline, context, timestamp); + if (pt == NULL) { + KGSL_DRV_CRIT_RATELIMIT(device, "kgsl_sync_pt_create failed\n"); + ret = -ENOMEM; + goto out; + } + snprintf(fence_name, sizeof(fence_name), + "%s-pid-%d-ctx-%d-ts-%d", + device->name, current->group_leader->pid, + context_id, timestamp); + + + fence = sync_fence_create(fence_name, pt); + if (fence == NULL) { + /* only destroy pt when not added to fence */ + kgsl_sync_pt_destroy(pt); + KGSL_DRV_CRIT_RATELIMIT(device, "sync_fence_create failed\n"); + ret = -ENOMEM; + goto out; + } + + priv.fence_fd = get_unused_fd_flags(0); + if (priv.fence_fd < 0) { + KGSL_DRV_CRIT_RATELIMIT(device, + "Unable to get a file descriptor: %d\n", + priv.fence_fd); + ret = priv.fence_fd; + goto out; + } + + /* + * If the timestamp hasn't expired yet create an event to trigger it. + * Otherwise, just signal the fence - there is no reason to go through + * the effort of creating a fence we don't need. + */ + + kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &cur); + + if (timestamp_cmp(cur, timestamp) >= 0) { + ret = 0; + kgsl_sync_timeline_signal(context->timeline, cur); + } else { + ret = _add_fence_event(device, context, timestamp); + if (ret) + goto out; + } + + if (copy_to_user(data, &priv, sizeof(priv))) { + ret = -EFAULT; + goto out; + } + sync_fence_install(fence, priv.fence_fd); +out: + kgsl_context_put(context); + if (ret) { + if (priv.fence_fd >= 0) + put_unused_fd(priv.fence_fd); + + if (fence) + sync_fence_put(fence); + } + return ret; +} + +static unsigned int kgsl_sync_get_timestamp( + struct kgsl_sync_timeline *ktimeline, enum kgsl_timestamp_type type) +{ + unsigned int ret = 0; + struct kgsl_context *context; + + if (ktimeline->device == NULL) + return 0; + + context = kgsl_context_get(ktimeline->device, + ktimeline->context_id); + + if (context) + kgsl_readtimestamp(ktimeline->device, context, type, &ret); + + kgsl_context_put(context); + return ret; +} + +static void kgsl_sync_timeline_value_str(struct sync_timeline *sync_timeline, + char *str, int size) +{ + struct kgsl_sync_timeline *ktimeline = + (struct kgsl_sync_timeline *) sync_timeline; + + /* + * This callback can be called before the device and spinlock are + * initialized in struct kgsl_sync_timeline. kgsl_sync_get_timestamp() + * will check if device is NULL and return 0. Queued and retired + * timestamp of the context will be reported as 0, which is correct + * because the context and timeline are just getting initialized. 
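 * With a live device the snprintf() below yields something along the lines
 * of "104 queued:106 retired:104" (last signalled timestamp, then the queued
 * and retired timestamps for the context); the numbers here are only
 * illustrative.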
+ */ + unsigned int timestamp_retired = kgsl_sync_get_timestamp(ktimeline, + KGSL_TIMESTAMP_RETIRED); + unsigned int timestamp_queued = kgsl_sync_get_timestamp(ktimeline, + KGSL_TIMESTAMP_QUEUED); + + snprintf(str, size, "%u queued:%u retired:%u", + ktimeline->last_timestamp, + timestamp_queued, timestamp_retired); +} + +static void kgsl_sync_pt_value_str(struct sync_pt *sync_pt, + char *str, int size) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt; + snprintf(str, size, "%u", kpt->timestamp); +} + +static int kgsl_sync_fill_driver_data(struct sync_pt *sync_pt, void *data, + int size) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt; + + if (size < sizeof(kpt->timestamp)) + return -ENOMEM; + + memcpy(data, &kpt->timestamp, sizeof(kpt->timestamp)); + return sizeof(kpt->timestamp); +} + +static void kgsl_sync_pt_log(struct sync_pt *sync_pt) +{ + struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt; + pr_info("-----\n"); + kgsl_context_dump(kpt->context); + pr_info("-----\n"); +} + +static void kgsl_sync_timeline_release_obj(struct sync_timeline *sync_timeline) +{ + /* + * Make sure to free the timeline only after destroy flag is set. + * This is to avoid further accessing to the timeline from KGSL and + * also to catch any unbalanced kref of timeline. + */ + BUG_ON(sync_timeline && (sync_timeline->destroyed != true)); +} +static const struct sync_timeline_ops kgsl_sync_timeline_ops = { + .driver_name = "kgsl-timeline", + .dup = kgsl_sync_pt_dup, + .has_signaled = kgsl_sync_pt_has_signaled, + .compare = kgsl_sync_pt_compare, + .timeline_value_str = kgsl_sync_timeline_value_str, + .pt_value_str = kgsl_sync_pt_value_str, + .fill_driver_data = kgsl_sync_fill_driver_data, + .release_obj = kgsl_sync_timeline_release_obj, + .pt_log = kgsl_sync_pt_log, +}; + +int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + struct kgsl_sync_timeline *ktimeline; + + /* Generate a name which includes the thread name, thread id, process + * name, process id, and context id. This makes it possible to + * identify the context of a timeline in the sync dump. 
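 * With the format used below this produces names along the lines of
 * "kgsl-3d0_app(1234)-GLThread(1240)-7" (device, process, thread and
 * context id); the concrete values are only examples.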
*/ + char ktimeline_name[sizeof(context->timeline->name)] = {}; + snprintf(ktimeline_name, sizeof(ktimeline_name), + "%s_%.15s(%d)-%.15s(%d)-%d", + context->device->name, + current->group_leader->comm, current->group_leader->pid, + current->comm, current->pid, context->id); + + context->timeline = sync_timeline_create(&kgsl_sync_timeline_ops, + (int) sizeof(struct kgsl_sync_timeline), ktimeline_name); + if (context->timeline == NULL) + return -EINVAL; + + ktimeline = (struct kgsl_sync_timeline *) context->timeline; + ktimeline->last_timestamp = 0; + ktimeline->device = context->device; + ktimeline->context_id = context->id; + + spin_lock_init(&ktimeline->lock); + return 0; +} + +static void kgsl_sync_timeline_signal(struct sync_timeline *timeline, + unsigned int timestamp) +{ + struct kgsl_sync_timeline *ktimeline = + (struct kgsl_sync_timeline *) timeline; + + spin_lock(&ktimeline->lock); + if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0) + ktimeline->last_timestamp = timestamp; + spin_unlock(&ktimeline->lock); + + sync_timeline_signal(timeline); +} + +void kgsl_sync_timeline_destroy(struct kgsl_context *context) +{ + sync_timeline_destroy(context->timeline); +} + +static void kgsl_sync_callback(struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + struct kgsl_sync_fence_waiter *kwaiter = + (struct kgsl_sync_fence_waiter *) waiter; + kwaiter->func(kwaiter->priv); + sync_fence_put(kwaiter->fence); + kfree(kwaiter); +} + +struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv) +{ + struct kgsl_sync_fence_waiter *kwaiter; + struct sync_fence *fence; + int status; + + fence = sync_fence_fdget(fd); + if (fence == NULL) + return ERR_PTR(-EINVAL); + + /* create the waiter */ + kwaiter = kzalloc(sizeof(*kwaiter), GFP_ATOMIC); + if (kwaiter == NULL) { + sync_fence_put(fence); + return ERR_PTR(-ENOMEM); + } + + kwaiter->fence = fence; + kwaiter->priv = priv; + kwaiter->func = func; + + strlcpy(kwaiter->name, fence->name, sizeof(kwaiter->name)); + + sync_fence_waiter_init((struct sync_fence_waiter *) kwaiter, + kgsl_sync_callback); + + /* if status then error or signaled */ + status = sync_fence_wait_async(fence, + (struct sync_fence_waiter *) kwaiter); + if (status) { + kfree(kwaiter); + sync_fence_put(fence); + if (status < 0) + kwaiter = ERR_PTR(status); + else + kwaiter = NULL; + } + + return kwaiter; +} + +int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *kwaiter) +{ + if (kwaiter == NULL) + return 0; + + if (sync_fence_cancel_async(kwaiter->fence, + (struct sync_fence_waiter *) kwaiter) == 0) { + sync_fence_put(kwaiter->fence); + kfree(kwaiter); + return 1; + } + return 0; +} + +#ifdef CONFIG_ONESHOT_SYNC + +struct kgsl_syncsource { + struct kref refcount; + int id; + struct kgsl_process_private *private; + struct oneshot_sync_timeline *oneshot; +}; + +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_syncsource_create *param = data; + int ret = -EINVAL; + int id = 0; + struct kgsl_process_private *private = dev_priv->process_priv; + char name[32]; + + syncsource = kzalloc(sizeof(*syncsource), GFP_KERNEL); + if (syncsource == NULL) { + ret = -ENOMEM; + goto out; + } + + snprintf(name, sizeof(name), "kgsl-syncsource-pid-%d", + current->group_leader->pid); + + syncsource->oneshot = oneshot_timeline_create(name); + if (syncsource->oneshot == NULL) { + ret = -ENOMEM; + goto out; + } + + 
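	/*
	 * idr_preload() pre-allocates idr memory with GFP_KERNEL so that
	 * idr_alloc() can safely run under the spinlock with GFP_NOWAIT.
	 * IDs start at 1, so a syncsource->id of 0 can later be used to
	 * mean "already removed from the idr"; a negative return value is
	 * an errno and is passed straight back to the caller below.
	 */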
idr_preload(GFP_KERNEL); + spin_lock(&private->syncsource_lock); + id = idr_alloc(&private->syncsource_idr, syncsource, 1, 0, GFP_NOWAIT); + spin_unlock(&private->syncsource_lock); + idr_preload_end(); + + if (id > 0) { + kref_init(&syncsource->refcount); + syncsource->id = id; + syncsource->private = private; + + param->id = id; + ret = 0; + } else { + ret = id; + } + +out: + if (ret) { + if (syncsource && syncsource->oneshot) + oneshot_timeline_destroy(syncsource->oneshot); + kfree(syncsource); + } + + return ret; +} + +static struct kgsl_syncsource * +kgsl_syncsource_get(struct kgsl_process_private *private, int id) +{ + int result = 0; + struct kgsl_syncsource *syncsource = NULL; + + spin_lock(&private->syncsource_lock); + + syncsource = idr_find(&private->syncsource_idr, id); + if (syncsource) + result = kref_get_unless_zero(&syncsource->refcount); + + spin_unlock(&private->syncsource_lock); + + return result ? syncsource : NULL; +} + +static void kgsl_syncsource_destroy(struct kref *kref) +{ + struct kgsl_syncsource *syncsource = container_of(kref, + struct kgsl_syncsource, + refcount); + + struct kgsl_process_private *private = syncsource->private; + + spin_lock(&private->syncsource_lock); + if (syncsource->id != 0) { + idr_remove(&private->syncsource_idr, syncsource->id); + syncsource->id = 0; + } + oneshot_timeline_destroy(syncsource->oneshot); + spin_unlock(&private->syncsource_lock); + + kfree(syncsource); +} + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + if (syncsource) + kref_put(&syncsource->refcount, kgsl_syncsource_destroy); +} + +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_destroy *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct kgsl_process_private *private; + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + + if (syncsource == NULL) + return -EINVAL; + + private = syncsource->private; + + spin_lock(&private->syncsource_lock); + idr_remove(&private->syncsource_idr, param->id); + syncsource->id = 0; + spin_unlock(&private->syncsource_lock); + + /* put reference from syncsource creation */ + kgsl_syncsource_put(syncsource); + /* put reference from getting the syncsource above */ + kgsl_syncsource_put(syncsource); + return 0; +} + +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + struct kgsl_syncsource_create_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + int ret = -EINVAL; + struct sync_fence *fence = NULL; + int fd = -1; + char name[32]; + + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + snprintf(name, sizeof(name), "kgsl-syncsource-pid-%d-%d", + current->group_leader->pid, syncsource->id); + + fence = oneshot_fence_create(syncsource->oneshot, name); + if (fence == NULL) { + ret = -ENOMEM; + goto out; + } + + fd = get_unused_fd_flags(0); + if (fd < 0) { + ret = -EBADF; + goto out; + } + ret = 0; + + sync_fence_install(fence, fd); + + param->fence_fd = fd; +out: + if (ret) { + if (fence) + sync_fence_put(fence); + if (fd >= 0) + put_unused_fd(fd); + + } + kgsl_syncsource_put(syncsource); + return ret; +} + +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + int ret = -EINVAL; + struct kgsl_syncsource_signal_fence *param = data; + struct kgsl_syncsource *syncsource = NULL; + struct sync_fence *fence = 
NULL; + + syncsource = kgsl_syncsource_get(dev_priv->process_priv, + param->id); + if (syncsource == NULL) + goto out; + + fence = sync_fence_fdget(param->fence_fd); + if (fence == NULL) { + ret = -EBADF; + goto out; + } + + ret = oneshot_fence_signal(syncsource->oneshot, fence); +out: + if (fence) + sync_fence_put(fence); + kgsl_syncsource_put(syncsource); + return ret; +} +#endif diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h new file mode 100644 index 000000000000..bf69ad9657c3 --- /dev/null +++ b/drivers/gpu/msm/kgsl_sync.h @@ -0,0 +1,139 @@ +/* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __KGSL_SYNC_H +#define __KGSL_SYNC_H + +#include <linux/sync.h> +#include "kgsl_device.h" + +struct kgsl_sync_timeline { + struct sync_timeline timeline; + unsigned int last_timestamp; + struct kgsl_device *device; + u32 context_id; + spinlock_t lock; +}; + +struct kgsl_sync_pt { + struct sync_pt pt; + struct kgsl_context *context; + unsigned int timestamp; +}; + +struct kgsl_sync_fence_waiter { + struct sync_fence_waiter waiter; + struct sync_fence *fence; + char name[32]; + void (*func)(void *priv); + void *priv; +}; + +struct kgsl_syncsource; + +#if defined(CONFIG_SYNC) +int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner); +int kgsl_sync_timeline_create(struct kgsl_context *context); +void kgsl_sync_timeline_destroy(struct kgsl_context *context); +struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv); +int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter); +static inline void kgsl_sync_fence_log(struct sync_fence *fence) +{ +} +#else +static inline int kgsl_add_fence_event(struct kgsl_device *device, + u32 context_id, u32 timestamp, void __user *data, int len, + struct kgsl_device_private *owner) +{ + return -EINVAL; +} + +static inline int kgsl_sync_timeline_create(struct kgsl_context *context) +{ + context->timeline = NULL; + return 0; +} + +static inline void kgsl_sync_timeline_destroy(struct kgsl_context *context) +{ +} + +static inline struct +kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd, + void (*func)(void *priv), void *priv) +{ + return NULL; +} + +static inline int +kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter) +{ + return 1; +} + +static inline void kgsl_sync_fence_log(struct sync_fence *fence) +{ +} + +#endif + +#ifdef CONFIG_ONESHOT_SYNC +long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); +long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data); + +void kgsl_syncsource_put(struct kgsl_syncsource *syncsource); + +#else +static inline long 
+kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline long +kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv, + unsigned int cmd, void *data) +{ + return -ENOIOCTLCMD; +} + +static inline void kgsl_syncsource_put(struct kgsl_syncsource *syncsource) +{ + +} +#endif + +#endif /* __KGSL_SYNC_H */ diff --git a/drivers/gpu/msm/kgsl_trace.c b/drivers/gpu/msm/kgsl_trace.c new file mode 100644 index 000000000000..3541425ff643 --- /dev/null +++ b/drivers/gpu/msm/kgsl_trace.c @@ -0,0 +1,26 @@ +/* Copyright (c) 2011, 2013, 2015 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> + +#include "kgsl.h" +#include "kgsl_device.h" + +/* Instantiate tracepoints */ +#define CREATE_TRACE_POINTS +#include "kgsl_trace.h" + +EXPORT_TRACEPOINT_SYMBOL(kgsl_regwrite); +EXPORT_TRACEPOINT_SYMBOL(kgsl_issueibcmds); +EXPORT_TRACEPOINT_SYMBOL(kgsl_user_pwrlevel_constraint); +EXPORT_TRACEPOINT_SYMBOL(kgsl_constraint); diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h new file mode 100644 index 000000000000..da7a282acc62 --- /dev/null +++ b/drivers/gpu/msm/kgsl_trace.h @@ -0,0 +1,1107 @@ +/* Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _KGSL_TRACE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kgsl +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
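/*
 * TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE tell include/trace/define_trace.h
 * where to re-include this header when kgsl_trace.c defines
 * CREATE_TRACE_POINTS before including it; that single pass expands the
 * TRACE_EVENT() macros below into real tracepoint definitions, while every
 * other inclusion only sees the declarations.
 */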
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE kgsl_trace + +#include <linux/tracepoint.h> +#include "kgsl_device.h" +#include "adreno_drawctxt.h" + +struct kgsl_device; +struct kgsl_ringbuffer_issueibcmds; +struct kgsl_device_waittimestamp; + +/* + * Tracepoint for kgsl issue ib commands + */ +TRACE_EVENT(kgsl_issueibcmds, + + TP_PROTO(struct kgsl_device *device, + int drawctxt_id, + struct kgsl_cmdbatch *cmdbatch, + unsigned int numibs, + int timestamp, + int flags, + int result, + unsigned int type), + + TP_ARGS(device, drawctxt_id, cmdbatch, numibs, timestamp, + flags, result, type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, drawctxt_id) + __field(unsigned int, numibs) + __field(unsigned int, timestamp) + __field(unsigned int, flags) + __field(int, result) + __field(unsigned int, drawctxt_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->drawctxt_id = drawctxt_id; + __entry->numibs = numibs; + __entry->timestamp = timestamp; + __entry->flags = flags; + __entry->result = result; + __entry->drawctxt_type = type; + ), + + TP_printk( + "d_name=%s ctx=%u ib=0x0 numibs=%u ts=%u " + "flags=%s result=%d type=%s", + __get_str(device_name), + __entry->drawctxt_id, + __entry->numibs, + __entry->timestamp, + __entry->flags ? __print_flags(__entry->flags, "|", + KGSL_CMDBATCH_FLAGS) : "None", + __entry->result, + __print_symbolic(__entry->drawctxt_type, KGSL_CONTEXT_TYPES) + ) +); + +/* + * Tracepoint for kgsl readtimestamp + */ +TRACE_EVENT(kgsl_readtimestamp, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int type, + unsigned int timestamp), + + TP_ARGS(device, context_id, type, timestamp), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, type) + __field(unsigned int, timestamp) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->type = type; + __entry->timestamp = timestamp; + ), + + TP_printk( + "d_name=%s context_id=%u type=%u ts=%u", + __get_str(device_name), + __entry->context_id, + __entry->type, + __entry->timestamp + ) +); + +/* + * Tracepoint for kgsl waittimestamp entry + */ +TRACE_EVENT(kgsl_waittimestamp_entry, + + TP_PROTO(struct kgsl_device *device, + unsigned int context_id, + unsigned int curr_ts, + unsigned int wait_ts, + unsigned int timeout), + + TP_ARGS(device, context_id, curr_ts, wait_ts, timeout), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, context_id) + __field(unsigned int, curr_ts) + __field(unsigned int, wait_ts) + __field(unsigned int, timeout) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->context_id = context_id; + __entry->curr_ts = curr_ts; + __entry->wait_ts = wait_ts; + __entry->timeout = timeout; + ), + + TP_printk( + "d_name=%s ctx=%u curr_ts=%u ts=%u timeout=%u", + __get_str(device_name), + __entry->context_id, + __entry->curr_ts, + __entry->wait_ts, + __entry->timeout + ) +); + +/* + * Tracepoint for kgsl waittimestamp exit + */ +TRACE_EVENT(kgsl_waittimestamp_exit, + + TP_PROTO(struct kgsl_device *device, unsigned int curr_ts, + int result), + + TP_ARGS(device, curr_ts, result), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, curr_ts) + __field(int, result) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->curr_ts = curr_ts; + __entry->result = result; + ), + + TP_printk( + 
"d_name=%s curr_ts=%u result=%d", + __get_str(device_name), + __entry->curr_ts, + __entry->result + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwr_template, + TP_PROTO(struct kgsl_device *device, int on), + + TP_ARGS(device, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + ), + + TP_printk( + "d_name=%s flag=%s", + __get_str(device_name), + __entry->on ? "on" : "off" + ) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_irq, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_bus, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_rail, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +DEFINE_EVENT(kgsl_pwr_template, kgsl_retention_clk, + TP_PROTO(struct kgsl_device *device, int on), + TP_ARGS(device, on) +); + +TRACE_EVENT(kgsl_clk, + + TP_PROTO(struct kgsl_device *device, unsigned int on, + unsigned int freq), + + TP_ARGS(device, on, freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, on) + __field(unsigned int, freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->on = on; + __entry->freq = freq; + ), + + TP_printk( + "d_name=%s flag=%s active_freq=%d", + __get_str(device_name), + __entry->on ? "on" : "off", + __entry->freq + ) +); + +TRACE_EVENT(kgsl_pwrlevel, + + TP_PROTO(struct kgsl_device *device, + unsigned int pwrlevel, + unsigned int freq, + unsigned int prev_pwrlevel, + unsigned int prev_freq), + + TP_ARGS(device, pwrlevel, freq, prev_pwrlevel, prev_freq), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, freq) + __field(unsigned int, prev_pwrlevel) + __field(unsigned int, prev_freq) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->freq = freq; + __entry->prev_pwrlevel = prev_pwrlevel; + __entry->prev_freq = prev_freq; + ), + + TP_printk( + "d_name=%s pwrlevel=%d freq=%d prev_pwrlevel=%d prev_freq=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->freq, + __entry->prev_pwrlevel, + __entry->prev_freq + ) +); + +TRACE_EVENT(kgsl_buslevel, + + TP_PROTO(struct kgsl_device *device, unsigned int pwrlevel, + unsigned int bus), + + TP_ARGS(device, pwrlevel, bus), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, pwrlevel) + __field(unsigned int, bus) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->pwrlevel = pwrlevel; + __entry->bus = bus; + ), + + TP_printk( + "d_name=%s pwrlevel=%d bus=%d", + __get_str(device_name), + __entry->pwrlevel, + __entry->bus + ) +); + +TRACE_EVENT(kgsl_gpubusy, + TP_PROTO(struct kgsl_device *device, unsigned int busy, + unsigned int elapsed), + + TP_ARGS(device, busy, elapsed), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, busy) + __field(unsigned int, elapsed) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->busy = busy; + __entry->elapsed = elapsed; + ), + + TP_printk( + "d_name=%s busy=%u elapsed=%d", + __get_str(device_name), + __entry->busy, + __entry->elapsed + ) +); + +TRACE_EVENT(kgsl_pwrstats, + TP_PROTO(struct kgsl_device *device, s64 time, + struct kgsl_power_stats *pstats), + + TP_ARGS(device, time, pstats), + + TP_STRUCT__entry( + __string(device_name, 
device->name) + __field(s64, total_time) + __field(u64, busy_time) + __field(u64, ram_time) + __field(u64, ram_wait) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->total_time = time; + __entry->busy_time = pstats->busy_time; + __entry->ram_time = pstats->ram_time; + __entry->ram_wait = pstats->ram_wait; + ), + + TP_printk( + "d_name=%s total=%lld busy=%lld ram_time=%lld ram_wait=%lld", + __get_str(device_name), __entry->total_time, __entry->busy_time, + __entry->ram_time, __entry->ram_wait + ) +); + +DECLARE_EVENT_CLASS(kgsl_pwrstate_template, + TP_PROTO(struct kgsl_device *device, unsigned int state), + + TP_ARGS(device, state), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, state) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->state = state; + ), + + TP_printk( + "d_name=%s state=%s", + __get_str(device_name), + kgsl_pwrstate_to_str(__entry->state) + ) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_set_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_request_state, + TP_PROTO(struct kgsl_device *device, unsigned int state), + TP_ARGS(device, state) +); + +TRACE_EVENT(kgsl_mem_alloc, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->tgid = mem_entry->priv->pid; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u flags=0x%llx", + __entry->gpuaddr, __entry->size, __entry->tgid, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_mmap, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(unsigned long, useraddr) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __array(char, usage, 16) + __field(unsigned int, id) + __field(uint64_t, flags) + ), + + TP_fast_assign( + __entry->useraddr = mem_entry->memdesc.useraddr; + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->flags = mem_entry->memdesc.flags; + ), + + TP_printk( + "useraddr=0x%lx gpuaddr=0x%llx size=%llu usage=%s id=%u flags=0x%llx", + __entry->useraddr, __entry->gpuaddr, __entry->size, + __entry->usage, __entry->id, __entry->flags + ) +); + +TRACE_EVENT(kgsl_mem_unmapped_area_collision, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, + unsigned long addr, + unsigned long len), + + TP_ARGS(mem_entry, addr, len), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned long, addr) + __field(unsigned long, len) + ), + + TP_fast_assign( + __entry->id = mem_entry->id; + __entry->len = len; + __entry->addr = addr; + ), + + TP_printk( + "id=%u len=%lu addr=0x%lx", + __entry->id, __entry->len, __entry->addr + ) +); + +TRACE_EVENT(kgsl_mem_map, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, int fd), + + TP_ARGS(mem_entry, fd), + + TP_STRUCT__entry( + 
__field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, fd) + __field(int, type) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->fd = fd; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = mem_entry->priv->pid; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s fd=%d tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + __print_symbolic(__entry->type, KGSL_MEM_TYPES), + __entry->fd, __entry->tgid, + __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_free, + + TP_PROTO(struct kgsl_mem_entry *mem_entry), + + TP_ARGS(mem_entry), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __field(int, fd) + __field(unsigned int, tgid) + __array(char, usage, 16) + __field(unsigned int, id) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->tgid = mem_entry->priv->pid; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu type=%s tgid=%u usage=%s id=%u", + __entry->gpuaddr, __entry->size, + __print_symbolic(__entry->type, KGSL_MEM_TYPES), + __entry->tgid, __entry->usage, __entry->id + ) +); + +TRACE_EVENT(kgsl_mem_sync_cache, + + TP_PROTO(struct kgsl_mem_entry *mem_entry, uint64_t offset, + uint64_t length, unsigned int op), + + TP_ARGS(mem_entry, offset, length, op), + + TP_STRUCT__entry( + __field(uint64_t, gpuaddr) + __array(char, usage, 16) + __field(unsigned int, tgid) + __field(unsigned int, id) + __field(unsigned int, op) + __field(uint64_t, offset) + __field(uint64_t, length) + ), + + TP_fast_assign( + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->tgid = mem_entry->priv->pid; + __entry->id = mem_entry->id; + __entry->op = op; + __entry->offset = offset; + __entry->length = (length == 0) ? + mem_entry->memdesc.size : length; + ), + + TP_printk( + "gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u op=%c%c offset=%llu", + __entry->gpuaddr, __entry->length, + __entry->tgid, __entry->usage, __entry->id, + (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.', + (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 
'i' : '.', + __entry->offset + ) +); + +TRACE_EVENT(kgsl_mem_sync_full_cache, + + TP_PROTO(unsigned int num_bufs, uint64_t bulk_size), + TP_ARGS(num_bufs, bulk_size), + + TP_STRUCT__entry( + __field(unsigned int, num_bufs) + __field(uint64_t, bulk_size) + ), + + TP_fast_assign( + __entry->num_bufs = num_bufs; + __entry->bulk_size = bulk_size; + ), + + TP_printk( + "num_bufs=%u bulk_size=%llu op=ci", + __entry->num_bufs, __entry->bulk_size + ) +); + +DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template, + + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + + TP_ARGS(device, mem_entry, id, curr_ts, free_ts), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(uint64_t, gpuaddr) + __field(uint64_t, size) + __field(int, type) + __array(char, usage, 16) + __field(unsigned int, id) + __field(unsigned int, drawctxt_id) + __field(unsigned int, curr_ts) + __field(unsigned int, free_ts) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->gpuaddr = mem_entry->memdesc.gpuaddr; + __entry->size = mem_entry->memdesc.size; + kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage), + mem_entry->memdesc.flags); + __entry->id = mem_entry->id; + __entry->drawctxt_id = id; + __entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc); + __entry->curr_ts = curr_ts; + __entry->free_ts = free_ts; + ), + + TP_printk( + "d_name=%s gpuaddr=0x%llx size=%llu type=%s usage=%s id=%u ctx=%u" + " curr_ts=%u free_ts=%u", + __get_str(device_name), + __entry->gpuaddr, + __entry->size, + __print_symbolic(__entry->type, KGSL_MEM_TYPES), + __entry->usage, + __entry->id, + __entry->drawctxt_id, + __entry->curr_ts, + __entry->free_ts + ) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_queue, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_free, + TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry, + unsigned int id, unsigned int curr_ts, unsigned int free_ts), + TP_ARGS(device, mem_entry, id, curr_ts, free_ts) +); + +TRACE_EVENT(kgsl_context_create, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context, + unsigned int flags), + + TP_ARGS(device, context, flags), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, flags) + __field(unsigned int, priority) + __field(unsigned int, type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + __entry->flags = flags & ~(KGSL_CONTEXT_PRIORITY_MASK | + KGSL_CONTEXT_TYPE_MASK); + __entry->priority = + (flags & KGSL_CONTEXT_PRIORITY_MASK) + >> KGSL_CONTEXT_PRIORITY_SHIFT; + __entry->type = + (flags & KGSL_CONTEXT_TYPE_MASK) + >> KGSL_CONTEXT_TYPE_SHIFT; + ), + + TP_printk( + "d_name=%s ctx=%u flags=%s priority=%u type=%s", + __get_str(device_name), __entry->id, + __entry->flags ? 
__print_flags(__entry->flags, "|", + KGSL_CONTEXT_FLAGS) : "None", + __entry->priority, + __print_symbolic(__entry->type, KGSL_CONTEXT_TYPES) + ) +); + +TRACE_EVENT(kgsl_context_detach, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_context_destroy, + + TP_PROTO(struct kgsl_device *device, struct kgsl_context *context), + + TP_ARGS(device, context), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = context->id; + ), + + TP_printk( + "d_name=%s ctx=%u", + __get_str(device_name), __entry->id + ) +); + +TRACE_EVENT(kgsl_user_pwrlevel_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int id, unsigned int type, + unsigned int sub_type), + + TP_ARGS(device, id, type, sub_type), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, id) + __field(unsigned int, type) + __field(unsigned int, sub_type) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->id = id; + __entry->type = type; + __entry->sub_type = sub_type; + ), + + TP_printk( + "d_name=%s ctx=%u constraint_type=%s constraint_subtype=%s", + __get_str(device_name), __entry->id, + __print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES), + __print_symbolic(__entry->sub_type, + KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES) + ) +); + +TRACE_EVENT(kgsl_constraint, + + TP_PROTO(struct kgsl_device *device, unsigned int type, + unsigned int value, unsigned int on), + + TP_ARGS(device, type, value, on), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, type) + __field(unsigned int, value) + __field(unsigned int, on) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->type = type; + __entry->value = value; + __entry->on = on; + ), + + TP_printk( + "d_name=%s constraint_type=%s constraint_value=%u status=%s", + __get_str(device_name), + __print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES), + __entry->value, + __entry->on ? 
"ON" : "OFF" + ) +); + +TRACE_EVENT(kgsl_mmu_pagefault, + + TP_PROTO(struct kgsl_device *device, unsigned int page, + unsigned int pt, const char *op), + + TP_ARGS(device, page, pt, op), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, page) + __field(unsigned int, pt) + __string(op, op) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->page = page; + __entry->pt = pt; + __assign_str(op, op); + ), + + TP_printk( + "d_name=%s page=0x%08x pt=%u op=%s", + __get_str(device_name), __entry->page, __entry->pt, + __get_str(op) + ) +); + +TRACE_EVENT(kgsl_regwrite, + + TP_PROTO(struct kgsl_device *device, unsigned int offset, + unsigned int value), + + TP_ARGS(device, offset, value), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, offset) + __field(unsigned int, value) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->offset = offset; + __entry->value = value; + ), + + TP_printk( + "d_name=%s reg=0x%x value=0x%x", + __get_str(device_name), __entry->offset, __entry->value + ) +); + +TRACE_EVENT(kgsl_popp_level, + + TP_PROTO(struct kgsl_device *device, int level1, int level2), + + TP_ARGS(device, level1, level2), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, level1) + __field(int, level2) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->level1 = level1; + __entry->level2 = level2; + ), + + TP_printk( + "d_name=%s old level=%d new level=%d", + __get_str(device_name), __entry->level1, __entry->level2) +); + +TRACE_EVENT(kgsl_popp_mod, + + TP_PROTO(struct kgsl_device *device, int x, int y), + + TP_ARGS(device, x, y), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, x) + __field(int, y) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->x = x; + __entry->y = y; + ), + + TP_printk( + "d_name=%s GPU busy mod=%d bus busy mod=%d", + __get_str(device_name), __entry->x, __entry->y) +); + +TRACE_EVENT(kgsl_popp_nap, + + TP_PROTO(struct kgsl_device *device, int t, int nap, int percent), + + TP_ARGS(device, t, nap, percent), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(int, t) + __field(int, nap) + __field(int, percent) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->t = t; + __entry->nap = nap; + __entry->percent = percent; + ), + + TP_printk( + "d_name=%s nap time=%d number of naps=%d percentage=%d", + __get_str(device_name), __entry->t, __entry->nap, + __entry->percent) +); + +TRACE_EVENT(kgsl_register_event, + TP_PROTO(unsigned int id, unsigned int timestamp, void *func), + TP_ARGS(id, timestamp, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, timestamp) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->timestamp = timestamp; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u cb=%pF", + __entry->id, __entry->timestamp, __entry->func) +); + +TRACE_EVENT(kgsl_fire_event, + TP_PROTO(unsigned int id, unsigned int ts, + unsigned int type, unsigned int age, void *func), + TP_ARGS(id, ts, type, age, func), + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, ts) + __field(unsigned int, type) + __field(unsigned int, age) + __field(void *, func) + ), + TP_fast_assign( + __entry->id = id; + __entry->ts = ts; + __entry->type = type; + __entry->age = age; + __entry->func = func; + ), + TP_printk( + "ctx=%u ts=%u type=%s age=%u cb=%pF", + 
__entry->id, __entry->ts, + __print_symbolic(__entry->type, KGSL_EVENT_TYPES), + __entry->age, __entry->func) +); + +TRACE_EVENT(kgsl_active_count, + + TP_PROTO(struct kgsl_device *device, unsigned long ip), + + TP_ARGS(device, ip), + + TP_STRUCT__entry( + __string(device_name, device->name) + __field(unsigned int, count) + __field(unsigned long, ip) + ), + + TP_fast_assign( + __assign_str(device_name, device->name); + __entry->count = atomic_read(&device->active_cnt); + __entry->ip = ip; + ), + + TP_printk( + "d_name=%s active_cnt=%u func=%pf", + __get_str(device_name), __entry->count, (void *) __entry->ip + ) +); + +TRACE_EVENT(kgsl_pagetable_destroy, + TP_PROTO(u64 ptbase, unsigned int name), + TP_ARGS(ptbase, name), + TP_STRUCT__entry( + __field(u64, ptbase) + __field(unsigned int, name) + ), + TP_fast_assign( + __entry->ptbase = ptbase; + __entry->name = name; + ), + TP_printk("ptbase=%llx name=%u", __entry->ptbase, __entry->name) +); + +DECLARE_EVENT_CLASS(syncpoint_timestamp_template, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(cmdbatch, context, timestamp), + TP_STRUCT__entry( + __field(unsigned int, cmdbatch_context_id) + __field(unsigned int, context_id) + __field(unsigned int, timestamp) + ), + TP_fast_assign( + __entry->cmdbatch_context_id = cmdbatch->context->id; + __entry->context_id = context->id; + __entry->timestamp = timestamp; + ), + TP_printk("ctx=%d sync ctx=%d ts=%d", + __entry->cmdbatch_context_id, __entry->context_id, + __entry->timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(cmdbatch, context, timestamp) +); + +DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp_expire, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, struct kgsl_context *context, + unsigned int timestamp), + TP_ARGS(cmdbatch, context, timestamp) +); + +DECLARE_EVENT_CLASS(syncpoint_fence_template, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, char *name), + TP_ARGS(cmdbatch, name), + TP_STRUCT__entry( + __string(fence_name, name) + __field(unsigned int, cmdbatch_context_id) + ), + TP_fast_assign( + __entry->cmdbatch_context_id = cmdbatch->context->id; + __assign_str(fence_name, name); + ), + TP_printk("ctx=%d fence=%s", + __entry->cmdbatch_context_id, __get_str(fence_name)) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, char *name), + TP_ARGS(cmdbatch, name) +); + +DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence_expire, + TP_PROTO(struct kgsl_cmdbatch *cmdbatch, char *name), + TP_ARGS(cmdbatch, name) +); + +TRACE_EVENT(kgsl_msg, + TP_PROTO(const char *msg), + TP_ARGS(msg), + TP_STRUCT__entry( + __string(msg, msg) + ), + TP_fast_assign( + __assign_str(msg, msg); + ), + TP_printk( + "%s", __get_str(msg) + ) +); + + +#endif /* _KGSL_TRACE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h new file mode 100644 index 000000000000..68cfe76e8652 --- /dev/null +++ b/include/linux/msm_kgsl.h @@ -0,0 +1,13 @@ +#ifndef _MSM_KGSL_H +#define _MSM_KGSL_H + +#include <uapi/linux/msm_kgsl.h> + +/* Limits mitigations APIs */ +void *kgsl_pwr_limits_add(enum kgsl_deviceid id); +void kgsl_pwr_limits_del(void *limit); +int kgsl_pwr_limits_set_freq(void *limit, unsigned int freq); +void kgsl_pwr_limits_set_default(void *limit); +unsigned int 
kgsl_pwr_limits_get_freq(enum kgsl_deviceid id); + +#endif /* _MSM_KGSL_H */ diff --git a/include/uapi/linux/msm_kgsl.h b/include/uapi/linux/msm_kgsl.h new file mode 100644 index 000000000000..51d6e99f0449 --- /dev/null +++ b/include/uapi/linux/msm_kgsl.h @@ -0,0 +1,1440 @@ +#ifndef _UAPI_MSM_KGSL_H +#define _UAPI_MSM_KGSL_H + +/* + * The KGSL version has proven not to be very useful in userspace if features + * are cherry picked into other trees out of order so it is frozen as of 3.14. + * It is left here for backwards compatabilty and as a reminder that + * software releases are never linear. Also, I like pie. + */ + +#define KGSL_VERSION_MAJOR 3 +#define KGSL_VERSION_MINOR 14 + +/* + * We have traditionally mixed context and issueibcmds / command batch flags + * together into a big flag stew. This worked fine until we started adding a + * lot more command batch flags and we started running out of bits. Turns out + * we have a bit of room in the context type / priority mask that we could use + * for command batches, but that means we need to split out the flags into two + * coherent sets. + * + * If any future definitions are for both context and cmdbatch add both defines + * and link the cmdbatch to the context define as we do below. Otherwise feel + * free to add exclusive bits to either set. + */ + +/* --- context flags --- */ +#define KGSL_CONTEXT_SAVE_GMEM 0x00000001 +#define KGSL_CONTEXT_NO_GMEM_ALLOC 0x00000002 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SUBMIT_IB_LIST 0x00000004 +#define KGSL_CONTEXT_CTX_SWITCH 0x00000008 +#define KGSL_CONTEXT_PREAMBLE 0x00000010 +#define KGSL_CONTEXT_TRASH_STATE 0x00000020 +#define KGSL_CONTEXT_PER_CONTEXT_TS 0x00000040 +#define KGSL_CONTEXT_USER_GENERATED_TS 0x00000080 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_END_OF_FRAME 0x00000100 +#define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200 +/* This is a cmdbatch exclusive flag - use the CMDBATCH equivalent instead */ +#define KGSL_CONTEXT_SYNC 0x00000400 +#define KGSL_CONTEXT_PWR_CONSTRAINT 0x00000800 + +#define KGSL_CONTEXT_PRIORITY_MASK 0x0000F000 +#define KGSL_CONTEXT_PRIORITY_SHIFT 12 +#define KGSL_CONTEXT_PRIORITY_UNDEF 0 + +#define KGSL_CONTEXT_IFH_NOP 0x00010000 +#define KGSL_CONTEXT_SECURE 0x00020000 + +#define KGSL_CONTEXT_PREEMPT_STYLE_MASK 0x0E000000 +#define KGSL_CONTEXT_PREEMPT_STYLE_SHIFT 25 +#define KGSL_CONTEXT_PREEMPT_STYLE_DEFAULT 0x0 +#define KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER 0x1 +#define KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN 0x2 + +#define KGSL_CONTEXT_TYPE_MASK 0x01F00000 +#define KGSL_CONTEXT_TYPE_SHIFT 20 +#define KGSL_CONTEXT_TYPE_ANY 0 +#define KGSL_CONTEXT_TYPE_GL 1 +#define KGSL_CONTEXT_TYPE_CL 2 +#define KGSL_CONTEXT_TYPE_C2D 3 +#define KGSL_CONTEXT_TYPE_RS 4 +#define KGSL_CONTEXT_TYPE_UNKNOWN 0x1E + +#define KGSL_CONTEXT_INVALID 0xffffffff + +/* + * --- command batch flags --- + * The bits that are linked to a KGSL_CONTEXT equivalent are either legacy + * definitions or bits that are valid for both contexts and cmdbatches. 
To be + * safe the other 8 bits that are still available in the context field should be + * omitted here in case we need to share - the other bits are available for + * cmdbatch only flags as needed + */ +#define KGSL_CMDBATCH_MEMLIST 0x00000001 +#define KGSL_CMDBATCH_MARKER 0x00000002 +#define KGSL_CMDBATCH_SUBMIT_IB_LIST KGSL_CONTEXT_SUBMIT_IB_LIST /* 0x004 */ +#define KGSL_CMDBATCH_CTX_SWITCH KGSL_CONTEXT_CTX_SWITCH /* 0x008 */ +#define KGSL_CMDBATCH_PROFILING 0x00000010 +#define KGSL_CMDBATCH_END_OF_FRAME KGSL_CONTEXT_END_OF_FRAME /* 0x100 */ +#define KGSL_CMDBATCH_SYNC KGSL_CONTEXT_SYNC /* 0x400 */ +#define KGSL_CMDBATCH_PWR_CONSTRAINT KGSL_CONTEXT_PWR_CONSTRAINT /* 0x800 */ + +/* + * Reserve bits [16:19] and bits [28:31] for possible bits shared between + * contexts and command batches. Update this comment as new flags are added. + */ + +/* + * gpu_command_object flags - these flags communicate the type of command or + * memory object being submitted for a GPU command + */ + +/* Flags for GPU command objects */ +#define KGSL_CMDLIST_IB 0x00000001U +#define KGSL_CMDLIST_CTXTSWITCH_PREAMBLE 0x00000002U +#define KGSL_CMDLIST_IB_PREAMBLE 0x00000004U + +/* Flags for GPU command memory objects */ +#define KGSL_OBJLIST_MEMOBJ 0x00000008U +#define KGSL_OBJLIST_PROFILE 0x00000010U + +/* Flags for GPU command sync points */ +#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0 +#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1 + +/* --- Memory allocation flags --- */ + +/* General allocation hints */ +#define KGSL_MEMFLAGS_SECURE 0x00000008ULL +#define KGSL_MEMFLAGS_GPUREADONLY 0x01000000U +#define KGSL_MEMFLAGS_GPUWRITEONLY 0x02000000U +#define KGSL_MEMFLAGS_FORCE_32BIT 0x100000000ULL + +/* Memory caching hints */ +#define KGSL_CACHEMODE_MASK 0x0C000000U +#define KGSL_CACHEMODE_SHIFT 26 + +#define KGSL_CACHEMODE_WRITECOMBINE 0 +#define KGSL_CACHEMODE_UNCACHED 1 +#define KGSL_CACHEMODE_WRITETHROUGH 2 +#define KGSL_CACHEMODE_WRITEBACK 3 + +#define KGSL_MEMFLAGS_USE_CPU_MAP 0x10000000ULL + +/* Memory types for which allocations are made */ +#define KGSL_MEMTYPE_MASK 0x0000FF00 +#define KGSL_MEMTYPE_SHIFT 8 + +#define KGSL_MEMTYPE_OBJECTANY 0 +#define KGSL_MEMTYPE_FRAMEBUFFER 1 +#define KGSL_MEMTYPE_RENDERBUFFER 2 +#define KGSL_MEMTYPE_ARRAYBUFFER 3 +#define KGSL_MEMTYPE_ELEMENTARRAYBUFFER 4 +#define KGSL_MEMTYPE_VERTEXARRAYBUFFER 5 +#define KGSL_MEMTYPE_TEXTURE 6 +#define KGSL_MEMTYPE_SURFACE 7 +#define KGSL_MEMTYPE_EGL_SURFACE 8 +#define KGSL_MEMTYPE_GL 9 +#define KGSL_MEMTYPE_CL 10 +#define KGSL_MEMTYPE_CL_BUFFER_MAP 11 +#define KGSL_MEMTYPE_CL_BUFFER_NOMAP 12 +#define KGSL_MEMTYPE_CL_IMAGE_MAP 13 +#define KGSL_MEMTYPE_CL_IMAGE_NOMAP 14 +#define KGSL_MEMTYPE_CL_KERNEL_STACK 15 +#define KGSL_MEMTYPE_COMMAND 16 +#define KGSL_MEMTYPE_2D 17 +#define KGSL_MEMTYPE_EGL_IMAGE 18 +#define KGSL_MEMTYPE_EGL_SHADOW 19 +#define KGSL_MEMTYPE_MULTISAMPLE 20 +#define KGSL_MEMTYPE_KERNEL 255 + +/* + * Alignment hint, passed as the power of 2 exponent. + * i.e 4k (2^12) would be 12, 64k (2^16)would be 16. 
+ */ +#define KGSL_MEMALIGN_MASK 0x00FF0000 +#define KGSL_MEMALIGN_SHIFT 16 + +enum kgsl_user_mem_type { + KGSL_USER_MEM_TYPE_PMEM = 0x00000000, + KGSL_USER_MEM_TYPE_ASHMEM = 0x00000001, + KGSL_USER_MEM_TYPE_ADDR = 0x00000002, + KGSL_USER_MEM_TYPE_ION = 0x00000003, + /* + * ION type is retained for backwards compatibilty but Ion buffers are + * dma-bufs so try to use that naming if we can + */ + KGSL_USER_MEM_TYPE_DMABUF = 0x00000003, + KGSL_USER_MEM_TYPE_MAX = 0x00000007, +}; +#define KGSL_MEMFLAGS_USERMEM_MASK 0x000000e0 +#define KGSL_MEMFLAGS_USERMEM_SHIFT 5 + +/* + * Unfortunately, enum kgsl_user_mem_type starts at 0 which does not + * leave a good value for allocated memory. In the flags we use + * 0 to indicate allocated memory and thus need to add 1 to the enum + * values. + */ +#define KGSL_USERMEM_FLAG(x) (((x) + 1) << KGSL_MEMFLAGS_USERMEM_SHIFT) + +#define KGSL_MEMFLAGS_NOT_USERMEM 0 +#define KGSL_MEMFLAGS_USERMEM_PMEM KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_PMEM) +#define KGSL_MEMFLAGS_USERMEM_ASHMEM \ + KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ASHMEM) +#define KGSL_MEMFLAGS_USERMEM_ADDR KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ADDR) +#define KGSL_MEMFLAGS_USERMEM_ION KGSL_USERMEM_FLAG(KGSL_USER_MEM_TYPE_ION) + +/* --- generic KGSL flag values --- */ + +#define KGSL_FLAGS_NORMALMODE 0x00000000 +#define KGSL_FLAGS_SAFEMODE 0x00000001 +#define KGSL_FLAGS_INITIALIZED0 0x00000002 +#define KGSL_FLAGS_INITIALIZED 0x00000004 +#define KGSL_FLAGS_STARTED 0x00000008 +#define KGSL_FLAGS_ACTIVE 0x00000010 +#define KGSL_FLAGS_RESERVED0 0x00000020 +#define KGSL_FLAGS_RESERVED1 0x00000040 +#define KGSL_FLAGS_RESERVED2 0x00000080 +#define KGSL_FLAGS_SOFT_RESET 0x00000100 +#define KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS 0x00000200 + +/* Server Side Sync Timeout in milliseconds */ +#define KGSL_SYNCOBJ_SERVER_TIMEOUT 2000 + +/* + * Reset status values for context + */ +enum kgsl_ctx_reset_stat { + KGSL_CTX_STAT_NO_ERROR = 0x00000000, + KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT = 0x00000001, + KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT = 0x00000002, + KGSL_CTX_STAT_UNKNOWN_CONTEXT_RESET_EXT = 0x00000003 +}; + +#define KGSL_CONVERT_TO_MBPS(val) \ + (val*1000*1000U) + +/* device id */ +enum kgsl_deviceid { + KGSL_DEVICE_3D0 = 0x00000000, + KGSL_DEVICE_MAX +}; + +struct kgsl_devinfo { + + unsigned int device_id; + /* chip revision id + * coreid:8 majorrev:8 minorrev:8 patch:8 + */ + unsigned int chip_id; + unsigned int mmu_enabled; + unsigned long gmem_gpubaseaddr; + /* + * This field contains the adreno revision + * number 200, 205, 220, etc... + */ + unsigned int gpu_id; + size_t gmem_sizebytes; +}; + +/* + * struct kgsl_devmemstore - this structure defines the region of memory + * that can be mmap()ed from this driver. The timestamp fields are volatile + * because they are written by the GPU + * @soptimestamp: Start of pipeline timestamp written by GPU before the + * commands in concern are processed + * @sbz: Unused, kept for 8 byte alignment + * @eoptimestamp: End of pipeline timestamp written by GPU after the + * commands in concern are processed + * @sbz2: Unused, kept for 8 byte alignment + * @preempted: Indicates if the context was preempted + * @sbz3: Unused, kept for 8 byte alignment + * @ref_wait_ts: Timestamp on which to generate interrupt, unused now. 
+ * @sbz4: Unused, kept for 8 byte alignment + * @current_context: The current context the GPU is working on + * @sbz5: Unused, kept for 8 byte alignment + */ +struct kgsl_devmemstore { + volatile unsigned int soptimestamp; + unsigned int sbz; + volatile unsigned int eoptimestamp; + unsigned int sbz2; + volatile unsigned int preempted; + unsigned int sbz3; + volatile unsigned int ref_wait_ts; + unsigned int sbz4; + unsigned int current_context; + unsigned int sbz5; +}; + +#define KGSL_MEMSTORE_OFFSET(ctxt_id, field) \ + ((ctxt_id)*sizeof(struct kgsl_devmemstore) + \ + offsetof(struct kgsl_devmemstore, field)) + +/* timestamp id*/ +enum kgsl_timestamp_type { + KGSL_TIMESTAMP_CONSUMED = 0x00000001, /* start-of-pipeline timestamp */ + KGSL_TIMESTAMP_RETIRED = 0x00000002, /* end-of-pipeline timestamp*/ + KGSL_TIMESTAMP_QUEUED = 0x00000003, +}; + +/* property types - used with kgsl_device_getproperty */ +#define KGSL_PROP_DEVICE_INFO 0x1 +#define KGSL_PROP_DEVICE_SHADOW 0x2 +#define KGSL_PROP_DEVICE_POWER 0x3 +#define KGSL_PROP_SHMEM 0x4 +#define KGSL_PROP_SHMEM_APERTURES 0x5 +#define KGSL_PROP_MMU_ENABLE 0x6 +#define KGSL_PROP_INTERRUPT_WAITS 0x7 +#define KGSL_PROP_VERSION 0x8 +#define KGSL_PROP_GPU_RESET_STAT 0x9 +#define KGSL_PROP_PWRCTRL 0xE +#define KGSL_PROP_PWR_CONSTRAINT 0x12 +#define KGSL_PROP_UCHE_GMEM_VADDR 0x13 +#define KGSL_PROP_SP_GENERIC_MEM 0x14 +#define KGSL_PROP_UCODE_VERSION 0x15 +#define KGSL_PROP_GPMU_VERSION 0x16 + +struct kgsl_shadowprop { + unsigned long gpuaddr; + size_t size; + unsigned int flags; /* contains KGSL_FLAGS_ values */ +}; + +struct kgsl_version { + unsigned int drv_major; + unsigned int drv_minor; + unsigned int dev_major; + unsigned int dev_minor; +}; + +struct kgsl_sp_generic_mem { + uint64_t local; + uint64_t pvt; +}; + +struct kgsl_ucode_version { + unsigned int pfp; + unsigned int pm4; +}; + +struct kgsl_gpmu_version { + unsigned int major; + unsigned int minor; + unsigned int features; +}; + +/* Performance counter groups */ + +#define KGSL_PERFCOUNTER_GROUP_CP 0x0 +#define KGSL_PERFCOUNTER_GROUP_RBBM 0x1 +#define KGSL_PERFCOUNTER_GROUP_PC 0x2 +#define KGSL_PERFCOUNTER_GROUP_VFD 0x3 +#define KGSL_PERFCOUNTER_GROUP_HLSQ 0x4 +#define KGSL_PERFCOUNTER_GROUP_VPC 0x5 +#define KGSL_PERFCOUNTER_GROUP_TSE 0x6 +#define KGSL_PERFCOUNTER_GROUP_RAS 0x7 +#define KGSL_PERFCOUNTER_GROUP_UCHE 0x8 +#define KGSL_PERFCOUNTER_GROUP_TP 0x9 +#define KGSL_PERFCOUNTER_GROUP_SP 0xA +#define KGSL_PERFCOUNTER_GROUP_RB 0xB +#define KGSL_PERFCOUNTER_GROUP_PWR 0xC +#define KGSL_PERFCOUNTER_GROUP_VBIF 0xD +#define KGSL_PERFCOUNTER_GROUP_VBIF_PWR 0xE +#define KGSL_PERFCOUNTER_GROUP_MH 0xF +#define KGSL_PERFCOUNTER_GROUP_PA_SU 0x10 +#define KGSL_PERFCOUNTER_GROUP_SQ 0x11 +#define KGSL_PERFCOUNTER_GROUP_SX 0x12 +#define KGSL_PERFCOUNTER_GROUP_TCF 0x13 +#define KGSL_PERFCOUNTER_GROUP_TCM 0x14 +#define KGSL_PERFCOUNTER_GROUP_TCR 0x15 +#define KGSL_PERFCOUNTER_GROUP_L2 0x16 +#define KGSL_PERFCOUNTER_GROUP_VSC 0x17 +#define KGSL_PERFCOUNTER_GROUP_CCU 0x18 +#define KGSL_PERFCOUNTER_GROUP_LRZ 0x19 +#define KGSL_PERFCOUNTER_GROUP_CMP 0x1A +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON 0x1B +#define KGSL_PERFCOUNTER_GROUP_SP_PWR 0x1C +#define KGSL_PERFCOUNTER_GROUP_TP_PWR 0x1D +#define KGSL_PERFCOUNTER_GROUP_RB_PWR 0x1E +#define KGSL_PERFCOUNTER_GROUP_CCU_PWR 0x1F +#define KGSL_PERFCOUNTER_GROUP_UCHE_PWR 0x20 +#define KGSL_PERFCOUNTER_GROUP_CP_PWR 0x21 +#define KGSL_PERFCOUNTER_GROUP_GPMU_PWR 0x22 +#define KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR 0x23 +#define KGSL_PERFCOUNTER_GROUP_MAX 0x24 + +#define 
KGSL_PERFCOUNTER_NOT_USED 0xFFFFFFFF
+#define KGSL_PERFCOUNTER_BROKEN 0xFFFFFFFE
+
+/* structure holds list of ibs */
+struct kgsl_ibdesc {
+ unsigned long gpuaddr;
+ unsigned long __pad;
+ size_t sizedwords;
+ unsigned int ctrl;
+};
+
+/**
+ * struct kgsl_cmdbatch_profiling_buffer
+ * @wall_clock_s: Wall clock at ringbuffer submission time (seconds)
+ * @wall_clock_ns: Wall clock at ringbuffer submission time (nanoseconds)
+ * @gpu_ticks_queued: GPU ticks at ringbuffer submission
+ * @gpu_ticks_submitted: GPU ticks when starting cmdbatch execution
+ * @gpu_ticks_retired: GPU ticks when finishing cmdbatch execution
+ *
+ * This structure defines the profiling buffer used to measure cmdbatch
+ * execution time
+ */
+struct kgsl_cmdbatch_profiling_buffer {
+ uint64_t wall_clock_s;
+ uint64_t wall_clock_ns;
+ uint64_t gpu_ticks_queued;
+ uint64_t gpu_ticks_submitted;
+ uint64_t gpu_ticks_retired;
+};
+
+/* ioctls */
+#define KGSL_IOC_TYPE 0x09
+
+/* get misc info about the GPU
+ type should be a value from enum kgsl_property_type
+ value points to a structure that varies based on type
+ sizebytes is sizeof() that structure
+ for KGSL_PROP_DEVICE_INFO, use struct kgsl_devinfo
+ this structure contains hardware versioning info.
+ for KGSL_PROP_DEVICE_SHADOW, use struct kgsl_shadowprop
+ this is used to find mmap() offset and sizes for mapping
+ struct kgsl_memstore into userspace.
+*/
+struct kgsl_device_getproperty {
+ unsigned int type;
+ void __user *value;
+ size_t sizebytes;
+};
+
+#define IOCTL_KGSL_DEVICE_GETPROPERTY \
+ _IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty)
+
+/* IOCTL_KGSL_DEVICE_READ (0x3) - removed 03/2012
+ */
+
+/* block until the GPU has executed past a given timestamp
+ * timeout is in milliseconds.
+ */
+struct kgsl_device_waittimestamp {
+ unsigned int timestamp;
+ unsigned int timeout;
+};
+
+#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP \
+ _IOW(KGSL_IOC_TYPE, 0x6, struct kgsl_device_waittimestamp)
+
+struct kgsl_device_waittimestamp_ctxtid {
+ unsigned int context_id;
+ unsigned int timestamp;
+ unsigned int timeout;
+};
+
+#define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \
+ _IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid)
+
+/* DEPRECATED: issue indirect commands to the GPU.
+ * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE
+ * ibaddr and sizedwords must specify a subset of a buffer created
+ * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM
+ * flags may be a mask of KGSL_CONTEXT_ values
+ * timestamp is a returned counter value which can be passed to
+ * other ioctls to determine when the commands have been executed by
+ * the GPU.
+ *
+ * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS
+ * instead
+ */
+struct kgsl_ringbuffer_issueibcmds {
+ unsigned int drawctxt_id;
+ unsigned long ibdesc_addr;
+ unsigned int numibs;
+ unsigned int timestamp; /*output param */
+ unsigned int flags;
+};
+
+#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS \
+ _IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds)
+
+/* read the most recently executed timestamp value
+ * type should be a value from enum kgsl_timestamp_type
+ */
+struct kgsl_cmdstream_readtimestamp {
+ unsigned int type;
+ unsigned int timestamp; /*output param */
+};
+
+#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_OLD \
+ _IOR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp)
+
+#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP \
+ _IOWR(KGSL_IOC_TYPE, 0x11, struct kgsl_cmdstream_readtimestamp)
+
+/* free memory when the GPU reaches a given timestamp.
+ * gpuaddr specify a memory region created by a + * IOCTL_KGSL_SHAREDMEM_FROM_PMEM call + * type should be a value from enum kgsl_timestamp_type + */ +struct kgsl_cmdstream_freememontimestamp { + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP \ + _IOW(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* Previous versions of this header had incorrectly defined + IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP as a read-only ioctl instead + of a write only ioctl. To ensure binary compatability, the following + #define will be used to intercept the incorrect ioctl +*/ + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_OLD \ + _IOR(KGSL_IOC_TYPE, 0x12, struct kgsl_cmdstream_freememontimestamp) + +/* create a draw context, which is used to preserve GPU state. + * The flags field may contain a mask KGSL_CONTEXT_* values + */ +struct kgsl_drawctxt_create { + unsigned int flags; + unsigned int drawctxt_id; /*output param */ +}; + +#define IOCTL_KGSL_DRAWCTXT_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x13, struct kgsl_drawctxt_create) + +/* destroy a draw context */ +struct kgsl_drawctxt_destroy { + unsigned int drawctxt_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_DESTROY \ + _IOW(KGSL_IOC_TYPE, 0x14, struct kgsl_drawctxt_destroy) + +/* add a block of pmem, fb, ashmem or user allocated address + * into the GPU address space */ +struct kgsl_map_user_mem { + int fd; + unsigned long gpuaddr; /*output param */ + size_t len; + size_t offset; + unsigned long hostptr; /*input param */ + enum kgsl_user_mem_type memtype; + unsigned int flags; +}; + +#define IOCTL_KGSL_MAP_USER_MEM \ + _IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem) + +struct kgsl_cmdstream_readtimestamp_ctxtid { + unsigned int context_id; + unsigned int type; + unsigned int timestamp; /*output param */ +}; + +#define IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID \ + _IOWR(KGSL_IOC_TYPE, 0x16, struct kgsl_cmdstream_readtimestamp_ctxtid) + +struct kgsl_cmdstream_freememontimestamp_ctxtid { + unsigned int context_id; + unsigned long gpuaddr; + unsigned int type; + unsigned int timestamp; +}; + +#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID \ + _IOW(KGSL_IOC_TYPE, 0x17, \ + struct kgsl_cmdstream_freememontimestamp_ctxtid) + +/* add a block of pmem or fb into the GPU address space */ +struct kgsl_sharedmem_from_pmem { + int pmem_fd; + unsigned long gpuaddr; /*output param */ + unsigned int len; + unsigned int offset; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_PMEM \ + _IOWR(KGSL_IOC_TYPE, 0x20, struct kgsl_sharedmem_from_pmem) + +/* remove memory from the GPU's address space */ +struct kgsl_sharedmem_free { + unsigned long gpuaddr; +}; + +#define IOCTL_KGSL_SHAREDMEM_FREE \ + _IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free) + +struct kgsl_cff_user_event { + unsigned char cff_opcode; + unsigned int op1; + unsigned int op2; + unsigned int op3; + unsigned int op4; + unsigned int op5; + unsigned int __pad[2]; +}; + +#define IOCTL_KGSL_CFF_USER_EVENT \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_cff_user_event) + +struct kgsl_gmem_desc { + unsigned int x; + unsigned int y; + unsigned int width; + unsigned int height; + unsigned int pitch; +}; + +struct kgsl_buffer_desc { + void *hostptr; + unsigned long gpuaddr; + int size; + unsigned int format; + unsigned int pitch; + unsigned int enabled; +}; + +struct kgsl_bind_gmem_shadow { + unsigned int drawctxt_id; + struct kgsl_gmem_desc gmem_desc; + unsigned int shadow_x; + unsigned int shadow_y; + struct kgsl_buffer_desc 
shadow_buffer; + unsigned int buffer_id; +}; + +#define IOCTL_KGSL_DRAWCTXT_BIND_GMEM_SHADOW \ + _IOW(KGSL_IOC_TYPE, 0x22, struct kgsl_bind_gmem_shadow) + +/* add a block of memory into the GPU address space */ + +/* + * IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC deprecated 09/2012 + * use IOCTL_KGSL_GPUMEM_ALLOC instead + */ + +struct kgsl_sharedmem_from_vmalloc { + unsigned long gpuaddr; /*output param */ + unsigned int hostptr; + unsigned int flags; +}; + +#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc) + +/* + * This is being deprecated in favor of IOCTL_KGSL_GPUMEM_CACHE_SYNC which + * supports both directions (flush and invalidate). This code will still + * work, but by definition it will do a flush of the cache which might not be + * what you want to have happen on a buffer following a GPU operation. It is + * safer to go with IOCTL_KGSL_GPUMEM_CACHE_SYNC + */ + +#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free) + +struct kgsl_drawctxt_set_bin_base_offset { + unsigned int drawctxt_id; + unsigned int offset; +}; + +#define IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET \ + _IOW(KGSL_IOC_TYPE, 0x25, struct kgsl_drawctxt_set_bin_base_offset) + +enum kgsl_cmdwindow_type { + KGSL_CMDWINDOW_MIN = 0x00000000, + KGSL_CMDWINDOW_2D = 0x00000000, + KGSL_CMDWINDOW_3D = 0x00000001, /* legacy */ + KGSL_CMDWINDOW_MMU = 0x00000002, + KGSL_CMDWINDOW_ARBITER = 0x000000FF, + KGSL_CMDWINDOW_MAX = 0x000000FF, +}; + +/* write to the command window */ +struct kgsl_cmdwindow_write { + enum kgsl_cmdwindow_type target; + unsigned int addr; + unsigned int data; +}; + +#define IOCTL_KGSL_CMDWINDOW_WRITE \ + _IOW(KGSL_IOC_TYPE, 0x2e, struct kgsl_cmdwindow_write) + +struct kgsl_gpumem_alloc { + unsigned long gpuaddr; /* output param */ + size_t size; + unsigned int flags; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC \ + _IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc) + +struct kgsl_cff_syncmem { + unsigned long gpuaddr; + size_t len; + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_CFF_SYNCMEM \ + _IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem) + +/* + * A timestamp event allows the user space to register an action following an + * expired timestamp. Note IOCTL_KGSL_TIMESTAMP_EVENT has been redefined to + * _IOWR to support fences which need to return a fd for the priv parameter. + */ + +struct kgsl_timestamp_event { + int type; /* Type of event (see list below) */ + unsigned int timestamp; /* Timestamp to trigger event on */ + unsigned int context_id; /* Context for the timestamp */ + void __user *priv; /* Pointer to the event specific blob */ + size_t len; /* Size of the event specific blob */ +}; + +#define IOCTL_KGSL_TIMESTAMP_EVENT_OLD \ + _IOW(KGSL_IOC_TYPE, 0x31, struct kgsl_timestamp_event) + +/* A genlock timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_GENLOCK 1 + +struct kgsl_timestamp_event_genlock { + int handle; /* Handle of the genlock lock to release */ +}; + +/* A fence timestamp event releases an existing lock on timestamp expire */ + +#define KGSL_TIMESTAMP_EVENT_FENCE 2 + +struct kgsl_timestamp_event_fence { + int fence_fd; /* Fence to signal */ +}; + +/* + * Set a property within the kernel. 
Uses the same structure as + * IOCTL_KGSL_GETPROPERTY + */ + +#define IOCTL_KGSL_SETPROPERTY \ + _IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty) + +#define IOCTL_KGSL_TIMESTAMP_EVENT \ + _IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event) + +/** + * struct kgsl_gpumem_alloc_id - argument to IOCTL_KGSL_GPUMEM_ALLOC_ID + * @id: returned id value for this allocation. + * @flags: mask of KGSL_MEM* values requested and actual flags on return. + * @size: requested size of the allocation and actual size on return. + * @mmapsize: returned size to pass to mmap() which may be larger than 'size' + * @gpuaddr: returned GPU address for the allocation + * + * Allocate memory for access by the GPU. The flags and size fields are echoed + * back by the kernel, so that the caller can know if the request was + * adjusted. + * + * Supported flags: + * KGSL_MEMFLAGS_GPUREADONLY: the GPU will be unable to write to the buffer + * KGSL_MEMTYPE*: usage hint for debugging aid + * KGSL_MEMALIGN*: alignment hint, may be ignored or adjusted by the kernel. + * KGSL_MEMFLAGS_USE_CPU_MAP: If set on call and return, the returned GPU + * address will be 0. Calling mmap() will set the GPU address. + */ +struct kgsl_gpumem_alloc_id { + unsigned int id; + unsigned int flags; + size_t size; + size_t mmapsize; + unsigned long gpuaddr; +/* private: reserved for future use*/ + unsigned long __pad[2]; +}; + +#define IOCTL_KGSL_GPUMEM_ALLOC_ID \ + _IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id) + +/** + * struct kgsl_gpumem_free_id - argument to IOCTL_KGSL_GPUMEM_FREE_ID + * @id: GPU allocation id to free + * + * Free an allocation by id, in case a GPU address has not been assigned or + * is unknown. Freeing an allocation by id with this ioctl or by GPU address + * with IOCTL_KGSL_SHAREDMEM_FREE are equivalent. + */ +struct kgsl_gpumem_free_id { + unsigned int id; +/* private: reserved for future use*/ + unsigned int __pad; +}; + +#define IOCTL_KGSL_GPUMEM_FREE_ID \ + _IOWR(KGSL_IOC_TYPE, 0x35, struct kgsl_gpumem_free_id) + +/** + * struct kgsl_gpumem_get_info - argument to IOCTL_KGSL_GPUMEM_GET_INFO + * @gpuaddr: GPU address to query. Also set on return. + * @id: GPU allocation id to query. Also set on return. + * @flags: returned mask of KGSL_MEM* values. + * @size: returned size of the allocation. + * @mmapsize: returned size to pass mmap(), which may be larger than 'size' + * @useraddr: returned address of the userspace mapping for this buffer + * + * This ioctl allows querying of all user visible attributes of an existing + * allocation, by either the GPU address or the id returned by a previous + * call to IOCTL_KGSL_GPUMEM_ALLOC_ID. Legacy allocation ioctls may not + * return all attributes so this ioctl can be used to look them up if needed. + * + */ +struct kgsl_gpumem_get_info { + unsigned long gpuaddr; + unsigned int id; + unsigned int flags; + size_t size; + size_t mmapsize; + unsigned long useraddr; +/* private: reserved for future use*/ + unsigned long __pad[4]; +}; + +#define IOCTL_KGSL_GPUMEM_GET_INFO\ + _IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info) + +/** + * struct kgsl_gpumem_sync_cache - argument to IOCTL_KGSL_GPUMEM_SYNC_CACHE + * @gpuaddr: GPU address of the buffer to sync. + * @id: id of the buffer to sync. Either gpuaddr or id is sufficient. 
+ * @op: a mask of KGSL_GPUMEM_CACHE_* values + * @offset: offset into the buffer + * @length: number of bytes starting from offset to perform + * the cache operation on + * + * Sync the L2 cache for memory headed to and from the GPU - this replaces + * KGSL_SHAREDMEM_FLUSH_CACHE since it can handle cache management for both + * directions + * + */ +struct kgsl_gpumem_sync_cache { + unsigned long gpuaddr; + unsigned int id; + unsigned int op; + size_t offset; + size_t length; +}; + +#define KGSL_GPUMEM_CACHE_CLEAN (1 << 0) +#define KGSL_GPUMEM_CACHE_TO_GPU KGSL_GPUMEM_CACHE_CLEAN + +#define KGSL_GPUMEM_CACHE_INV (1 << 1) +#define KGSL_GPUMEM_CACHE_FROM_GPU KGSL_GPUMEM_CACHE_INV + +#define KGSL_GPUMEM_CACHE_FLUSH \ + (KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV) + +/* Flag to ensure backwards compatibility of kgsl_gpumem_sync_cache struct */ +#define KGSL_GPUMEM_CACHE_RANGE (1 << 31U) + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \ + _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache) + +/** + * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET + * @groupid: Performance counter group ID + * @countable: Countable to select within the group + * @offset: Return offset of the reserved LO counter + * @offset_hi: Return offset of the reserved HI counter + * + * Get an available performance counter from a specified groupid. The offset + * of the performance counter will be returned after successfully assigning + * the countable to the counter for the specified group. An error will be + * returned and an offset of 0 if the groupid is invalid or there are no + * more counters left. After successfully getting a perfcounter, the user + * must call kgsl_perfcounter_put(groupid, contable) when finished with + * the perfcounter to clear up perfcounter resources. + * + */ +struct kgsl_perfcounter_get { + unsigned int groupid; + unsigned int countable; + unsigned int offset; + unsigned int offset_hi; +/* private: reserved for future use */ + unsigned int __pad; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_GET \ + _IOWR(KGSL_IOC_TYPE, 0x38, struct kgsl_perfcounter_get) + +/** + * struct kgsl_perfcounter_put - argument to IOCTL_KGSL_PERFCOUNTER_PUT + * @groupid: Performance counter group ID + * @countable: Countable to release within the group + * + * Put an allocated performance counter to allow others to have access to the + * resource that was previously taken. This is only to be called after + * successfully getting a performance counter from kgsl_perfcounter_get(). + * + */ +struct kgsl_perfcounter_put { + unsigned int groupid; + unsigned int countable; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_PUT \ + _IOW(KGSL_IOC_TYPE, 0x39, struct kgsl_perfcounter_put) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group ID + * @countable: Return active countables array + * @size: Size of active countables array + * @max_counters: Return total number counters for the group ID + * + * Query the available performance counters given a groupid. The array + * *countables is used to return the current active countables in counters. + * The size of the array is passed in so the kernel will only write at most + * size or counter->size for the group id. The total number of available + * counters for the group ID is returned in max_counters. 
+ * If the array or size passed in are invalid, then only the maximum number + * of counters will be returned, no data will be written to *countables. + * If the groupid is invalid an error code will be returned. + * + */ +struct kgsl_perfcounter_query { + unsigned int groupid; + /* Array to return the current countable for up to size counters */ + unsigned int __user *countables; + unsigned int count; + unsigned int max_counters; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_QUERY \ + _IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query) + +/** + * struct kgsl_perfcounter_query - argument to IOCTL_KGSL_PERFCOUNTER_QUERY + * @groupid: Performance counter group IDs + * @countable: Performance counter countable IDs + * @value: Return performance counter reads + * @size: Size of all arrays (groupid/countable pair and return value) + * + * Read in the current value of a performance counter given by the groupid + * and countable. + * + */ + +struct kgsl_perfcounter_read_group { + unsigned int groupid; + unsigned int countable; + unsigned long long value; +}; + +struct kgsl_perfcounter_read { + struct kgsl_perfcounter_read_group __user *reads; + unsigned int count; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_PERFCOUNTER_READ \ + _IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read) +/* + * struct kgsl_gpumem_sync_cache_bulk - argument to + * IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK + * @id_list: list of GPU buffer ids of the buffers to sync + * @count: number of GPU buffer ids in id_list + * @op: a mask of KGSL_GPUMEM_CACHE_* values + * + * Sync the cache for memory headed to and from the GPU. Certain + * optimizations can be made on the cache operation based on the total + * size of the working set of memory to be managed. + */ +struct kgsl_gpumem_sync_cache_bulk { + unsigned int __user *id_list; + unsigned int count; + unsigned int op; +/* private: reserved for future use */ + unsigned int __pad[2]; /* For future binary compatibility */ +}; + +#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \ + _IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk) + +/* + * struct kgsl_cmd_syncpoint_timestamp + * @context_id: ID of a KGSL context + * @timestamp: GPU timestamp + * + * This structure defines a syncpoint comprising a context/timestamp pair. A + * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define + * dependencies that must be met before the command can be submitted to the + * hardware + */ +struct kgsl_cmd_syncpoint_timestamp { + unsigned int context_id; + unsigned int timestamp; +}; + +struct kgsl_cmd_syncpoint_fence { + int fd; +}; + +/** + * struct kgsl_cmd_syncpoint - Define a sync point for a command batch + * @type: type of sync point defined here + * @priv: Pointer to the type specific buffer + * @size: Size of the type specific buffer + * + * This structure contains pointers defining a specific command sync point. + * The pointer and size should point to a type appropriate structure. 
+ */ +struct kgsl_cmd_syncpoint { + int type; + void __user *priv; + size_t size; +}; + +/* Flag to indicate that the cmdlist may contain memlists */ +#define KGSL_IBDESC_MEMLIST 0x1 + +/* Flag to point out the cmdbatch profiling buffer in the memlist */ +#define KGSL_IBDESC_PROFILING_BUFFER 0x2 + +/** + * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS + * @context_id: KGSL context ID that owns the commands + * @flags: + * @cmdlist: User pointer to a list of kgsl_ibdesc structures + * @numcmds: Number of commands listed in cmdlist + * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures + * @numsyncs: Number of sync points listed in synclist + * @timestamp: On entry the a user defined timestamp, on exist the timestamp + * assigned to the command batch + * + * This structure specifies a command to send to the GPU hardware. This is + * similar to kgsl_issueibcmds expect that it doesn't support the legacy way to + * submit IB lists and it adds sync points to block the IB until the + * dependencies are satisified. This entry point is the new and preferred way + * to submit commands to the GPU. The memory list can be used to specify all + * memory that is referrenced in the current set of commands. + */ + +struct kgsl_submit_commands { + unsigned int context_id; + unsigned int flags; + struct kgsl_ibdesc __user *cmdlist; + unsigned int numcmds; + struct kgsl_cmd_syncpoint __user *synclist; + unsigned int numsyncs; + unsigned int timestamp; +/* private: reserved for future use */ + unsigned int __pad[4]; +}; + +#define IOCTL_KGSL_SUBMIT_COMMANDS \ + _IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands) + +/** + * struct kgsl_device_constraint - device constraint argument + * @context_id: KGSL context ID + * @type: type of constraint i.e pwrlevel/none + * @data: constraint data + * @size: size of the constraint data + */ +struct kgsl_device_constraint { + unsigned int type; + unsigned int context_id; + void __user *data; + size_t size; +}; + +/* Constraint Type*/ +#define KGSL_CONSTRAINT_NONE 0 +#define KGSL_CONSTRAINT_PWRLEVEL 1 + +/* PWRLEVEL constraint level*/ +/* set to min frequency */ +#define KGSL_CONSTRAINT_PWR_MIN 0 +/* set to max frequency */ +#define KGSL_CONSTRAINT_PWR_MAX 1 + +struct kgsl_device_constraint_pwrlevel { + unsigned int level; +}; + +/** + * struct kgsl_syncsource_create - Argument to IOCTL_KGSL_SYNCSOURCE_CREATE + * @id: returned id for the syncsource that was created. + * + * This ioctl creates a userspace sync timeline. + */ + +struct kgsl_syncsource_create { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_CREATE \ + _IOWR(KGSL_IOC_TYPE, 0x40, struct kgsl_syncsource_create) + +/** + * struct kgsl_syncsource_destroy - Argument to IOCTL_KGSL_SYNCSOURCE_DESTROY + * @id: syncsource id to destroy + * + * This ioctl creates a userspace sync timeline. + */ + +struct kgsl_syncsource_destroy { + unsigned int id; +/* private: reserved for future use */ + unsigned int __pad[3]; +}; + +#define IOCTL_KGSL_SYNCSOURCE_DESTROY \ + _IOWR(KGSL_IOC_TYPE, 0x41, struct kgsl_syncsource_destroy) + +/** + * struct kgsl_syncsource_create_fence - Argument to + * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE + * @id: syncsource id + * @fence_fd: returned sync_fence fd + * + * Create a fence that may be signaled by userspace by calling + * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between + * these fences. 
+/**
+ * struct kgsl_syncsource_create_fence - Argument to
+ * IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE
+ * @id: syncsource id
+ * @fence_fd: returned sync_fence fd
+ *
+ * Create a fence that may be signaled by userspace by calling
+ * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE. There are no order dependencies between
+ * these fences.
+ */
+struct kgsl_syncsource_create_fence {
+ unsigned int id;
+ int fence_fd;
+/* private: reserved for future use */
+ unsigned int __pad[4];
+};
+
+/**
+ * struct kgsl_syncsource_signal_fence - Argument to
+ * IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE
+ * @id: syncsource id
+ * @fence_fd: sync_fence fd to signal
+ *
+ * Signal a fence that was created by an IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE
+ * call using the same syncsource id. This allows a fence to be shared
+ * with other processes but only signaled by the process owning the fd
+ * used to create the fence.
+ */
+#define IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE \
+ _IOWR(KGSL_IOC_TYPE, 0x42, struct kgsl_syncsource_create_fence)
+
+struct kgsl_syncsource_signal_fence {
+ unsigned int id;
+ int fence_fd;
+/* private: reserved for future use */
+ unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE \
+ _IOWR(KGSL_IOC_TYPE, 0x43, struct kgsl_syncsource_signal_fence)
+
+/**
+ * struct kgsl_cff_sync_gpuobj - Argument to IOCTL_KGSL_CFF_SYNC_GPUOBJ
+ * @offset: Offset into the GPU object to sync
+ * @length: Number of bytes to sync
+ * @id: ID of the GPU object to sync
+ */
+struct kgsl_cff_sync_gpuobj {
+ uint64_t offset;
+ uint64_t length;
+ unsigned int id;
+};
+
+#define IOCTL_KGSL_CFF_SYNC_GPUOBJ \
+ _IOW(KGSL_IOC_TYPE, 0x44, struct kgsl_cff_sync_gpuobj)
+
+/**
+ * struct kgsl_gpuobj_alloc - Argument to IOCTL_KGSL_GPUOBJ_ALLOC
+ * @size: Size in bytes of the object to allocate
+ * @flags: mask of KGSL_MEMFLAG_* bits
+ * @va_len: Size in bytes of the virtual region to allocate
+ * @mmapsize: Returns the mmap() size of the object
+ * @id: Returns the GPU object ID of the new object
+ * @metadata_len: Length of the metadata to copy from the user
+ * @metadata: Pointer to the user specified metadata to store for the object
+ */
+struct kgsl_gpuobj_alloc {
+ uint64_t size;
+ uint64_t flags;
+ uint64_t va_len;
+ uint64_t mmapsize;
+ unsigned int id;
+ unsigned int metadata_len;
+ uint64_t metadata;
+};
+
+/* Let the user know that this header supports the gpuobj metadata */
+#define KGSL_GPUOBJ_ALLOC_METADATA_MAX 64
+
+#define IOCTL_KGSL_GPUOBJ_ALLOC \
+ _IOWR(KGSL_IOC_TYPE, 0x45, struct kgsl_gpuobj_alloc)
+
+/**
+ * struct kgsl_gpuobj_free - Argument to IOCTL_KGSL_GPUOBJ_FREE
+ * @flags: Mask of KGSL_GPUOBJ_FREE_ON_EVENT
+ * @priv: Pointer to the private object if KGSL_GPUOBJ_FREE_ON_EVENT is
+ * specified
+ * @id: ID of the GPU object to free
+ * @type: If KGSL_GPUOBJ_FREE_ON_EVENT is specified, the type of asynchronous
+ * event to free on
+ * @len: Length of the data passed in priv
+ */
+struct kgsl_gpuobj_free {
+ uint64_t flags;
+ uint64_t __user priv;
+ unsigned int id;
+ unsigned int type;
+ unsigned int len;
+};
+
+#define KGSL_GPUOBJ_FREE_ON_EVENT 1
+
+#define KGSL_GPU_EVENT_TIMESTAMP 1
+#define KGSL_GPU_EVENT_FENCE 2
+
+/**
+ * struct kgsl_gpu_event_timestamp - Specifies a timestamp event to free a GPU
+ * object on
+ * @context_id: ID of the context whose timestamp is waited on
+ * @timestamp: Timestamp of the timestamp event to wait for
+ */
+struct kgsl_gpu_event_timestamp {
+ unsigned int context_id;
+ unsigned int timestamp;
+};
+
+/**
+ * struct kgsl_gpu_event_fence - Specifies a fence ID to free a GPU object on
+ * @fd: File descriptor for the fence
+ */
+struct kgsl_gpu_event_fence {
+ int fd;
+};
+
+#define IOCTL_KGSL_GPUOBJ_FREE \
+ _IOW(KGSL_IOC_TYPE, 0x46, struct kgsl_gpuobj_free)
+
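
For illustration, a sketch that allocates a GPU object and then asks the driver to defer the free until a context/timestamp event retires, using KGSL_GPUOBJ_FREE_ON_EVENT and the event structures above. The include path, helper name and device fd are assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>   /* assumed install location of this header */

/* Allocate a buffer object, then queue a free tied to a timestamp event. */
static int alloc_then_free_on_ts(int fd, uint64_t size,
                                 unsigned int ctxt, unsigned int ts)
{
    struct kgsl_gpuobj_alloc alloc;
    struct kgsl_gpu_event_timestamp event;
    struct kgsl_gpuobj_free free_req;

    memset(&alloc, 0, sizeof(alloc));
    alloc.size = size;             /* size in bytes; flags left at 0 */

    if (ioctl(fd, IOCTL_KGSL_GPUOBJ_ALLOC, &alloc) < 0)
        return -1;

    /* ... use alloc.id in GPU submissions owned by context ctxt ... */

    event.context_id = ctxt;       /* context whose timestamp is watched */
    event.timestamp = ts;          /* free once this timestamp retires */

    memset(&free_req, 0, sizeof(free_req));
    free_req.flags = KGSL_GPUOBJ_FREE_ON_EVENT;
    free_req.priv = (uintptr_t)&event; /* u64 field carrying a user pointer */
    free_req.id = alloc.id;
    free_req.type = KGSL_GPU_EVENT_TIMESTAMP;
    free_req.len = sizeof(event);

    return ioctl(fd, IOCTL_KGSL_GPUOBJ_FREE, &free_req);
}
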
+/**
+ * struct kgsl_gpuobj_info - argument to IOCTL_KGSL_GPUOBJ_INFO
+ * @gpuaddr: GPU address of the object
+ * @flags: Current flags for the object
+ * @size: Size of the object
+ * @va_len: VA size of the object
+ * @va_addr: Virtual address of the object (if it is mapped)
+ * @id: GPU object ID of the object to query
+ */
+struct kgsl_gpuobj_info {
+ uint64_t gpuaddr;
+ uint64_t flags;
+ uint64_t size;
+ uint64_t va_len;
+ uint64_t va_addr;
+ unsigned int id;
+};
+
+#define IOCTL_KGSL_GPUOBJ_INFO \
+ _IOWR(KGSL_IOC_TYPE, 0x47, struct kgsl_gpuobj_info)
+
+/**
+ * struct kgsl_gpuobj_import - argument to IOCTL_KGSL_GPUOBJ_IMPORT
+ * @priv: Pointer to the private data for the import type
+ * @priv_len: Length of the private data
+ * @flags: Mask of KGSL_MEMFLAG_ flags
+ * @type: Type of the import (KGSL_USER_MEM_TYPE_*)
+ * @id: Returns the ID of the new GPU object
+ */
+struct kgsl_gpuobj_import {
+ uint64_t __user priv;
+ uint64_t priv_len;
+ uint64_t flags;
+ unsigned int type;
+ unsigned int id;
+};
+
+/**
+ * struct kgsl_gpuobj_import_dma_buf - import a dmabuf object
+ * @fd: File descriptor for the dma-buf object
+ */
+struct kgsl_gpuobj_import_dma_buf {
+ int fd;
+};
+
+/**
+ * struct kgsl_gpuobj_import_useraddr - import an object based on a useraddr
+ * @virtaddr: Virtual address of the object to import
+ */
+struct kgsl_gpuobj_import_useraddr {
+ uint64_t virtaddr;
+};
+
+#define IOCTL_KGSL_GPUOBJ_IMPORT \
+ _IOWR(KGSL_IOC_TYPE, 0x48, struct kgsl_gpuobj_import)
+
+/**
+ * struct kgsl_gpuobj_sync_obj - Individual GPU object to sync
+ * @offset: Offset within the GPU object to sync
+ * @length: Number of bytes to sync
+ * @id: ID of the GPU object to sync
+ * @op: Cache operation to execute
+ */
+
+struct kgsl_gpuobj_sync_obj {
+ uint64_t offset;
+ uint64_t length;
+ unsigned int id;
+ unsigned int op;
+};
+
+/**
+ * struct kgsl_gpuobj_sync - Argument for IOCTL_KGSL_GPUOBJ_SYNC
+ * @objs: Pointer to an array of kgsl_gpuobj_sync_obj structs
+ * @obj_len: Size of each item in the array
+ * @count: Number of items in the array
+ */
+
+struct kgsl_gpuobj_sync {
+ uint64_t __user objs;
+ unsigned int obj_len;
+ unsigned int count;
+};
+
+#define IOCTL_KGSL_GPUOBJ_SYNC \
+ _IOW(KGSL_IOC_TYPE, 0x49, struct kgsl_gpuobj_sync)
+
+/**
+ * struct kgsl_command_object - GPU command object
+ * @offset: GPU address offset of the object
+ * @gpuaddr: GPU address of the object
+ * @size: Size of the object
+ * @flags: Current flags for the object
+ * @id: GPU command object ID
+ */
+struct kgsl_command_object {
+ uint64_t offset;
+ uint64_t gpuaddr;
+ uint64_t size;
+ unsigned int flags;
+ unsigned int id;
+};
+
+/**
+ * struct kgsl_command_syncpoint - GPU syncpoint object
+ * @priv: Pointer to the type specific buffer
+ * @size: Size of the type specific buffer
+ * @type: type of sync point defined here
+ */
+struct kgsl_command_syncpoint {
+ uint64_t __user priv;
+ uint64_t size;
+ unsigned int type;
+};
+
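
For illustration, a sketch of batching two cache maintenance operations through IOCTL_KGSL_GPUOBJ_SYNC with the structures above. The KGSL_GPUMEM_CACHE_FLUSH value is assumed to come from the KGSL_GPUMEM_CACHE_* family defined earlier in this header; the include path, helper name and device fd are likewise assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>   /* assumed install location of this header */

/* Flush CPU caches for two GPU objects in a single ioctl. */
static int flush_two_objects(int fd, unsigned int id_a, uint64_t len_a,
                             unsigned int id_b, uint64_t len_b)
{
    struct kgsl_gpuobj_sync_obj objs[2];
    struct kgsl_gpuobj_sync sync;

    memset(objs, 0, sizeof(objs));
    objs[0].id = id_a;
    objs[0].offset = 0;                   /* sync from the start of the object */
    objs[0].length = len_a;
    objs[0].op = KGSL_GPUMEM_CACHE_FLUSH; /* assumed cache op constant */

    objs[1].id = id_b;
    objs[1].offset = 0;
    objs[1].length = len_b;
    objs[1].op = KGSL_GPUMEM_CACHE_FLUSH;

    memset(&sync, 0, sizeof(sync));
    sync.objs = (uintptr_t)objs;          /* u64 field carrying a user pointer */
    sync.obj_len = sizeof(objs[0]);       /* size of each array element */
    sync.count = 2;

    return ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &sync);
}
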
+/**
+ * struct kgsl_gpu_command - Argument for IOCTL_KGSL_GPU_COMMAND
+ * @flags: Flags for the command submission
+ * @cmdlist: List of kgsl_command_objects for submission
+ * @cmdsize: Size of each kgsl_command_object structure in cmdlist
+ * @numcmds: Number of kgsl_command_objects in command list
+ * @objlist: List of kgsl_command_objects for tracking
+ * @objsize: Size of each kgsl_command_object structure in objlist
+ * @numobjs: Number of kgsl_command_objects in object list
+ * @synclist: List of kgsl_command_syncpoints
+ * @syncsize: Size of each kgsl_command_syncpoint structure
+ * @numsyncs: Number of kgsl_command_syncpoints in syncpoint list
+ * @context_id: Context ID submitting the kgsl_gpu_command
+ * @timestamp: Timestamp for the submitted commands
+ */
+struct kgsl_gpu_command {
+ uint64_t flags;
+ uint64_t __user cmdlist;
+ unsigned int cmdsize;
+ unsigned int numcmds;
+ uint64_t __user objlist;
+ unsigned int objsize;
+ unsigned int numobjs;
+ uint64_t __user synclist;
+ unsigned int syncsize;
+ unsigned int numsyncs;
+ unsigned int context_id;
+ unsigned int timestamp;
+};
+
+#define IOCTL_KGSL_GPU_COMMAND \
+ _IOWR(KGSL_IOC_TYPE, 0x4A, struct kgsl_gpu_command)
+
+/**
+ * struct kgsl_preemption_counters_query - argument to
+ * IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY
+ * @counters: Return preemption counters array
+ * @size_user: Size allocated by userspace
+ * @size_priority_level: Size of preemption counters for each
+ * priority level
+ * @max_priority_level: Return max number of priority levels
+ *
+ * Query the available preemption counters. The array counters
+ * is used to return preemption counters. The size of the array
+ * is passed in so the kernel will only write at most size_user
+ * or max available preemption counters. The total number of
+ * preemption counters is returned in max_priority_level. If the
+ * array or size passed in are invalid, then an error is
+ * returned back.
+ */
+struct kgsl_preemption_counters_query {
+ uint64_t __user counters;
+ unsigned int size_user;
+ unsigned int size_priority_level;
+ unsigned int max_priority_level;
+};
+
+#define IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY \
+ _IOWR(KGSL_IOC_TYPE, 0x4B, struct kgsl_preemption_counters_query)
+
+/**
+ * struct kgsl_gpuobj_set_info - argument for IOCTL_KGSL_GPUOBJ_SET_INFO
+ * @flags: Flags to indicate which parameters to change
+ * @metadata: If KGSL_GPUOBJ_SET_INFO_METADATA is set, a pointer to the new
+ * metadata
+ * @id: GPU memory object ID to change
+ * @metadata_len: If KGSL_GPUOBJ_SET_INFO_METADATA is set, the length of the
+ * new metadata string
+ * @type: If KGSL_GPUOBJ_SET_INFO_TYPE is set, the new type of the memory object
+ */
+
+#define KGSL_GPUOBJ_SET_INFO_METADATA (1 << 0)
+#define KGSL_GPUOBJ_SET_INFO_TYPE (1 << 1)
+
+struct kgsl_gpuobj_set_info {
+ uint64_t flags;
+ uint64_t metadata;
+ unsigned int id;
+ unsigned int metadata_len;
+ unsigned int type;
+};
+
+#define IOCTL_KGSL_GPUOBJ_SET_INFO \
+ _IOW(KGSL_IOC_TYPE, 0x4C, struct kgsl_gpuobj_set_info)
+
+#endif /* _UAPI_MSM_KGSL_H */
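
For illustration, a sketch of the 64-bit friendly submission path: one kgsl_command_object wrapped in a kgsl_gpu_command and handed to IOCTL_KGSL_GPU_COMMAND. The KGSL_CMDLIST_IB flag is assumed to be defined earlier in this header; the include path, helper name and device fd are likewise assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/msm_kgsl.h>   /* assumed install location of this header */

/* Submit one command buffer on a context and return the assigned timestamp. */
static int gpu_command_submit(int fd, unsigned int ctxt,
                              uint64_t ib_gpuaddr, uint64_t ib_size,
                              unsigned int *out_ts)
{
    struct kgsl_command_object cmd_obj;
    struct kgsl_gpu_command cmd;

    memset(&cmd_obj, 0, sizeof(cmd_obj));
    cmd_obj.gpuaddr = ib_gpuaddr;      /* GPU address of the command buffer */
    cmd_obj.size = ib_size;            /* command buffer size in bytes */
    cmd_obj.flags = KGSL_CMDLIST_IB;   /* assumed flag marking an IB entry */

    memset(&cmd, 0, sizeof(cmd));
    cmd.cmdlist = (uintptr_t)&cmd_obj; /* u64 field carrying a user pointer */
    cmd.cmdsize = sizeof(cmd_obj);     /* size of each cmdlist element */
    cmd.numcmds = 1;
    cmd.context_id = ctxt;

    if (ioctl(fd, IOCTL_KGSL_GPU_COMMAND, &cmd) < 0)
        return -1;

    *out_ts = cmd.timestamp;           /* timestamp assigned by the driver */
    return 0;
}
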