-rw-r--r--  drivers/gpu/msm/adreno_ringbuffer.c  58
-rw-r--r--  drivers/gpu/msm/adreno_ringbuffer.h  14
-rw-r--r--  drivers/gpu/msm/kgsl.h                4
-rw-r--r--  drivers/gpu/msm/kgsl_iommu.c         60
4 files changed, 107 insertions, 29 deletions
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 65e73356857f..3a3777823013 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2017,2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -259,6 +259,11 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc");
if (ret)
return ret;
+
+ /* allocate a chunk of memory to create user profiling IB1s */
+ kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->profile_desc,
+ PAGE_SIZE, KGSL_MEMFLAGS_GPUREADONLY, 0, "profile_desc");
+
return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc,
KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY,
0, "ringbuffer");
@@ -272,7 +277,7 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt)
if (!adreno_is_a3xx(adreno_dev)) {
status = kgsl_allocate_global(device, &device->scratch,
- PAGE_SIZE, 0, 0, "scratch");
+ PAGE_SIZE, 0, KGSL_MEMDESC_RANDOM, "scratch");
if (status != 0)
return status;
}
@@ -303,7 +308,7 @@ static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev,
kgsl_free_global(device, &rb->pagetable_desc);
kgsl_free_global(device, &rb->preemption_desc);
-
+ kgsl_free_global(device, &rb->profile_desc);
kgsl_free_global(device, &rb->buffer_desc);
kgsl_del_event_group(&rb->events);
memset(rb, 0, sizeof(struct adreno_ringbuffer));
@@ -737,6 +742,37 @@ static inline int _get_alwayson_counter(struct adreno_device *adreno_dev,
return (unsigned int)(p - cmds);
}
+/* Maximum possible size, in dwords, of a profiling IB on 64-bit targets */
+#define PROFILE_IB_DWORDS 4
+#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
+
+static int set_user_profiling(struct adreno_device *adreno_dev,
+ struct adreno_ringbuffer *rb, u32 *cmds, u64 gpuaddr)
+{
+ int dwords, index = 0;
+ u64 ib_gpuaddr;
+ u32 *ib;
+
+ if (!rb->profile_desc.hostptr)
+ return 0;
+
+ ib = ((u32 *) rb->profile_desc.hostptr) +
+ (rb->profile_index * PROFILE_IB_DWORDS);
+ ib_gpuaddr = rb->profile_desc.gpuaddr +
+ (rb->profile_index * (PROFILE_IB_DWORDS << 2));
+
+ dwords = _get_alwayson_counter(adreno_dev, ib, gpuaddr);
+
+ /* Make an indirect buffer for the request */
+ cmds[index++] = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+ index += cp_gpuaddr(adreno_dev, &cmds[index], ib_gpuaddr);
+ cmds[index++] = dwords;
+
+ rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
+
+ return index;
+}
+
/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj,
@@ -836,14 +872,12 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
if (drawobj->flags & KGSL_DRAWOBJ_PROFILING &&
!adreno_is_a3xx(adreno_dev) && profile_buffer) {
user_profiling = true;
- dwords += 6;
/*
- * REG_TO_MEM packet on A5xx and above needs another ordinal.
- * Add 2 more dwords since we do profiling before and after.
+ * User side profiling uses two IB1s, one before and one after the
+ * command object, with 4 dwords per INDIRECT_BUFFER_PFE call
*/
- if (!ADRENO_LEGACY_PM4(adreno_dev))
- dwords += 2;
+ dwords += 8;
/*
* we want to use an adreno_submit_time struct to get the
@@ -886,11 +920,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
}
/*
- * Add cmds to read the GPU ticks at the start of command obj and
+ * Add IB1 to read the GPU ticks at the start of command obj and
* write it into the appropriate command obj profiling buffer offset
*/
if (user_profiling) {
- cmds += _get_alwayson_counter(adreno_dev, cmds,
+ cmds += set_user_profiling(adreno_dev, rb, cmds,
cmdobj->profiling_buffer_gpuaddr +
offsetof(struct kgsl_drawobj_profiling_buffer,
gpu_ticks_submitted));
@@ -929,11 +963,11 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
}
/*
- * Add cmds to read the GPU ticks at the end of command obj and
+ * Add IB1 to read the GPU ticks at the end of command obj and
* write it into the appropriate command obj profiling buffer offset
*/
if (user_profiling) {
- cmds += _get_alwayson_counter(adreno_dev, cmds,
+ cmds += set_user_profiling(adreno_dev, rb, cmds,
cmdobj->profiling_buffer_gpuaddr +
offsetof(struct kgsl_drawobj_profiling_buffer,
gpu_ticks_retired));
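
For orientation, a minimal userspace sketch of the slot and dword arithmetic behind the profiling changes above: each CP_INDIRECT_BUFFER_PFE emitted by set_user_profiling() takes at most 4 dwords in the submitted command stream (packet header, two address dwords on 64-bit targets, size), there is one call before and one after the command object (hence dwords += 8), and profile_desc is divided into 16-byte slots that rb->profile_index cycles through. The constants, the 4 KB page size, and profile_slot_gpuaddr() below are illustrative assumptions, not driver definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE         4096	/* illustrative; the driver uses the kernel's PAGE_SIZE */
#define PROFILE_IB_DWORDS 4	/* max size of one profiling IB, in dwords */
#define PROFILE_IB_SLOTS  (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))	/* 4096 / 16 = 256 */

/* GPU address of profiling slot 'index' inside a profile_desc buffer at 'base' */
static uint64_t profile_slot_gpuaddr(uint64_t base, unsigned int index)
{
	return base + (uint64_t)index * (PROFILE_IB_DWORDS << 2);
}

int main(void)
{
	uint64_t base = 0x100000;	/* hypothetical profile_desc.gpuaddr */
	unsigned int index;

	assert(PROFILE_IB_SLOTS == 256);

	/* Two INDIRECT_BUFFER_PFE calls per profiled submit, 4 dwords each */
	printf("dwords reserved per profiled submission: %d\n", 2 * 4);

	/* First few slots; rb->profile_index wraps modulo PROFILE_IB_SLOTS */
	for (index = 0; index < 4; index++)
		printf("slot %u -> 0x%llx\n", index,
		       (unsigned long long)profile_slot_gpuaddr(base, index));
	return 0;
}
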
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index 63374af1e3f7..d64ccbdc846c 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2016,2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -127,6 +127,18 @@ struct adreno_ringbuffer {
unsigned long sched_timer;
enum adreno_dispatcher_starve_timer_states starve_timer_state;
spinlock_t preempt_lock;
+ /**
+ * @profile_desc: global memory used to construct the IB1s for user side
+ * profiling
+ */
+ struct kgsl_memdesc profile_desc;
+ /**
+ * @profile_index: Index of the next free "slot" in profile_desc for a user
+ * profiling IB1. This allows for PAGE_SIZE / 16 = 256 simultaneous
+ * commands per ringbuffer with user profiling enabled, which should be
+ * more than enough.
+ */
+ u32 profile_index;
};
/* Returns the current ringbuffer */
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index a486d9a86f9d..6b8ef82d340f 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2016, 2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2016,2018-2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -189,6 +189,8 @@ struct kgsl_memdesc_ops {
#define KGSL_MEMDESC_TZ_LOCKED BIT(7)
/* The memdesc is allocated through contiguous memory */
#define KGSL_MEMDESC_CONTIG BIT(8)
+/* For global buffers, randomly assign an address from the region */
+#define KGSL_MEMDESC_RANDOM BIT(9)
/**
* struct kgsl_memdesc - GPU memory object descriptor
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 320b815e2beb..ffab5423d694 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2018, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2019, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -20,6 +20,7 @@
#include <linux/msm_kgsl.h>
#include <linux/ratelimit.h>
#include <linux/of_platform.h>
+#include <linux/random.h>
#include <soc/qcom/scm.h>
#include <soc/qcom/secure_buffer.h>
#include <stddef.h>
@@ -84,15 +85,8 @@ static struct kmem_cache *addr_entry_cache;
*
* Here we define an array and a simple allocator to keep track of the currently
* active global entries. Each entry is assigned a unique address inside of a
- * MMU implementation specific "global" region. The addresses are assigned
- * sequentially and never re-used to avoid having to go back and reprogram
- * existing pagetables. The entire list of active entries are mapped and
- * unmapped into every new pagetable as it is created and destroyed.
- *
- * Because there are relatively few entries and they are defined at boot time we
- * don't need to go over the top to define a dynamic allocation scheme. It will
- * be less wasteful to pick a static number with a little bit of growth
- * potential.
+ * MMU implementation specific "global" region. We use a simple bitmap based
+ * allocator for the region to allow for both fixed and dynamic addressing.
*/
#define GLOBAL_PT_ENTRIES 32
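
To make the bitmap scheme described in the comment above concrete, here is a minimal userspace sketch of a page-granular allocator with an optionally randomized starting position. The region size, the use of rand(), and the function names are illustrative assumptions; the driver itself relies on the kernel bitmap helpers (bitmap_find_next_zero_area(), bitmap_set(), bitmap_clear()) and get_random_int(), as shown in the hunks below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT   12
#define REGION_PAGES 8192		/* e.g. a 32MB global region with 4KB pages */

static bool page_map[REGION_PAGES];	/* stand-in for the kernel bitmap */

/* Is the run [start, start + npages) completely free? */
static bool run_is_free(uint32_t start, uint32_t npages)
{
	uint32_t i;

	if (start + npages > REGION_PAGES)
		return false;
	for (i = 0; i < npages; i++)
		if (page_map[start + i])
			return false;
	return true;
}

/*
 * Allocate 'npages' contiguous pages. When 'randomize' is set, begin the
 * search at a random page and walk the window start backwards until a free
 * run is found, mirroring the loop structure in kgsl_iommu_add_global().
 * Returns the first page index, or -1 if no run is available.
 */
static int region_alloc(uint32_t npages, bool randomize)
{
	int start = 0;
	uint32_t i, j;

	if (npages == 0 || npages > REGION_PAGES)
		return -1;

	if (randomize)
		start = rand() % (REGION_PAGES - npages + 1);

	for (; start >= 0; start--) {
		/* Forward search for a free run beginning at or after 'start' */
		for (i = (uint32_t)start; i + npages <= REGION_PAGES; i++) {
			if (!run_is_free(i, npages))
				continue;
			for (j = 0; j < npages; j++)
				page_map[i + j] = true;	/* mark the run used */
			return (int)i;
		}
	}
	return -1;
}

int main(void)
{
	int page = region_alloc(1, true);

	if (page >= 0)
		printf("allocated page %d -> region offset 0x%x\n",
		       page, (unsigned)(page << PAGE_SHIFT));
	return 0;
}
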
@@ -102,10 +96,12 @@ struct global_pt_entry {
char name[32];
};
+#define GLOBAL_MAP_PAGES (KGSL_IOMMU_GLOBAL_MEM_SIZE >> PAGE_SHIFT)
+
static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES];
static struct kgsl_memdesc *kgsl_global_secure_pt_entry;
+static DECLARE_BITMAP(global_map, GLOBAL_MAP_PAGES);
static int global_pt_count;
-uint64_t global_pt_alloc;
static struct kgsl_memdesc gpu_qdss_desc;
static struct kgsl_memdesc gpu_qtimer_desc;
@@ -186,6 +182,12 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
for (i = 0; i < global_pt_count; i++) {
if (global_pt_entries[i].memdesc == memdesc) {
+ u64 offset = memdesc->gpuaddr -
+ KGSL_IOMMU_GLOBAL_MEM_BASE(mmu);
+
+ bitmap_clear(global_map, offset >> PAGE_SHIFT,
+ kgsl_memdesc_footprint(memdesc) >> PAGE_SHIFT);
+
memdesc->gpuaddr = 0;
memdesc->priv &= ~KGSL_MEMDESC_GLOBAL;
global_pt_entries[i].memdesc = NULL;
@@ -197,15 +199,43 @@ static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
static void kgsl_iommu_add_global(struct kgsl_mmu *mmu,
struct kgsl_memdesc *memdesc, const char *name)
{
+ u32 bit;
+ int start = 0;
+ u64 size = kgsl_memdesc_footprint(memdesc);
+
if (memdesc->gpuaddr != 0)
return;
- BUG_ON(global_pt_count >= GLOBAL_PT_ENTRIES);
- BUG_ON((global_pt_alloc + memdesc->size) >= KGSL_IOMMU_GLOBAL_MEM_SIZE);
+ if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES))
+ return;
+
+ if (WARN_ON(size > KGSL_IOMMU_GLOBAL_MEM_SIZE))
+ return;
+
+ if (memdesc->priv & KGSL_MEMDESC_RANDOM) {
+ u32 range = GLOBAL_MAP_PAGES - (size >> PAGE_SHIFT);
+
+ start = get_random_int() % range;
+ }
+
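+ /* Find a free run at or after start, stepping start backwards on failure */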
+ while (start >= 0) {
+ bit = bitmap_find_next_zero_area(global_map, GLOBAL_MAP_PAGES,
+ start, size >> PAGE_SHIFT, 0);
+
+ if (bit < GLOBAL_MAP_PAGES)
+ break;
+
+ start--;
+ }
+
+ if (WARN_ON(start < 0))
+ return;
+
+ memdesc->gpuaddr =
+ KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + (bit << PAGE_SHIFT);
+
+ bitmap_set(global_map, bit, size >> PAGE_SHIFT);
- memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE(mmu) + global_pt_alloc;
memdesc->priv |= KGSL_MEMDESC_GLOBAL;
- global_pt_alloc += memdesc->size;
global_pt_entries[global_pt_count].memdesc = memdesc;
strlcpy(global_pt_entries[global_pt_count].name, name,