summaryrefslogtreecommitdiff
path: root/drivers/gpu/msm/adreno_ringbuffer.c
diff options
context:
space:
mode:
authorJordan Crouse <jcrouse@codeaurora.org>2016-05-11 09:37:38 -0600
committerCarter Cooper <ccooper@codeaurora.org>2016-07-20 15:19:33 -0600
commit46369030ee7d8ee547f31942ea6d9a79b11cc4e5 (patch)
tree82fdc67b351dcf3bbd188c2e41bdf27133ca609f /drivers/gpu/msm/adreno_ringbuffer.c
parent0da2fbad582d83abd814f2e802cc88d8588a487e (diff)
msm: kgsl: Implement fast preemption for 5XX
Allow 5XX targets to preempt quickly from an atomic context. In particular this allows quicker transition from a high priority ringbuffer to a lower one without having to wait for the worker to schedule. CRs-Fixed: 1009124 Change-Id: Ic0dedbad01a31a5da2954b097cb6fa937d45ef5c Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Diffstat (limited to 'drivers/gpu/msm/adreno_ringbuffer.c')
-rw-r--r--drivers/gpu/msm/adreno_ringbuffer.c271
1 files changed, 99 insertions, 172 deletions
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 383acb71a571..f93d433f406a 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -30,8 +30,6 @@
#include "a3xx_reg.h"
#include "adreno_a5xx.h"
-#define GSL_RB_NOP_SIZEDWORDS 2
-
#define RB_HOSTPTR(_rb, _pos) \
((unsigned int *) ((_rb)->buffer_desc.hostptr + \
((_pos) * sizeof(unsigned int))))
@@ -50,86 +48,89 @@ static void _cff_write_ringbuffer(struct adreno_ringbuffer *rb)
if (device->cff_dump_enable == 0)
return;
- /*
- * This code is predicated on the fact that we write a full block of
- * stuff without wrapping
- */
- BUG_ON(rb->wptr < rb->last_wptr);
-
- size = (rb->wptr - rb->last_wptr) * sizeof(unsigned int);
+ size = (rb->_wptr - rb->last_wptr) * sizeof(unsigned int);
hostptr = RB_HOSTPTR(rb, rb->last_wptr);
gpuaddr = RB_GPUADDR(rb, rb->last_wptr);
kgsl_cffdump_memcpy(device, gpuaddr, hostptr, size);
+ rb->last_wptr = rb->_wptr;
}
-void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+static void adreno_get_submit_time(struct adreno_device *adreno_dev,
struct adreno_submit_time *time)
{
- struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
- BUG_ON(rb->wptr == 0);
-
- /* Write the changes to CFF if so enabled */
- _cff_write_ringbuffer(rb);
-
+ unsigned long flags;
/*
- * Read the current GPU ticks and wallclock for most accurate
- * profiling
+ * Here we are attempting to create a mapping between the
+ * GPU time domain (alwayson counter) and the CPU time domain
+ * (local_clock) by sampling both values as close together as
+ * possible. This is useful for many types of debugging and
+ * profiling. In order to make this mapping as accurate as
+ * possible, we must turn off interrupts to avoid running
+ * interrupt handlers between the two samples.
*/
- if (time != NULL) {
- /*
- * Here we are attempting to create a mapping between the
- * GPU time domain (alwayson counter) and the CPU time domain
- * (local_clock) by sampling both values as close together as
- * possible. This is useful for many types of debugging and
- * profiling. In order to make this mapping as accurate as
- * possible, we must turn off interrupts to avoid running
- * interrupt handlers between the two samples.
- */
- unsigned long flags;
- local_irq_save(flags);
+ local_irq_save(flags);
- /* Read always on registers */
- if (!adreno_is_a3xx(adreno_dev)) {
- adreno_readreg64(adreno_dev,
- ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
- ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
- &time->ticks);
+ /* Read always on registers */
+ if (!adreno_is_a3xx(adreno_dev)) {
+ adreno_readreg64(adreno_dev,
+ ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+ ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
+ &time->ticks);
- /*
- * Mask hi bits as they may be incorrect on
- * a4x and some a5x
- */
- if (ADRENO_GPUREV(adreno_dev) >= 400 &&
+ /* Mask hi bits as they may be incorrect on some targets */
+ if (ADRENO_GPUREV(adreno_dev) >= 400 &&
ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530)
- time->ticks &= 0xFFFFFFFF;
- }
- else
- time->ticks = 0;
+ time->ticks &= 0xFFFFFFFF;
+ } else
+ time->ticks = 0;
- /* Get the kernel clock for time since boot */
- time->ktime = local_clock();
+ /* Get the kernel clock for time since boot */
+ time->ktime = local_clock();
- /* Get the timeofday for the wall time (for the user) */
- getnstimeofday(&time->utime);
+ /* Get the timeofday for the wall time (for the user) */
+ getnstimeofday(&time->utime);
- local_irq_restore(flags);
- }
+ local_irq_restore(flags);
+}
- /* Memory barrier before informing the hardware of new commands */
- mb();
+void adreno_ringbuffer_wptr(struct adreno_device *adreno_dev,
+ struct adreno_ringbuffer *rb)
+{
+ unsigned long flags;
- if (adreno_preempt_state(adreno_dev, ADRENO_DISPATCHER_PREEMPT_CLEAR) &&
- (adreno_dev->cur_rb == rb)) {
- /*
- * Let the pwrscale policy know that new commands have
- * been submitted.
- */
- kgsl_pwrscale_busy(KGSL_DEVICE(adreno_dev));
- adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
+ spin_lock_irqsave(&rb->preempt_lock, flags);
+ if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+
+ if (adreno_dev->cur_rb == rb) {
+ /*
+ * Let the pwrscale policy know that new commands have
+ * been submitted.
+ */
+ kgsl_pwrscale_busy(KGSL_DEVICE(adreno_dev));
+ adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+ rb->_wptr);
+ }
}
+
+ rb->wptr = rb->_wptr;
+ spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+ struct adreno_submit_time *time)
+{
+ struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+
+ /* Write the changes to CFF if so enabled */
+ _cff_write_ringbuffer(rb);
+
+ if (time != NULL)
+ adreno_get_submit_time(adreno_dev, time);
+
+ adreno_ringbuffer_wptr(adreno_dev, rb);
}
int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
@@ -141,125 +142,36 @@ int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
return adreno_spin_idle(adreno_dev, timeout);
}
-static int
-adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
- unsigned int numcmds, int wptr_ahead)
+unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
+ unsigned int dwords)
{
- int nopcount = 0;
- unsigned int freecmds;
- unsigned int wptr = rb->wptr;
- unsigned int *cmds = NULL;
- uint64_t gpuaddr;
- unsigned long wait_time;
- unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
- unsigned int rptr;
struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+ unsigned int rptr = adreno_get_rptr(rb);
+ unsigned int ret;
- /* if wptr ahead, fill the remaining with NOPs */
- if (wptr_ahead) {
- /* -1 for header */
- nopcount = KGSL_RB_DWORDS - rb->wptr - 1;
-
- cmds = RB_HOSTPTR(rb, rb->wptr);
- gpuaddr = RB_GPUADDR(rb, rb->wptr);
-
- rptr = adreno_get_rptr(rb);
- /* For non current rb we don't expect the rptr to move */
- if ((adreno_dev->cur_rb != rb ||
- !adreno_preempt_state(adreno_dev,
- ADRENO_DISPATCHER_PREEMPT_CLEAR)) &&
- !rptr)
- return -ENOSPC;
-
- /* Make sure that rptr is not 0 before submitting
- * commands at the end of ringbuffer. We do not
- * want the rptr and wptr to become equal when
- * the ringbuffer is not empty */
- wait_time = jiffies + wait_timeout;
- while (!rptr) {
- rptr = adreno_get_rptr(rb);
- if (time_after(jiffies, wait_time))
- return -ETIMEDOUT;
- }
-
- rb->wptr = 0;
- }
-
- rptr = adreno_get_rptr(rb);
- freecmds = rptr - rb->wptr;
- if (freecmds == 0 || freecmds > numcmds)
- goto done;
-
- /* non current rptr will not advance anyway or if preemption underway */
- if (adreno_dev->cur_rb != rb ||
- !adreno_preempt_state(adreno_dev,
- ADRENO_DISPATCHER_PREEMPT_CLEAR)) {
- rb->wptr = wptr;
- return -ENOSPC;
- }
-
- wait_time = jiffies + wait_timeout;
- /* wait for space in ringbuffer */
- while (1) {
- rptr = adreno_get_rptr(rb);
-
- freecmds = rptr - rb->wptr;
-
- if (freecmds == 0 || freecmds > numcmds)
- break;
+ if (rptr <= rb->_wptr) {
+ unsigned int *cmds;
- if (time_after(jiffies, wait_time)) {
- KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
- "Timed out waiting for freespace in RB rptr: 0x%x, wptr: 0x%x, rb id %d\n",
- rptr, wptr, rb->id);
- return -ETIMEDOUT;
+ if (rb->_wptr + dwords <= (KGSL_RB_DWORDS - 2)) {
+ ret = rb->_wptr;
+ rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS;
+ return RB_HOSTPTR(rb, ret);
}
- }
-done:
- if (wptr_ahead) {
- *cmds = cp_packet(adreno_dev, CP_NOP, nopcount);
- kgsl_cffdump_write(KGSL_DEVICE(adreno_dev), gpuaddr, *cmds);
- }
- return 0;
-}
+ cmds = RB_HOSTPTR(rb, rb->_wptr);
+ *cmds = cp_packet(adreno_dev, CP_NOP,
+ KGSL_RB_DWORDS - rb->_wptr - 1);
-unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
- unsigned int numcmds)
-{
- unsigned int *ptr = NULL;
- int ret = 0;
- unsigned int rptr;
- BUG_ON(numcmds >= KGSL_RB_DWORDS);
-
- rptr = adreno_get_rptr(rb);
- /* check for available space */
- if (rb->wptr >= rptr) {
- /* wptr ahead or equal to rptr */
- /* reserve dwords for nop packet */
- if ((rb->wptr + numcmds) > (KGSL_RB_DWORDS -
- GSL_RB_NOP_SIZEDWORDS))
- ret = adreno_ringbuffer_waitspace(rb, numcmds, 1);
- } else {
- /* wptr behind rptr */
- if ((rb->wptr + numcmds) >= rptr)
- ret = adreno_ringbuffer_waitspace(rb, numcmds, 0);
- /* check for remaining space */
- /* reserve dwords for nop packet */
- if (!ret && (rb->wptr + numcmds) > (KGSL_RB_DWORDS -
- GSL_RB_NOP_SIZEDWORDS))
- ret = adreno_ringbuffer_waitspace(rb, numcmds, 1);
+ rb->_wptr = 0;
}
- if (!ret) {
- rb->last_wptr = rb->wptr;
-
- ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
- rb->wptr += numcmds;
- } else
- ptr = ERR_PTR(ret);
+ if (rb->_wptr + dwords < rptr) {
+ ret = rb->_wptr;
+ rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS;
+ return RB_HOSTPTR(rb, ret);
+ }
- return ptr;
+ return ERR_PTR(-ENOSPC);
}
/**
@@ -282,6 +194,7 @@ int adreno_ringbuffer_start(struct adreno_device *adreno_dev,
kgsl_sharedmem_writel(device, &device->scratch,
SCRATCH_RPTR_OFFSET(rb->id), 0);
rb->wptr = 0;
+ rb->_wptr = 0;
rb->wptr_preempt_end = 0xFFFFFFFF;
rb->starve_timer_state =
ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
@@ -323,6 +236,8 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
rb->timestamp = 0;
init_waitqueue_head(&rb->ts_expire_waitq);
+ spin_lock_init(&rb->preempt_lock);
+
/*
* Allocate mem for storing RB pagetables and commands to
* switch pagetable
@@ -457,6 +372,10 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
return -ENOENT;
+ /* On fault return error so that we don't keep submitting */
+ if (adreno_gpu_fault(adreno_dev) != 0)
+ return -EPROTO;
+
rb->timestamp++;
/* If this is a internal IB, use the global timestamp for it */
@@ -715,7 +634,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
* required. If we have commands less than the space reserved in RB
* adjust the wptr accordingly.
*/
- rb->wptr = rb->wptr - (total_sizedwords - (ringcmds - start));
+ rb->_wptr = rb->_wptr - (total_sizedwords - (ringcmds - start));
adreno_ringbuffer_submit(rb, time);
@@ -1062,8 +981,16 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
* In the unlikely event of an error in the drawctxt switch,
* treat it like a hang
*/
- if (ret)
+ if (ret) {
+ /*
+ * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+ * the upper layers know how to handle it
+ */
+ if (ret != -ENOSPC && ret != -ENOENT)
+ KGSL_DRV_ERR(device,
+ "Unable to switch draw context: %d\n", ret);
goto done;
+ }
if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv))
flags = KGSL_CMD_FLAGS_WFI;