author    | Jordan Crouse <jcrouse@codeaurora.org> | 2016-05-11 09:37:38 -0600
committer | Carter Cooper <ccooper@codeaurora.org> | 2016-07-20 15:19:33 -0600
commit    | 46369030ee7d8ee547f31942ea6d9a79b11cc4e5 (patch)
tree      | 82fdc67b351dcf3bbd188c2e41bdf27133ca609f /drivers/gpu/msm/adreno_ringbuffer.c
parent    | 0da2fbad582d83abd814f2e802cc88d8588a487e (diff)
msm: kgsl: Implement fast preemption for 5XX
Allow 5XX targets to preempt quickly from an atomic context. In
particular this allows quicker transition from a high priority
ringbuffer to a lower one without having to wait for the worker
to schedule.
CRs-Fixed: 1009124
Change-Id: Ic0dedbad01a31a5da2954b097cb6fa937d45ef5c
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
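Editor's note: the heart of the change is visible in the diff below. Submission now builds commands against a software-only shadow write pointer (rb->_wptr) and commits it to the hardware under a new per-ringbuffer spinlock, so the commit can run safely from atomic context and the register write is skipped while a preemption is in flight. Below is a minimal sketch of that commit step only; the struct is trimmed to illustrative fields, preempt_in_progress stands in for adreno_in_preempt_state(), and example_write_wptr_reg() is a hypothetical stand-in for adreno_writereg(..., ADRENO_REG_CP_RB_WPTR, ...).

/*
 * Illustrative sketch only, not the driver code. See
 * adreno_ringbuffer_wptr() in the diff below for the real thing.
 */
#include <linux/spinlock.h>
#include <linux/types.h>

struct example_rb {
        spinlock_t preempt_lock;     /* serializes wptr commits vs. preemption */
        unsigned int wptr;           /* last value the hardware path was given */
        unsigned int _wptr;          /* shadow pointer advanced by software */
        bool preempt_in_progress;    /* stand-in for the preemption state check */
};

void example_write_wptr_reg(unsigned int wptr);  /* hypothetical HW write */

/* Callable from atomic context: no sleeping, only an irq-safe spinlock. */
static void example_commit_wptr(struct example_rb *rb)
{
        unsigned long flags;

        spin_lock_irqsave(&rb->preempt_lock, flags);

        /* Skip the register write while a preemption is in flight */
        if (!rb->preempt_in_progress)
                example_write_wptr_reg(rb->_wptr);

        /* Record what the hardware should eventually see */
        rb->wptr = rb->_wptr;

        spin_unlock_irqrestore(&rb->preempt_lock, flags);
}

Because only a spinlock is taken, a high-priority ringbuffer can ring the doorbell immediately instead of deferring the write to the dispatcher worker, which is the quicker transition the commit message describes.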
Diffstat (limited to 'drivers/gpu/msm/adreno_ringbuffer.c')
-rw-r--r-- | drivers/gpu/msm/adreno_ringbuffer.c | 271
1 files changed, 99 insertions, 172 deletions
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 383acb71a571..f93d433f406a 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -30,8 +30,6 @@
 #include "a3xx_reg.h"
 #include "adreno_a5xx.h"
 
-#define GSL_RB_NOP_SIZEDWORDS        2
-
 #define RB_HOSTPTR(_rb, _pos) \
         ((unsigned int *) ((_rb)->buffer_desc.hostptr + \
                 ((_pos) * sizeof(unsigned int))))
@@ -50,86 +48,89 @@ static void _cff_write_ringbuffer(struct adreno_ringbuffer *rb)
         if (device->cff_dump_enable == 0)
                 return;
 
-        /*
-         * This code is predicated on the fact that we write a full block of
-         * stuff without wrapping
-         */
-        BUG_ON(rb->wptr < rb->last_wptr);
-
-        size = (rb->wptr - rb->last_wptr) * sizeof(unsigned int);
+        size = (rb->_wptr - rb->last_wptr) * sizeof(unsigned int);
         hostptr = RB_HOSTPTR(rb, rb->last_wptr);
         gpuaddr = RB_GPUADDR(rb, rb->last_wptr);
 
         kgsl_cffdump_memcpy(device, gpuaddr, hostptr, size);
+        rb->last_wptr = rb->_wptr;
 }
 
-void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+static void adreno_get_submit_time(struct adreno_device *adreno_dev,
                 struct adreno_submit_time *time)
 {
-        struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
-        BUG_ON(rb->wptr == 0);
-
-        /* Write the changes to CFF if so enabled */
-        _cff_write_ringbuffer(rb);
-
+        unsigned long flags;
         /*
-         * Read the current GPU ticks and wallclock for most accurate
-         * profiling
+         * Here we are attempting to create a mapping between the
+         * GPU time domain (alwayson counter) and the CPU time domain
+         * (local_clock) by sampling both values as close together as
+         * possible. This is useful for many types of debugging and
+         * profiling. In order to make this mapping as accurate as
+         * possible, we must turn off interrupts to avoid running
+         * interrupt handlers between the two samples.
         */
-        if (time != NULL) {
-                /*
-                 * Here we are attempting to create a mapping between the
-                 * GPU time domain (alwayson counter) and the CPU time domain
-                 * (local_clock) by sampling both values as close together as
-                 * possible. This is useful for many types of debugging and
-                 * profiling. In order to make this mapping as accurate as
-                 * possible, we must turn off interrupts to avoid running
-                 * interrupt handlers between the two samples.
-                 */
-                unsigned long flags;
-                local_irq_save(flags);
+        local_irq_save(flags);
 
-                /* Read always on registers */
-                if (!adreno_is_a3xx(adreno_dev)) {
-                        adreno_readreg64(adreno_dev,
-                                ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
-                                ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
-                                &time->ticks);
+        /* Read always on registers */
+        if (!adreno_is_a3xx(adreno_dev)) {
+                adreno_readreg64(adreno_dev,
+                        ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+                        ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
+                        &time->ticks);
 
-                        /*
-                         * Mask hi bits as they may be incorrect on
-                         * a4x and some a5x
-                         */
-                        if (ADRENO_GPUREV(adreno_dev) >= 400 &&
+                /* Mask hi bits as they may be incorrect on some targets */
+                if (ADRENO_GPUREV(adreno_dev) >= 400 &&
                                 ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530)
-                                time->ticks &= 0xFFFFFFFF;
-                }
-                else
-                        time->ticks = 0;
+                        time->ticks &= 0xFFFFFFFF;
+        } else
+                time->ticks = 0;
 
-                /* Get the kernel clock for time since boot */
-                time->ktime = local_clock();
+        /* Get the kernel clock for time since boot */
+        time->ktime = local_clock();
 
-                /* Get the timeofday for the wall time (for the user) */
-                getnstimeofday(&time->utime);
+        /* Get the timeofday for the wall time (for the user) */
+        getnstimeofday(&time->utime);
 
-                local_irq_restore(flags);
-        }
+        local_irq_restore(flags);
+}
 
-        /* Memory barrier before informing the hardware of new commands */
-        mb();
+void adreno_ringbuffer_wptr(struct adreno_device *adreno_dev,
+                struct adreno_ringbuffer *rb)
+{
+        unsigned long flags;
 
-        if (adreno_preempt_state(adreno_dev, ADRENO_DISPATCHER_PREEMPT_CLEAR) &&
-                (adreno_dev->cur_rb == rb)) {
-                /*
-                 * Let the pwrscale policy know that new commands have
-                 * been submitted.
-                 */
-                kgsl_pwrscale_busy(KGSL_DEVICE(adreno_dev));
-                adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
+        spin_lock_irqsave(&rb->preempt_lock, flags);
+        if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+
+                if (adreno_dev->cur_rb == rb) {
+                        /*
+                         * Let the pwrscale policy know that new commands have
+                         * been submitted.
+                         */
+                        kgsl_pwrscale_busy(KGSL_DEVICE(adreno_dev));
+                        adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+                                rb->_wptr);
+                }
         }
+
+        rb->wptr = rb->_wptr;
+        spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+                struct adreno_submit_time *time)
+{
+        struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+
+        /* Write the changes to CFF if so enabled */
+        _cff_write_ringbuffer(rb);
+
+        if (time != NULL)
+                adreno_get_submit_time(adreno_dev, time);
+
+        adreno_ringbuffer_wptr(adreno_dev, rb);
 }
 
 int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
@@ -141,125 +142,36 @@ int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
         return adreno_spin_idle(adreno_dev, timeout);
 }
 
-static int
-adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
-                unsigned int numcmds, int wptr_ahead)
+unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
+                unsigned int dwords)
 {
-        int nopcount = 0;
-        unsigned int freecmds;
-        unsigned int wptr = rb->wptr;
-        unsigned int *cmds = NULL;
-        uint64_t gpuaddr;
-        unsigned long wait_time;
-        unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
-        unsigned int rptr;
         struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+        unsigned int rptr = adreno_get_rptr(rb);
+        unsigned int ret;
 
-        /* if wptr ahead, fill the remaining with NOPs */
-        if (wptr_ahead) {
-                /* -1 for header */
-                nopcount = KGSL_RB_DWORDS - rb->wptr - 1;
-
-                cmds = RB_HOSTPTR(rb, rb->wptr);
-                gpuaddr = RB_GPUADDR(rb, rb->wptr);
-
-                rptr = adreno_get_rptr(rb);
-                /* For non current rb we don't expect the rptr to move */
-                if ((adreno_dev->cur_rb != rb ||
-                        !adreno_preempt_state(adreno_dev,
-                        ADRENO_DISPATCHER_PREEMPT_CLEAR)) &&
-                        !rptr)
-                        return -ENOSPC;
-
-                /* Make sure that rptr is not 0 before submitting
-                 * commands at the end of ringbuffer. We do not
-                 * want the rptr and wptr to become equal when
-                 * the ringbuffer is not empty */
-                wait_time = jiffies + wait_timeout;
-                while (!rptr) {
-                        rptr = adreno_get_rptr(rb);
-                        if (time_after(jiffies, wait_time))
-                                return -ETIMEDOUT;
-                }
-
-                rb->wptr = 0;
-        }
-
-        rptr = adreno_get_rptr(rb);
-        freecmds = rptr - rb->wptr;
-        if (freecmds == 0 || freecmds > numcmds)
-                goto done;
-
-        /* non current rptr will not advance anyway or if preemption underway */
-        if (adreno_dev->cur_rb != rb ||
-                !adreno_preempt_state(adreno_dev,
-                ADRENO_DISPATCHER_PREEMPT_CLEAR)) {
-                rb->wptr = wptr;
-                return -ENOSPC;
-        }
-
-        wait_time = jiffies + wait_timeout;
-        /* wait for space in ringbuffer */
-        while (1) {
-                rptr = adreno_get_rptr(rb);
-
-                freecmds = rptr - rb->wptr;
-
-                if (freecmds == 0 || freecmds > numcmds)
-                        break;
+        if (rptr <= rb->_wptr) {
+                unsigned int *cmds;
 
-                if (time_after(jiffies, wait_time)) {
-                        KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
-                        "Timed out waiting for freespace in RB rptr: 0x%x, wptr: 0x%x, rb id %d\n",
-                        rptr, wptr, rb->id);
-                        return -ETIMEDOUT;
+                if (rb->_wptr + dwords <= (KGSL_RB_DWORDS - 2)) {
+                        ret = rb->_wptr;
+                        rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS;
+                        return RB_HOSTPTR(rb, ret);
                 }
-        }
-done:
-        if (wptr_ahead) {
-                *cmds = cp_packet(adreno_dev, CP_NOP, nopcount);
-                kgsl_cffdump_write(KGSL_DEVICE(adreno_dev), gpuaddr, *cmds);
-        }
-        return 0;
-}
+                cmds = RB_HOSTPTR(rb, rb->_wptr);
+                *cmds = cp_packet(adreno_dev, CP_NOP,
+                        KGSL_RB_DWORDS - rb->_wptr - 1);
 
-unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
-                                unsigned int numcmds)
-{
-        unsigned int *ptr = NULL;
-        int ret = 0;
-        unsigned int rptr;
-        BUG_ON(numcmds >= KGSL_RB_DWORDS);
-
-        rptr = adreno_get_rptr(rb);
-        /* check for available space */
-        if (rb->wptr >= rptr) {
-                /* wptr ahead or equal to rptr */
-                /* reserve dwords for nop packet */
-                if ((rb->wptr + numcmds) > (KGSL_RB_DWORDS -
-                                GSL_RB_NOP_SIZEDWORDS))
-                        ret = adreno_ringbuffer_waitspace(rb, numcmds, 1);
-        } else {
-                /* wptr behind rptr */
-                if ((rb->wptr + numcmds) >= rptr)
-                        ret = adreno_ringbuffer_waitspace(rb, numcmds, 0);
-                /* check for remaining space */
-                /* reserve dwords for nop packet */
-                if (!ret && (rb->wptr + numcmds) > (KGSL_RB_DWORDS -
-                                GSL_RB_NOP_SIZEDWORDS))
-                        ret = adreno_ringbuffer_waitspace(rb, numcmds, 1);
+                rb->_wptr = 0;
         }
 
-        if (!ret) {
-                rb->last_wptr = rb->wptr;
-
-                ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
-                rb->wptr += numcmds;
-        } else
-                ptr = ERR_PTR(ret);
+        if (rb->_wptr + dwords < rptr) {
+                ret = rb->_wptr;
+                rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS;
+                return RB_HOSTPTR(rb, ret);
+        }
 
-        return ptr;
+        return ERR_PTR(-ENOSPC);
 }
 
 /**
@@ -282,6 +194,7 @@ int adreno_ringbuffer_start(struct adreno_device *adreno_dev,
                 kgsl_sharedmem_writel(device, &device->scratch,
                         SCRATCH_RPTR_OFFSET(rb->id), 0);
                 rb->wptr = 0;
+                rb->_wptr = 0;
                 rb->wptr_preempt_end = 0xFFFFFFFF;
                 rb->starve_timer_state =
                         ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
@@ -323,6 +236,8 @@ static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
         rb->timestamp = 0;
         init_waitqueue_head(&rb->ts_expire_waitq);
 
+        spin_lock_init(&rb->preempt_lock);
+
         /*
          * Allocate mem for storing RB pagetables and commands to
          * switch pagetable
@@ -457,6 +372,10 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
                         !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
                 return -ENOENT;
 
+        /* On fault return error so that we don't keep submitting */
+        if (adreno_gpu_fault(adreno_dev) != 0)
+                return -EPROTO;
+
         rb->timestamp++;
 
         /* If this is a internal IB, use the global timestamp for it */
@@ -715,7 +634,7 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
         * required. If we have commands less than the space reserved in RB
         * adjust the wptr accordingly.
         */
-        rb->wptr = rb->wptr - (total_sizedwords - (ringcmds - start));
+        rb->_wptr = rb->_wptr - (total_sizedwords - (ringcmds - start));
 
         adreno_ringbuffer_submit(rb, time);
 
@@ -1062,8 +981,16 @@ int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
         * In the unlikely event of an error in the drawctxt switch,
         * treat it like a hang
         */
-        if (ret)
+        if (ret) {
+                /*
+                 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+                 * the upper layers know how to handle it
+                 */
+                if (ret != -ENOSPC && ret != -ENOENT)
+                        KGSL_DRV_ERR(device,
+                                "Unable to switch draw context: %d\n", ret);
                 goto done;
+        }
 
         if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv))
                 flags = KGSL_CMD_FLAGS_WFI;
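Editor's note: the other half of the change is the rewritten adreno_ringbuffer_allocspace() above, which replaces the old waitspace/timeout loop with a non-blocking scheme: advance a software shadow pointer, pad the tail of the ring with a single NOP packet when wrapping, and return -ENOSPC so the caller retries later. Below is a standalone sketch of that allocation logic only; the ring size, struct fields, and NOP header encoding are placeholder values, not the driver's real constants.

/*
 * Illustrative sketch only, not the driver code. See
 * adreno_ringbuffer_allocspace() in the diff above for the real thing.
 */
#include <linux/err.h>
#include <linux/errno.h>

#define EX_RB_DWORDS            1024                    /* example ring size */
#define EX_NOP_HEADER(count)    (0x10000000 | (count))  /* placeholder NOP packet */

struct example_ring {
        unsigned int buffer[EX_RB_DWORDS];
        unsigned int rptr;      /* snapshot of where the GPU is reading */
        unsigned int _wptr;     /* software shadow write pointer */
};

static unsigned int *example_allocspace(struct example_ring *rb,
                unsigned int dwords)
{
        unsigned int rptr = rb->rptr;   /* sampled once, like adreno_get_rptr() */
        unsigned int start;

        if (rptr <= rb->_wptr) {
                /* Enough room before the end of the ring (2 dwords reserved)? */
                if (rb->_wptr + dwords <= EX_RB_DWORDS - 2) {
                        start = rb->_wptr;
                        rb->_wptr = (rb->_wptr + dwords) % EX_RB_DWORDS;
                        return &rb->buffer[start];
                }

                /* Otherwise pad the tail with one NOP packet and wrap to 0 */
                rb->buffer[rb->_wptr] =
                        EX_NOP_HEADER(EX_RB_DWORDS - rb->_wptr - 1);
                rb->_wptr = 0;
        }

        /* After wrapping, the request must still fit below the read pointer */
        if (rb->_wptr + dwords < rptr) {
                start = rb->_wptr;
                rb->_wptr = (rb->_wptr + dwords) % EX_RB_DWORDS;
                return &rb->buffer[start];
        }

        /* No space right now; the caller backs off instead of busy-waiting */
        return ERR_PTR(-ENOSPC);
}

Failing fast with -ENOSPC instead of polling the read pointer is what lets the whole submit path run under the new preempt_lock without ever sleeping or spinning, which in turn makes the fast, atomic-context preemption in this patch possible.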