diff options
| author | Jordan Crouse <jcrouse@codeaurora.org> | 2017-02-13 10:14:24 -0700 |
|---|---|---|
| committer | Jordan Crouse <jcrouse@codeaurora.org> | 2017-02-22 09:52:19 -0700 |
| commit | 378583458fa167277b15d145dccce253459393ec (patch) | |
| tree | ce644ceb8f537bdcd25b1e5ef55676348f175d72 | |
| parent | b0dfb38811ed601bca0f52a2425fa4eae760f9aa (diff) | |
drm/msm: Add support for multiple ringbuffers
Add the infrastructure for supporting multiple ringbuffers.
Change-Id: Ic0dedbada90ec5c4c8074ffce33c3fe275b0cda1
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 14 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 14 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 72 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_power.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.c | 168 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.h | 36 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_drv.c | 44 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_drv.h | 34 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_gem.h | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_gem_submit.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 171 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.h | 45 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.c | 12 | ||||
| -rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.h | 6 | ||||
| -rw-r--r-- | include/uapi/drm/msm_drm.h | 17 |
16 files changed, 446 insertions, 202 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 5a061ad6225a..c4f886fd6037 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -40,10 +40,11 @@ extern bool hang_debug; static void a3xx_dump(struct msm_gpu *gpu); +static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -64,8 +65,8 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - return gpu->funcs->idle(gpu); + gpu->funcs->flush(gpu, ring); + return a3xx_idle(gpu); } static int a3xx_hw_init(struct msm_gpu *gpu) @@ -331,7 +332,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -439,9 +440,10 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = msm_gpu_pm_resume, .recover = a3xx_recover, .last_fence = adreno_last_fence, + .submitted_fence = adreno_submitted_fence, .submit = adreno_submit, .flush = adreno_flush, - .idle = a3xx_idle, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -489,7 +491,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 47c9b22b0801..534a7c3fbdca 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -31,6 +31,7 @@ extern bool hang_debug; static void a4xx_dump(struct msm_gpu *gpu); +static bool a4xx_idle(struct msm_gpu *gpu); /* * a4xx_enable_hwcg() - Program the clock control registers @@ -115,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -136,8 +137,8 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - return gpu->funcs->idle(gpu); + gpu->funcs->flush(gpu, ring); + return a4xx_idle(gpu); } static int a4xx_hw_init(struct msm_gpu *gpu) @@ -329,7 +330,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -522,9 +523,10 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, .last_fence = adreno_last_fence, + .submitted_fence = adreno_submitted_fence, .submit = adreno_submit, .flush = adreno_flush, - .idle = a4xx_idle, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -566,7 +568,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 8bc3c7bee3fb..1e554ef51968 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -22,7 +22,7 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[submit->ring]; unsigned int i, ibs = 0; for (i = 0; i < submit->nr_cmds; i++) { @@ -47,11 +47,12 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence))); + + OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, fence))); OUT_RING(ring, submit->fence); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return 0; } @@ -175,7 +176,7 @@ static void a5xx_enable_hwcg(struct msm_gpu *gpu) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -206,9 +207,8 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - - return gpu->funcs->idle(gpu) ? 0 : -EINVAL; + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, @@ -545,11 +545,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!gpu->funcs->idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -562,13 +562,13 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * cause a XPU violation. */ if (test_bit(A5XX_ZAP_SHADER_LOADED, &a5xx_gpu->flags)) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_SET_SECURE_MODE, 1); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - if (!gpu->funcs->idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -639,16 +639,19 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -static bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", - gpu->name, + DRM_ERROR( + "%s: timeout waiting for GPU RB %d to idle: status %8.8X rptr/wptr: %4.4X/%4.4X irq %8.8X\n", + gpu->name, ring->id, gpu_read(gpu, REG_A5XX_RBBM_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR), gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); return false; @@ -768,6 +771,32 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring; + int i; + + dev_err(dev->dev, "GPU fault detected: status %8.8X\n", + gpu_read(gpu, REG_A5XX_RBBM_STATUS)); + + dev_err(dev->dev, + "RPTR/WPTR %4.4X/%4.4X IB1 %16.16llX/%4.4X IB2 %16.16llX/%4.4X\n", + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR), + gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI), + gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ), + gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI), + gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ)); + + FOR_EACH_RING(gpu, ring, i) { + if (!ring) + continue; + + dev_err(dev->dev, "RB %d: submitted %d retired %d\n", ring->id, + adreno_last_fence(gpu, ring), + adreno_submitted_fence(gpu, ring)); + } + + /* Turn off the hangcheck timer to keep it from bothering us */ + del_timer(&gpu->hangcheck_timer); queue_work(priv->wq, &gpu->recover_work); } @@ -951,9 +980,10 @@ static const struct adreno_gpu_funcs funcs = { .pm_resume = a5xx_pm_resume, .recover = a5xx_recover, .last_fence = adreno_last_fence, + .submitted_fence = adreno_submitted_fence, .submit = a5xx_submit, .flush = adreno_flush, - .idle = a5xx_idle, + .active_ring = adreno_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1073,7 +1103,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) /* Check the efuses for some configuration */ a5xx_efuses_read(pdev, adreno_gpu); - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e82f54063877..6c8d1f41b680 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -62,5 +62,6 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 18bca141b425..2040b18f731f 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -229,7 +229,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -248,9 +248,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!gpu->funcs->idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", gpu->name); return -EINVAL; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index fad7a5fd9d60..8506ca2e9b8a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,7 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" -#define RB_SIZE SZ_32K #define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) @@ -60,30 +59,34 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int i; DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); - return ret; + for (i = 0; i < gpu->nr_rings; i++) { + int ret = msm_gem_get_iova(gpu->rb[i]->bo, gpu->aspace, + &gpu->rb[i]->iova); + if (ret) { + gpu->rb[i]->iova = 0; + dev_err(gpu->dev->dev, + "could not map ringbuffer %d: %d\n", i, ret); + return ret; + } } /* Setup REG_CP_RB_CNTL: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + /* size is log2(quad-words): */ + AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | + (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(adreno_gpu, rptr)); + REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(adreno_gpu, 0, rptr)); return 0; } @@ -94,35 +97,68 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) } /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { - if (adreno_is_a430(adreno_gpu)) - return adreno_gpu->memptrs->rptr = adreno_gpu_read( + if (adreno_is_a430(adreno_gpu)) { + /* + * If index is anything but 0 this will probably break horribly, + * but I think that we have enough infrastructure in place to + * ensure that it won't be. If not then this is why your + * a430 stopped working. + */ + return adreno_gpu->memptrs->rptr[ring->id] = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); - else - return adreno_gpu->memptrs->rptr; + } else + return adreno_gpu->memptrs->rptr[ring->id]; } -uint32_t adreno_last_fence(struct msm_gpu *gpu) +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) +{ + return gpu->rb[0]; +} + +uint32_t adreno_submitted_fence(struct msm_gpu *gpu, + struct msm_ringbuffer *ring) +{ + if (!ring) + return 0; + + return ring->submitted_fence; +} + +uint32_t adreno_last_fence(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - return adreno_gpu->memptrs->fence; + + if (!ring) + return 0; + + return adreno_gpu->memptrs->fence[ring->id]; } void adreno_recover(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct drm_device *dev = gpu->dev; - int ret; + struct msm_ringbuffer *ring; + int ret, i; gpu->funcs->pm_suspend(gpu); - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + /* reset ringbuffer(s): */ + + FOR_EACH_RING(gpu, ring, i) { + if (!ring) + continue; + + ring->cur = ring->start; - /* reset completed fence seqno, just discard anything pending: */ - adreno_gpu->memptrs->fence = gpu->submitted_fence; - adreno_gpu->memptrs->rptr = 0; + /* reset completed fence seqno, discard anything pending: */ + adreno_gpu->memptrs->fence[ring->id] = + adreno_submitted_fence(gpu, ring); + adreno_gpu->memptrs->rptr[ring->id] = 0; + } gpu->funcs->pm_resume(gpu); @@ -140,7 +176,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[submit->ring]; unsigned i, ibs = 0; for (i = 0; i < submit->nr_cmds; i++) { @@ -186,7 +222,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(adreno_gpu, fence)); + OUT_RING(ring, rbmemptr(adreno_gpu, ring->id, fence)); OUT_RING(ring, submit->fence); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ @@ -213,12 +249,12 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return 0; } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; @@ -228,7 +264,7 @@ void adreno_flush(struct msm_gpu *gpu) * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring) % (MSM_GPU_RINGBUFFER_SZ >> 2); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -236,17 +272,18 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to recover from hang? */ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name, + ring->id); return false; } @@ -254,6 +291,7 @@ bool adreno_idle(struct msm_gpu *gpu) void adreno_show(struct msm_gpu *gpu, struct seq_file *m) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct msm_ringbuffer *ring; int i; seq_printf(m, "revision: %d (%d.%d.%d.%d)\n", @@ -261,10 +299,18 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->submitted_fence); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + FOR_EACH_RING(gpu, ring, i) { + if (!ring) + continue; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + adreno_last_fence(gpu, ring), + adreno_submitted_fence(gpu, ring)); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } gpu->funcs->pm_resume(gpu); @@ -294,20 +340,28 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct msm_ringbuffer *ring; int i; - printk("revision: %d (%d.%d.%d.%d)\n", + pr_err("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->submitted_fence); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + FOR_EACH_RING(gpu, ring, i) { + if (!ring) + continue; + + pr_err("rb %d: fence: %d/%d\n", i, + adreno_last_fence(gpu, ring), + adreno_submitted_fence(gpu, ring)); + + pr_err("rptr: %d\n", get_rptr(adreno_gpu, ring)); + pr_err("rb wptr: %d\n", get_wptr(ring)); + } for (i = 0; i < 8; i++) { - printk("CP_SCRATCH_REG%d: %u\n", i, + pr_err("CP_SCRATCH_REG%d: %u\n", i, gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i)); } } @@ -332,19 +386,20 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + uint32_t wptr = get_wptr(ring); + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_ERROR("%s: timeout waiting for space in ringubffer %d\n", + ring->gpu->name, ring->id); } static const char *iommu_ports[] = { @@ -465,7 +520,8 @@ static int adreno_of_parse(struct platform_device *pdev, struct msm_gpu *gpu) } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu *gpu = &adreno_gpu->base; @@ -483,7 +539,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq", - RB_SIZE); + nr_rings); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 86a993bcce32..9b4e1828bb6f 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -83,12 +83,18 @@ struct adreno_info { const struct adreno_info *adreno_info(struct adreno_rev rev); -#define rbmemptr(adreno_gpu, member) \ +#define _sizeof(member) \ + sizeof(((struct adreno_rbmemptrs *) 0)->member[0]) + +#define _base(adreno_gpu, member) \ ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member)) +#define rbmemptr(adreno_gpu, index, member) \ + (_base((adreno_gpu), member) + ((index) * _sizeof(member))) + struct adreno_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; + volatile uint32_t rptr[MSM_GPU_MAX_RINGS]; + volatile uint32_t fence[MSM_GPU_MAX_RINGS]; }; struct adreno_gpu { @@ -205,21 +211,25 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu) int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); int adreno_hw_init(struct msm_gpu *gpu); -uint32_t adreno_last_fence(struct msm_gpu *gpu); +uint32_t adreno_last_fence(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +uint32_t adreno_submitted_fence(struct msm_gpu *gpu, + struct msm_ringbuffer *ring); void adreno_recover(struct msm_gpu *gpu); int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -228,7 +238,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -236,14 +246,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -265,14 +275,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9163b90981a2..940674c8d759 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -939,14 +939,23 @@ int msm_wait_fence(struct drm_device *dev, uint32_t fence, ktime_t *timeout , bool interruptible) { struct msm_drm_private *priv = dev->dev_private; + struct msm_gpu *gpu = priv->gpu; + int index = FENCE_RING(fence); + uint32_t submitted; int ret; - if (!priv->gpu) + if (!gpu) return -ENXIO; - if (fence > priv->gpu->submitted_fence) { + if (index > MSM_GPU_MAX_RINGS || index >= gpu->nr_rings || + !gpu->rb[index]) + return -EINVAL; + + submitted = gpu->funcs->submitted_fence(gpu, gpu->rb[index]); + + if (fence > submitted) { DRM_ERROR("waiting on invalid fence: %u (of %u)\n", - fence, priv->gpu->submitted_fence); + fence, submitted); return -EINVAL; } @@ -976,7 +985,7 @@ int msm_wait_fence(struct drm_device *dev, uint32_t fence, if (ret == 0) { DBG("timeout waiting for fence: %u (completed: %u)", - fence, priv->completed_fence); + fence, priv->completed_fence[index]); ret = -ETIMEDOUT; } else if (ret != -ERESTARTSYS) { ret = 0; @@ -990,12 +999,13 @@ int msm_queue_fence_cb(struct drm_device *dev, struct msm_fence_cb *cb, uint32_t fence) { struct msm_drm_private *priv = dev->dev_private; + int index = FENCE_RING(fence); int ret = 0; mutex_lock(&dev->struct_mutex); if (!list_empty(&cb->work.entry)) { ret = -EINVAL; - } else if (fence > priv->completed_fence) { + } else if (fence > priv->completed_fence[index]) { cb->fence = fence; list_add_tail(&cb->work.entry, &priv->fence_cbs); } else { @@ -1010,21 +1020,21 @@ int msm_queue_fence_cb(struct drm_device *dev, void msm_update_fence(struct drm_device *dev, uint32_t fence) { struct msm_drm_private *priv = dev->dev_private; + struct msm_fence_cb *cb, *tmp; + int index = FENCE_RING(fence); - mutex_lock(&dev->struct_mutex); - priv->completed_fence = max(fence, priv->completed_fence); - - while (!list_empty(&priv->fence_cbs)) { - struct msm_fence_cb *cb; - - cb = list_first_entry(&priv->fence_cbs, - struct msm_fence_cb, work.entry); + if (index >= MSM_GPU_MAX_RINGS) + return; - if (cb->fence > priv->completed_fence) - break; + mutex_lock(&dev->struct_mutex); + priv->completed_fence[index] = max(fence, priv->completed_fence[index]); - list_del_init(&cb->work.entry); - queue_work(priv->wq, &cb->work); + list_for_each_entry_safe(cb, tmp, &priv->fence_cbs, work.entry) { + if (COMPARE_FENCE_LTE(cb->fence, + priv->completed_fence[index])) { + list_del_init(&cb->work.entry); + queue_work(priv->wq, &cb->work); + } } mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 406b40f0f8de..c058ec56aafc 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -250,6 +250,8 @@ struct msm_drm_commit { struct kthread_worker worker; }; +#define MSM_GPU_MAX_RINGS 1 + struct msm_drm_private { struct msm_kms *kms; @@ -280,7 +282,9 @@ struct msm_drm_private { struct drm_fb_helper *fbdev; - uint32_t next_fence, completed_fence; + uint32_t next_fence[MSM_GPU_MAX_RINGS]; + uint32_t completed_fence[MSM_GPU_MAX_RINGS]; + wait_queue_head_t fence_event; struct msm_rd_state *rd; @@ -351,6 +355,31 @@ struct msm_format { uint32_t pixel_format; }; +/* + * Some GPU targets can support multiple ringbuffers and preempt between them. + * In order to do this without massive API changes we will steal two bits from + * the top of the fence and use them to identify the ringbuffer, (0x00000001 for + * riug 0, 0x40000001 for ring 1, 0x50000001 for ring 2, etc). If you are going + * to do a fence comparision you have to make sure you are only comparing + * against fences from the same ring, but since fences within a ringbuffer are + * still contigious you can still use straight comparisons (i.e 0x40000001 is + * older than 0x40000002). Mathmatically there will be 0x3FFFFFFF timestamps + * per ring or ~103 days of 120 interrupts per second (two interrupts per frame + * at 60 FPS). + */ +#define FENCE_RING(_fence) ((_fence >> 30) & 3) +#define FENCE(_ring, _fence) ((((_ring) & 3) << 30) | ((_fence) & 0x3FFFFFFF)) + +static inline bool COMPARE_FENCE_LTE(uint32_t a, uint32_t b) +{ + return ((FENCE_RING(a) == FENCE_RING(b)) && a <= b); +} + +static inline bool COMPARE_FENCE_LT(uint32_t a, uint32_t b) +{ + return ((FENCE_RING(a) == FENCE_RING(b)) && a < b); +} + /* callback from wq once fence has passed: */ struct msm_fence_cb { struct work_struct work; @@ -529,7 +558,8 @@ u32 msm_readl(const void __iomem *addr); static inline bool fence_completed(struct drm_device *dev, uint32_t fence) { struct msm_drm_private *priv = dev->dev_private; - return priv->completed_fence >= fence; + + return priv->completed_fence[FENCE_RING(fence)] >= fence; } static inline int align_pitch(int width, int bpp) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index d9c3977434d9..1bd5b21f0763 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -121,6 +121,7 @@ struct msm_gem_submit { struct list_head bo_list; struct ww_acquire_ctx ticket; uint32_t fence; + int ring; bool valid; unsigned int nr_cmds; unsigned int nr_bos; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index f7b5e30b41eb..7c83a9acd3e6 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -349,7 +349,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, /* for now, we just have 3d pipe.. eventually this would need to * be more clever to dispatch to appropriate gpu module: */ - if (args->pipe != MSM_PIPE_3D0) + if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0) return -EINVAL; gpu = priv->gpu; @@ -435,6 +435,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; + /* Clamp the user submitted ring to the range of available rings */ + submit->ring = clamp_t(uint32_t, + (args->flags & MSM_SUBMIT_RING_MASK) >> MSM_SUBMIT_RING_SHIFT, + 0, gpu->nr_rings - 1); + ret = msm_gpu_submit(gpu, submit, ctx); args->fence = submit->fence; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 08ecc089611f..57d518300425 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -273,15 +273,33 @@ static void recover_worker(struct work_struct *work) mutex_lock(&dev->struct_mutex); if (msm_gpu_active(gpu)) { struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); - - /* retire completed submits, plus the one that hung: */ - retire_submits(gpu, fence + 1); + struct msm_ringbuffer *ring; + int i; inactive_cancel(gpu); + + FOR_EACH_RING(gpu, ring, i) { + uint32_t fence; + + if (!ring) + continue; + + fence = gpu->funcs->last_fence(gpu, ring); + + /* + * Retire the faulting command on the active ring and + * make sure the other rings are cleaned up + */ + if (ring == gpu->funcs->active_ring(gpu)) + retire_submits(gpu, fence + 1); + else + retire_submits(gpu, fence); + } + + /* Recover the GPU */ gpu->funcs->recover(gpu); - /* replay the remaining submits after the one that hung: */ + /* replay the remaining submits for all rings: */ list_for_each_entry(submit, &gpu->submit_list, node) { gpu->funcs->submit(gpu, submit, NULL); } @@ -303,25 +321,28 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = gpu->funcs->last_fence(gpu, ring); + uint32_t submitted = gpu->funcs->submitted_fence(gpu, ring); - if (fence != gpu->hangcheck_fence) { + if (fence != gpu->hangcheck_fence[ring->id]) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->submitted_fence) { + gpu->hangcheck_fence[ring->id] = fence; + } else if (fence < submitted) { /* no progress and not done.. hung! */ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + gpu->hangcheck_fence[ring->id] = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->submitted_fence); + gpu->name, submitted); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->submitted_fence > gpu->hangcheck_fence) + if (submitted > gpu->hangcheck_fence[ring->id]) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -430,54 +451,66 @@ out: static void retire_submits(struct msm_gpu *gpu, uint32_t fence) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; - - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); + /* + * Find and retire all the submits in the same ring that are older than + * or equal to 'fence' + */ - if (submit->fence <= fence) { + list_for_each_entry_safe(submit, tmp, &gpu->submit_list, node) { + if (COMPARE_FENCE_LTE(submit->fence, fence)) { list_del(&submit->node); kfree(submit); - } else { - break; } } } -static void retire_worker(struct work_struct *work) +static bool _fence_signaled(struct msm_gem_object *obj, uint32_t fence) { - struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); - struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->funcs->last_fence(gpu); + if (obj->write_fence & 0x3FFFFFFF) + return COMPARE_FENCE_LTE(obj->write_fence, fence); - msm_update_fence(gpu->dev, fence); + return COMPARE_FENCE_LTE(obj->read_fence, fence); +} - mutex_lock(&dev->struct_mutex); +static void _retire_ring(struct msm_gpu *gpu, uint32_t fence) +{ + struct msm_gem_object *obj, *tmp; retire_submits(gpu, fence); - while (!list_empty(&gpu->active_list)) { - struct msm_gem_object *obj; - - obj = list_first_entry(&gpu->active_list, - struct msm_gem_object, mm_list); - - if ((obj->read_fence <= fence) && - (obj->write_fence <= fence)) { - /* move to inactive: */ + list_for_each_entry_safe(obj, tmp, &gpu->active_list, mm_list) { + if (_fence_signaled(obj, fence)) { msm_gem_move_to_inactive(&obj->base); msm_gem_put_iova(&obj->base, gpu->aspace); drm_gem_object_unreference(&obj->base); - } else { - break; } } +} - mutex_unlock(&dev->struct_mutex); +static void retire_worker(struct work_struct *work) +{ + struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); + struct drm_device *dev = gpu->dev; + struct msm_ringbuffer *ring; + int i; + + FOR_EACH_RING(gpu, ring, i) { + uint32_t fence; + + if (!ring) + continue; + + fence = gpu->funcs->last_fence(gpu, ring); + msm_update_fence(gpu->dev, fence); + + mutex_lock(&dev->struct_mutex); + _retire_ring(gpu, fence); + mutex_unlock(&dev->struct_mutex); + } if (!msm_gpu_active(gpu)) inactive_start(gpu); @@ -497,13 +530,12 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->rb[submit->ring]; int i, ret; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - submit->fence = ++priv->next_fence; - - gpu->submitted_fence = submit->fence; + submit->fence = FENCE(submit->ring, ++priv->next_fence[submit->ring]); inactive_cancel(gpu); @@ -511,7 +543,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_rd_dump_submit(submit); - gpu->submitted_fence = submit->fence; + ring->submitted_fence = submit->fence; update_sw_cntrs(gpu); @@ -564,7 +596,8 @@ static const char *clk_names[] = { int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, - const char *name, const char *ioname, const char *irqname, int ringsz) + const char *name, const char *ioname, const char *irqname, + int nr_rings) { struct iommu_domain *iommu; int i, ret; @@ -666,17 +699,28 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name); } - /* Create ringbuffer: */ - mutex_lock(&drm->struct_mutex); - gpu->rb = msm_ringbuffer_new(gpu, ringsz); - mutex_unlock(&drm->struct_mutex); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); - goto fail; + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + WARN(1, "Only creating %lu ringbuffers\n", ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); } + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + mutex_lock(&drm->struct_mutex); + gpu->rb[i] = msm_ringbuffer_new(gpu, i); + mutex_unlock(&drm->struct_mutex); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + gpu->rb[i] = NULL; + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + } + + gpu->nr_rings = nr_rings; + #ifdef CONFIG_SMP gpu->pm_qos_req_dma.type = PM_QOS_REQ_AFFINE_IRQ; gpu->pm_qos_req_dma.irq = gpu->irq; @@ -690,20 +734,31 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + if (gpu->rb[i]) + msm_ringbuffer_destroy(gpu->rb[i]); + } + return ret; } void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + if (!gpu->rb[i]) + continue; + + if (gpu->rb[i]->iova) + msm_gem_put_iova(gpu->rb[i]->bo, gpu->aspace); + + msm_ringbuffer_destroy(gpu->rb[i]); } } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index c6f1d3bd36e9..bb2d7d6727fb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -49,10 +49,13 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); int (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct msm_gpu *gpu); - bool (*idle)(struct msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); - uint32_t (*last_fence)(struct msm_gpu *gpu); + uint32_t (*last_fence)(struct msm_gpu *gpu, + struct msm_ringbuffer *ring); + uint32_t (*submitted_fence)(struct msm_gpu *gpu, + struct msm_ringbuffer *ring); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -78,14 +81,12 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; - uint32_t submitted_fence; - /* is gpu powered/active? */ int active_cnt; bool inactive; @@ -123,15 +124,37 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; + uint32_t hangcheck_fence[MSM_GPU_MAX_RINGS]; struct work_struct recover_work; struct list_head submit_list; }; +/* It turns out that all targets use the same ringbuffer size. */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K + +static inline struct msm_ringbuffer *__get_ring(struct msm_gpu *gpu, int index) +{ + return (index < ARRAY_SIZE(gpu->rb) ? gpu->rb[index] : NULL); +} + +#define FOR_EACH_RING(gpu, ring, index) \ + for (index = 0, ring = (gpu)->rb[0]; \ + index < (gpu)->nr_rings && index < ARRAY_SIZE((gpu)->rb); \ + index++, ring = __get_ring(gpu, index)) + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->submitted_fence > gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring; + int i; + + FOR_EACH_RING(gpu, ring, i) { + if (gpu->funcs->submitted_fence(gpu, ring) > + gpu->funcs->last_fence(gpu, ring)) + return true; + } + + return false; } /* Perf-Counters: @@ -210,7 +233,9 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, - const char *name, const char *ioname, const char *irqname, int ringsz); + const char *name, const char *ioname, const char *irqname, + int nr_rings); + void msm_gpu_cleanup(struct msm_gpu *gpu); struct msm_gpu *adreno_load_gpu(struct drm_device *dev); diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 1f14b908b221..19ac38b2eba2 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,12 +18,13 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id) { struct msm_ringbuffer *ring; int ret; - size = ALIGN(size, 4); /* size should be dword aligned */ + /* We assume everwhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -32,7 +33,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - ring->bo = msm_gem_new(gpu->dev, size, MSM_BO_WC); + ring->id = id; + ring->bo = msm_gem_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, MSM_BO_WC); if (IS_ERR(ring->bo)) { ret = PTR_ERR(ring->bo); ring->bo = NULL; @@ -40,11 +42,9 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->start = msm_gem_vaddr_locked(ring->bo); - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); ring->cur = ring->start; - ring->size = size; - return ring; fail: diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e1049fa4f..e40d130853e0 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -22,12 +22,14 @@ struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; uint32_t *start, *end, *cur; + uint64_t iova; + uint32_t submitted_fence; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 20ef9bc424f3..be4a18c0712c 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -40,6 +40,15 @@ #define MSM_PIPE_2D1 0x02 #define MSM_PIPE_3D0 0x10 +/* The pipe-id just uses the lower bits, so can be OR'd with flags in + * the upper 16 bits (which could be extended further, if needed, maybe + * we extend/overload the pipe-id some day to deal with multiple rings, + * but even then I don't think we need the full lower 16 bits). + */ +#define MSM_PIPE_ID_MASK 0xffff +#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) +#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) + /* timeouts are specified in clock-monotonic absolute times (to simplify * restarting interrupted ioctls). The following struct is logically the * same as 'struct timespec' but 32/64b ABI safe. @@ -178,12 +187,18 @@ struct drm_msm_gem_submit_bo { __u64 presumed; /* in/out, presumed buffer address */ }; +/* Valid submit ioctl flags: */ +#define MSM_SUBMIT_RING_MASK 0x000F0000 +#define MSM_SUBMIT_RING_SHIFT 16 + +#define MSM_SUBMIT_FLAGS (MSM_SUBMIT_RING_MASK) + /* Each cmdstream submit consists of a table of buffers involved, and * one or more cmdstream buffers. This allows for conditional execution * (context-restore), and IB buffers needed for per tile/bin draw cmds. */ struct drm_msm_gem_submit { - __u32 pipe; /* in, MSM_PIPE_x */ + __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ __u32 fence; /* out */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ |
