summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
author Sharat Masetty <smasetty@codeaurora.org> 2017-04-17 12:28:38 +0530
committer Sharat Masetty <smasetty@codeaurora.org> 2017-04-24 10:28:17 +0530
commit e84be6486a923e82be8fa16a52c5bc0d2f355bc6 (patch)
tree 9d09b3b71a2648ca94c738c78f2976a7b45917d2 /drivers/gpu
parent ce87c901721567907d20d8c8323dec3a55ce3741 (diff)
drm/msm: Enable per cmdstream profiling for the user
If the user provides a profile buffer identified with a buffer type MSM_SUBMIT_CMD_PROFILE_BUF, then the driver records the kernel clock time and GPU ticks at the time of cmdstream submission, and the GPU records the ticks just before the start of the cmdstream execution and right after the end of the cmdstream execution.

Change-Id: Ic6298ec5919b18e976ae089ffb0860b8165ce4f3
Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/msm/adreno/a5xx_gpu.c 62
-rw-r--r-- drivers/gpu/drm/msm/adreno/adreno_gpu.c 1
-rw-r--r-- drivers/gpu/drm/msm/msm_gem.h 2
-rw-r--r-- drivers/gpu/drm/msm/msm_gem_submit.c 10
4 files changed, 75 insertions, 0 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 32b2c7fab839..9ceef8f437b5 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -133,10 +133,30 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x02);
+ /* Record the always on counter before command execution */
+ if (submit->profile_buf_iova) {
+ uint64_t gpuaddr = submit->profile_buf_iova +
+ offsetof(struct drm_msm_gem_submit_profile_buffer,
+ ticks_submitted);
+
+ /*
+ * Set bit[30] to make this command a 64 bit write operation.
+ * bits[18-29] specify the number of consecutive registers
+ * to copy, so set this field to 2, since we want to copy
+ * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI].
+ */
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
+ (1 << 30) | (2 << 18));
+ OUT_RING(ring, lower_32_bits(gpuaddr));
+ OUT_RING(ring, upper_32_bits(gpuaddr));
+ }
+
/* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
switch (submit->cmd[i].type) {
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
+ case MSM_SUBMIT_CMD_PROFILE_BUF:
break;
case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
@@ -164,6 +184,19 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x01);
+ /* Record the always on counter after command execution */
+ if (submit->profile_buf_iova) {
+ uint64_t gpuaddr = submit->profile_buf_iova +
+ offsetof(struct drm_msm_gem_submit_profile_buffer,
+ ticks_retired);
+
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
+ (1 << 30) | (2 << 18));
+ OUT_RING(ring, lower_32_bits(gpuaddr));
+ OUT_RING(ring, upper_32_bits(gpuaddr));
+ }
+
/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->fence);
@@ -193,6 +226,35 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
/* Set bit 0 to trigger an interrupt on preempt complete */
OUT_RING(ring, 0x01);
+ if (submit->profile_buf_iova) {
+ unsigned long flags;
+ uint64_t ktime;
+ struct drm_msm_gem_submit_profile_buffer *profile_buf =
+ submit->profile_buf_vaddr;
+
+ /*
+ * With this profiling, we are trying to create the closest
+ * possible mapping between the CPU time domain (monotonic clock)
+ * and the GPU time domain (ticks). In order to make this
+ * happen, we need to briefly turn off interrupts to make sure
+ * interrupts do not run between collecting these two samples.
+ */
+ local_irq_save(flags);
+
+ profile_buf->ticks_queued = gpu_read64(gpu,
+ REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
+ REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+
+ ktime = ktime_get_raw_ns();
+
+ local_irq_restore(flags);
+
+ do_div(ktime, NSEC_PER_SEC);
+
+ profile_buf->queue_time = ktime;
+ profile_buf->submit_time = ktime;
+ }
+
a5xx_flush(gpu, ring);
/* Check to see if we need to start preemption */
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 969ed810ce9d..19267b2a3b49 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -183,6 +183,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
/* ignore IB-targets */
break;
+ case MSM_SUBMIT_CMD_PROFILE_BUF:
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
break;
case MSM_SUBMIT_CMD_BUF:
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index ac46c473791f..2045dc34c20a 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -125,6 +125,8 @@ struct msm_gem_submit {
uint32_t fence;
int ring;
bool valid;
+ uint64_t profile_buf_iova;
+ void *profile_buf_vaddr;
unsigned int nr_cmds;
unsigned int nr_bos;
struct {
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 0566cefaae81..52fc81420690 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -48,6 +48,9 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
submit->nr_bos = 0;
submit->nr_cmds = 0;
+ submit->profile_buf_vaddr = NULL;
+ submit->profile_buf_iova = 0;
+
INIT_LIST_HEAD(&submit->bo_list);
ww_acquire_init(&submit->ticket, &reservation_ww_class);
}
@@ -393,6 +396,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
case MSM_SUBMIT_CMD_BUF:
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
+ case MSM_SUBMIT_CMD_PROFILE_BUF:
break;
default:
DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
@@ -425,6 +429,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
submit->cmd[i].iova = iova + submit_cmd.submit_offset;
submit->cmd[i].idx = submit_cmd.submit_idx;
+ if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) {
+ submit->profile_buf_iova = submit->cmd[i].iova;
+ submit->profile_buf_vaddr =
+ msm_gem_vaddr_locked(&msm_obj->base);
+ }
+
if (submit->valid)
continue;