summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHareesh Gundu <hareeshg@codeaurora.org>2017-08-24 23:11:09 +0530
committerHareesh Gundu <hareeshg@codeaurora.org>2017-09-13 11:41:46 +0530
commit22b96ae58a358e561b11a3bbfcb0ad7b7fb25f0d (patch)
treea46f1f0e28c83cc70a7b0230184119b677d2ddc4
parent14f6bfeeebb97c02cbf0c43818f7998e3bdb3cae (diff)
msm: kgsl: Do not crash on context detach timeout failure
If a context's wait for global times out in the detach path, there is no point in processing its pending commands. To handle a context detach timeout failure, invalidate the context and remove all of its pending commands, so that commands from other contexts proceed successfully after the context detach timeout fault recovery. Change-Id: Ie4ff0ed5d08312d345b248a2404ce085552b0b09 Signed-off-by: Hareesh Gundu <hareeshg@codeaurora.org>
-rw-r--r--drivers/gpu/msm/adreno.h1
-rw-r--r--drivers/gpu/msm/adreno_dispatch.c3
-rw-r--r--drivers/gpu/msm/adreno_drawctxt.c34
-rw-r--r--drivers/gpu/msm/adreno_drawctxt.h4
4 files changed, 30 insertions, 12 deletions
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 4a0acdcf8844..305163147c1a 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -184,6 +184,7 @@ enum adreno_gpurev {
#define ADRENO_TIMEOUT_FAULT BIT(2)
#define ADRENO_IOMMU_PAGE_FAULT BIT(3)
#define ADRENO_PREEMPT_FAULT BIT(4)
+#define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(5)
#define ADRENO_SPTP_PC_CTRL 0
#define ADRENO_PPD_CTRL 1
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index 1a94e71f5c1d..862d832823f7 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -1832,7 +1832,8 @@ static void process_cmdobj_fault(struct kgsl_device *device,
* because we won't see this cmdobj again
*/
- if (fault & ADRENO_TIMEOUT_FAULT)
+ if ((fault & ADRENO_TIMEOUT_FAULT) ||
+ (fault & ADRENO_CTX_DETATCH_TIMEOUT_FAULT))
bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
/*
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index b8ae24bc3935..3e765a61bd5e 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -301,6 +301,7 @@ void adreno_drawctxt_invalidate(struct kgsl_device *device,
/* Give the bad news to everybody waiting around */
wake_up_all(&drawctxt->waiting);
wake_up_all(&drawctxt->wq);
+ wake_up_all(&drawctxt->timeout);
}
/*
@@ -394,6 +395,7 @@ adreno_drawctxt_create(struct kgsl_device_private *dev_priv,
spin_lock_init(&drawctxt->lock);
init_waitqueue_head(&drawctxt->wq);
init_waitqueue_head(&drawctxt->waiting);
+ init_waitqueue_head(&drawctxt->timeout);
/* Set the context priority */
_set_context_priority(drawctxt);
@@ -506,20 +508,32 @@ void adreno_drawctxt_detach(struct kgsl_context *context)
drawctxt->internal_timestamp, 30 * 1000);
/*
- * If the wait for global fails due to timeout then nothing after this
- * point is likely to work very well - Get GPU snapshot and BUG_ON()
- * so we can take advantage of the debug tools to figure out what the
- * h - e - double hockey sticks happened. If EAGAIN error is returned
+ * If the wait for global fails due to timeout then mark it as
+ * context detach timeout fault and schedule dispatcher to kick
+ * in GPU recovery. For a ADRENO_CTX_DETATCH_TIMEOUT_FAULT we clear
+ * the policy and invalidate the context. If EAGAIN error is returned
* then recovery will kick in and there will be no more commands in the
- * RB pipe from this context which is waht we are waiting for, so ignore
- * -EAGAIN error
+ * RB pipe from this context which is what we are waiting for, so ignore
+ * -EAGAIN error.
*/
if (ret && ret != -EAGAIN) {
- KGSL_DRV_ERR(device, "Wait for global ts=%d type=%d error=%d\n",
- drawctxt->internal_timestamp,
+ KGSL_DRV_ERR(device,
+ "Wait for global ctx=%d ts=%d type=%d error=%d\n",
+ drawctxt->base.id, drawctxt->internal_timestamp,
drawctxt->type, ret);
- device->force_panic = 1;
- kgsl_device_snapshot(device, context);
+
+ adreno_set_gpu_fault(adreno_dev,
+ ADRENO_CTX_DETATCH_TIMEOUT_FAULT);
+ mutex_unlock(&device->mutex);
+
+ /* Schedule dispatcher to kick in recovery */
+ adreno_dispatcher_schedule(device);
+
+ /* Wait for context to be invalidated and release context */
+ ret = wait_event_interruptible_timeout(drawctxt->timeout,
+ kgsl_context_invalid(&drawctxt->base),
+ msecs_to_jiffies(5000));
+ return;
}
kgsl_sharedmem_writel(device, &device->memstore,
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 0578f16ae9e1..07108eaf502f 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -40,6 +40,7 @@ struct kgsl_context;
* @pending: Priority list node for the dispatcher list of pending contexts
* @wq: Workqueue structure for contexts to sleep pending room in the queue
* @waiting: Workqueue structure for contexts waiting for a timestamp or event
+ * @timeout: Workqueue structure for contexts waiting to invalidate
* @queued: Number of commands queued in the drawqueue
* @fault_policy: GFT fault policy set in _skip_cmd();
* @debug_root: debugfs entry for this context.
@@ -68,6 +69,7 @@ struct adreno_context {
struct plist_node pending;
wait_queue_head_t wq;
wait_queue_head_t waiting;
+ wait_queue_head_t timeout;
int queued;
unsigned int fault_policy;