From 6f4e75a49fd07d707995865493b9f452302ae36b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Nov 2011 17:59:46 -0800 Subject: [SCSI] libsas: kill sas_slave_destroy Per commit 3e4ec344 "libata: kill ATA_FLAG_DISABLED" needing to set ATA_DEV_NONE is a holdover from before libsas converted to the "new-style" ata-eh. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index b6e233d9a0a1..e95e5e17bd88 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -797,14 +797,6 @@ int sas_slave_configure(struct scsi_device *scsi_dev) return 0; } -void sas_slave_destroy(struct scsi_device *scsi_dev) -{ - struct domain_device *dev = sdev_to_domain_dev(scsi_dev); - - if (dev_is_sata(dev)) - sas_to_ata_dev(dev)->class = ATA_DEV_NONE; -} - int sas_change_queue_depth(struct scsi_device *sdev, int depth, int reason) { struct domain_device *dev = sdev_to_domain_dev(sdev); @@ -1108,7 +1100,6 @@ EXPORT_SYMBOL_GPL(sas_request_addr); EXPORT_SYMBOL_GPL(sas_queuecommand); EXPORT_SYMBOL_GPL(sas_target_alloc); EXPORT_SYMBOL_GPL(sas_slave_configure); -EXPORT_SYMBOL_GPL(sas_slave_destroy); EXPORT_SYMBOL_GPL(sas_change_queue_depth); EXPORT_SYMBOL_GPL(sas_change_queue_type); EXPORT_SYMBOL_GPL(sas_bios_param); -- cgit v1.2.3 From 735f7d2fedf57380214221be7bed7f62d729e262 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Nov 2011 17:59:47 -0800 Subject: [SCSI] libsas: fix domain_device leak Arrange for the deallocation of a struct domain_device object when it no longer has: 1/ any children 2/ references by any scsi_targets 3/ references by a lldd The comment about domain_device lifetime in Documentation/scsi/libsas.txt is stale as it appears mainline never had a version of a struct domain_device that was registered as a kobject. We now manage domain_device reference counts on behalf of external agents. Reviewed-by: Jack Wang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index e95e5e17bd88..2a163c73fd8b 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -737,16 +737,10 @@ struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy) return found_dev; } -static inline struct domain_device *sas_find_target(struct scsi_target *starget) -{ - struct sas_rphy *rphy = dev_to_rphy(starget->dev.parent); - - return sas_find_dev_by_rphy(rphy); -} - int sas_target_alloc(struct scsi_target *starget) { - struct domain_device *found_dev = sas_find_target(starget); + struct sas_rphy *rphy = dev_to_rphy(starget->dev.parent); + struct domain_device *found_dev = sas_find_dev_by_rphy(rphy); int res; if (!found_dev) @@ -758,6 +752,7 @@ int sas_target_alloc(struct scsi_target *starget) return res; } + kref_get(&found_dev->kref); starget->hostdata = found_dev; return 0; } @@ -1047,7 +1042,7 @@ int sas_slave_alloc(struct scsi_device *scsi_dev) void sas_target_destroy(struct scsi_target *starget) { - struct domain_device *found_dev = sas_find_target(starget); + struct domain_device *found_dev = starget->hostdata; if (!found_dev) return; @@ -1055,7 +1050,8 @@ void sas_target_destroy(struct scsi_target *starget) if (dev_is_sata(found_dev)) ata_sas_port_destroy(found_dev->sata_dev.ap); - return; + starget->hostdata = NULL; + sas_put_device(found_dev); } static void sas_parse_addr(u8 *sas_addr, const char *p) -- cgit v1.2.3 From 312d3e56119a4bc5c36a96818f87f650c069ddc2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Nov 2011 17:59:50 -0800 Subject: [SCSI] libsas: remove ata_port.lock management duties from lldds Each libsas driver (mvsas, pm8001, and isci) has invented a different method for managing the ap->lock. The lock is held by the ata ->queuecommand() path. mvsas drops it prior to acquiring any internal locks which allows it to hold its internal lock across calls to task->task_done(). This capability is important as it is the only way the driver can flush task->task_done() instances to guarantee that it no longer has any in-flight references to a domain_device at ->lldd_dev_gone() time. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 2a163c73fd8b..fd60465d4b2d 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -198,11 +198,9 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) } if (dev_is_sata(dev)) { - unsigned long flags; - - spin_lock_irqsave(dev->sata_dev.ap->lock, flags); + spin_lock_irq(dev->sata_dev.ap->lock); res = ata_sas_queuecmd(cmd, dev->sata_dev.ap); - spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags); + spin_unlock_irq(dev->sata_dev.ap->lock); return res; } -- cgit v1.2.3 From e139942d77a6e3ac83bc322e826668054a8601d6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 7 Jan 2012 08:52:39 +0000 Subject: [SCSI] libsas: convert dev->gone to flags In preparation for adding tracking of another device state "destroy". Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index fd60465d4b2d..15533a17eb97 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -192,7 +192,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) int res = 0; /* If the device fell off, no sense in issuing commands */ - if (dev->gone) { + if (test_bit(SAS_DEV_GONE, &dev->state)) { cmd->result = DID_BAD_TARGET << 16; goto out_done; } -- cgit v1.2.3 From 3dff5721e4f67e6231dfc419d30aaa7563bfffd4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 28 Nov 2011 12:08:22 -0800 Subject: [SCSI] libsas: close error handling vs sas_ata_task_done() race Since sas_ata does not implement ->freeze(), completions for scmds and internal commands can still arrive concurrent with ata_scsi_cmd_error_handler() and sas_ata_post_internal() respectively. By the time either of those is called libata has committed to completing the qc, and the ATA_PFLAG_FROZEN flag tells sas_ata_task_done() it has lost the race. In the sas_ata_post_internal() case we take on the additional responsibility of freeing the sas_task to close the race with sas_ata_task_done() freeing the the task while sas_ata_post_internal() is in the process of invoking ->lldd_abort_task(). Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 44 ------------------------------------- 1 file changed, 44 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 15533a17eb97..ba5876ccd29a 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -956,49 +956,6 @@ void sas_shutdown_queue(struct sas_ha_struct *sas_ha) spin_unlock_irqrestore(&core->task_queue_lock, flags); } -/* - * Call the LLDD task abort routine directly. This function is intended for - * use by upper layers that need to tell the LLDD to abort a task. - */ -int __sas_task_abort(struct sas_task *task) -{ - struct sas_internal *si = - to_sas_internal(task->dev->port->ha->core.shost->transportt); - unsigned long flags; - int res; - - spin_lock_irqsave(&task->task_state_lock, flags); - if (task->task_state_flags & SAS_TASK_STATE_ABORTED || - task->task_state_flags & SAS_TASK_STATE_DONE) { - spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("%s: Task %p already finished.\n", __func__, - task); - return 0; - } - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - spin_unlock_irqrestore(&task->task_state_lock, flags); - - if (!si->dft->lldd_abort_task) - return -ENODEV; - - res = si->dft->lldd_abort_task(task); - - spin_lock_irqsave(&task->task_state_lock, flags); - if ((task->task_state_flags & SAS_TASK_STATE_DONE) || - (res == TMF_RESP_FUNC_COMPLETE)) - { - spin_unlock_irqrestore(&task->task_state_lock, flags); - task->task_done(task); - return 0; - } - - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) - task->task_state_flags &= ~SAS_TASK_STATE_ABORTED; - spin_unlock_irqrestore(&task->task_state_lock, flags); - - return -EAGAIN; -} - /* * Tell an upper layer that it needs to initiate an abort for a given task. * This should only ever be called by an LLDD. @@ -1097,7 +1054,6 @@ EXPORT_SYMBOL_GPL(sas_slave_configure); EXPORT_SYMBOL_GPL(sas_change_queue_depth); EXPORT_SYMBOL_GPL(sas_change_queue_type); EXPORT_SYMBOL_GPL(sas_bios_param); -EXPORT_SYMBOL_GPL(__sas_task_abort); EXPORT_SYMBOL_GPL(sas_task_abort); EXPORT_SYMBOL_GPL(sas_phy_reset); EXPORT_SYMBOL_GPL(sas_phy_enable); -- cgit v1.2.3 From a3a142524aa4b1539a64a55087bf12ffa4b1f94e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Dec 2011 23:24:42 -0800 Subject: [SCSI] libsas: prevent double completion of scmds from eh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We invoke task->task_done() to free the task in the eh case, but at this point we are prepared for scsi_eh_flush_done_q() to finish off the scmd. Introduce sas_end_task() to capture the final response status from the lldd and free the task. Also take the opportunity to kill this warning. drivers/scsi/libsas/sas_scsi_host.c: In function ‘sas_end_task’: drivers/scsi/libsas/sas_scsi_host.c:102:3: warning: case value ‘2’ not in enumerated type ‘enum exec_status’ [-Wswitch] Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 61 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 28 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index ba5876ccd29a..50db8f971a06 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -49,27 +49,12 @@ #include #include -/* ---------- SCSI Host glue ---------- */ - -static void sas_scsi_task_done(struct sas_task *task) +/* record final status and free the task */ +static void sas_end_task(struct scsi_cmnd *sc, struct sas_task *task) { struct task_status_struct *ts = &task->task_status; - struct scsi_cmnd *sc = task->uldd_task; int hs = 0, stat = 0; - if (unlikely(task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - /* Aborted tasks will be completed by the error handler */ - SAS_DPRINTK("task done but aborted\n"); - return; - } - - if (unlikely(!sc)) { - SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n"); - list_del_init(&task->list); - sas_free_task(task); - return; - } - if (ts->resp == SAS_TASK_UNDELIVERED) { /* transport error */ hs = DID_NO_CONNECT; @@ -124,10 +109,32 @@ static void sas_scsi_task_done(struct sas_task *task) break; } } - ASSIGN_SAS_TASK(sc, NULL); + sc->result = (hs << 16) | stat; + ASSIGN_SAS_TASK(sc, NULL); list_del_init(&task->list); sas_free_task(task); +} + +static void sas_scsi_task_done(struct sas_task *task) +{ + struct scsi_cmnd *sc = task->uldd_task; + + if (unlikely(task->task_state_flags & SAS_TASK_STATE_ABORTED)) { + /* Aborted tasks will be completed by the error handler */ + SAS_DPRINTK("task done but aborted\n"); + return; + } + + if (unlikely(!sc)) { + SAS_DPRINTK("task_done called with non existing SCSI cmnd!\n"); + list_del_init(&task->list); + sas_free_task(task); + return; + } + + ASSIGN_SAS_TASK(sc, NULL); + sas_end_task(sc, task); sc->scsi_done(sc); } @@ -236,18 +243,16 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) struct sas_task *task = TO_SAS_TASK(cmd); struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host); - /* remove the aborted task flag to allow the task to be - * completed now. At this point, we only get called following - * an actual abort of the task, so we should be guaranteed not - * to be racing with any completions from the LLD (hence we - * don't need the task state lock to clear the flag) */ - task->task_state_flags &= ~SAS_TASK_STATE_ABORTED; - /* Now call task_done. However, task will be free'd after - * this */ - task->task_done(task); + /* At this point, we only get called following an actual abort + * of the task, so we should be guaranteed not to be racing with + * any completions from the LLD. Task is freed after this. + */ + sas_end_task(cmd, task); + /* now finish the command and move it on to the error * handler done list, this also takes it off the - * error handler pending list */ + * error handler pending list. + */ scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q); } -- cgit v1.2.3 From 9095a64a9aead653df320e3a6fc70835c15d46e4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 28 Nov 2011 11:29:20 -0800 Subject: [SCSI] libsas: fix timeout vs completion race Until we have told the lldd to forget a task a timed out operation can return from the hardware at any time. Since completion frees the task we need to make sure that no tasks run their normal completion handler once eh has decided to manage the task. Similar to ata_scsi_cmd_error_handler() freeze completions to let eh judge the outcome of the race. Task collector mode is problematic because it presents a situation where a task can be timed out and aborted before the lldd has even seen it. For this case we need to guarantee that a task that an lldd has been told to forget does not get queued after the lldd says "never seen it". With sas_scsi_timed_out we achieve this with the ->task_queue_flush mutex, rather than adding more time. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 104 ++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 53 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 50db8f971a06..0e3fdba7b510 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -119,9 +119,19 @@ static void sas_end_task(struct scsi_cmnd *sc, struct sas_task *task) static void sas_scsi_task_done(struct sas_task *task) { struct scsi_cmnd *sc = task->uldd_task; + struct domain_device *dev = task->dev; + struct sas_ha_struct *ha = dev->port->ha; + unsigned long flags; + + spin_lock_irqsave(&dev->done_lock, flags); + if (test_bit(SAS_HA_FROZEN, &ha->state)) + task = NULL; + else + ASSIGN_SAS_TASK(sc, NULL); + spin_unlock_irqrestore(&dev->done_lock, flags); - if (unlikely(task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - /* Aborted tasks will be completed by the error handler */ + if (unlikely(!task)) { + /* task will be completed by the error handler */ SAS_DPRINTK("task done but aborted\n"); return; } @@ -133,7 +143,6 @@ static void sas_scsi_task_done(struct sas_task *task) return; } - ASSIGN_SAS_TASK(sc, NULL); sas_end_task(sc, task); sc->scsi_done(sc); } @@ -298,6 +307,7 @@ enum task_disposition { TASK_IS_DONE, TASK_IS_ABORTED, TASK_IS_AT_LU, + TASK_IS_NOT_AT_HA, TASK_IS_NOT_AT_LU, TASK_ABORT_FAILED, }; @@ -314,19 +324,18 @@ static enum task_disposition sas_scsi_find_task(struct sas_task *task) struct scsi_core *core = &ha->core; struct sas_task *t, *n; + mutex_lock(&core->task_queue_flush); spin_lock_irqsave(&core->task_queue_lock, flags); - list_for_each_entry_safe(t, n, &core->task_queue, list) { + list_for_each_entry_safe(t, n, &core->task_queue, list) if (task == t) { list_del_init(&t->list); - spin_unlock_irqrestore(&core->task_queue_lock, - flags); - SAS_DPRINTK("%s: task 0x%p aborted from " - "task_queue\n", - __func__, task); - return TASK_IS_ABORTED; + break; } - } spin_unlock_irqrestore(&core->task_queue_lock, flags); + mutex_unlock(&core->task_queue_flush); + + if (task == t) + return TASK_IS_NOT_AT_HA; } for (i = 0; i < 5; i++) { @@ -499,8 +508,7 @@ try_bus_reset: } static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, - struct list_head *work_q, - struct list_head *done_q) + struct list_head *work_q) { struct scsi_cmnd *cmd, *n; enum task_disposition res = TASK_IS_DONE; @@ -511,7 +519,16 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, Again: list_for_each_entry_safe(cmd, n, work_q, eh_entry) { - struct sas_task *task = TO_SAS_TASK(cmd); + struct domain_device *dev = cmd_to_domain_dev(cmd); + struct sas_task *task; + + spin_lock_irqsave(&dev->done_lock, flags); + /* by this point the lldd has either observed + * SAS_HA_FROZEN and is leaving the task alone, or has + * won the race with eh and decided to complete it + */ + task = TO_SAS_TASK(cmd); + spin_unlock_irqrestore(&dev->done_lock, flags); if (!task) continue; @@ -534,6 +551,14 @@ Again: cmd->eh_eflags = 0; switch (res) { + case TASK_IS_NOT_AT_HA: + SAS_DPRINTK("%s: task 0x%p is not at ha: %s\n", + __func__, task, + cmd->retries ? "retry" : "aborted"); + if (cmd->retries) + cmd->retries--; + sas_eh_finish_cmd(cmd); + continue; case TASK_IS_DONE: SAS_DPRINTK("%s: task 0x%p is done\n", __func__, task); @@ -635,7 +660,8 @@ void sas_scsi_recover_host(struct Scsi_Host *shost) * Deal with commands that still have SAS tasks (i.e. they didn't * complete via the normal sas_task completion mechanism) */ - if (sas_eh_handle_sas_errors(shost, &eh_work_q, &ha->eh_done_q)) + set_bit(SAS_HA_FROZEN, &ha->state); + if (sas_eh_handle_sas_errors(shost, &eh_work_q)) goto out; /* @@ -649,6 +675,10 @@ void sas_scsi_recover_host(struct Scsi_Host *shost) scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); out: + clear_bit(SAS_HA_FROZEN, &ha->state); + if (ha->lldd_max_execute_num > 1) + wake_up_process(ha->core.queue_thread); + /* now link into libata eh --- if we have any ata devices */ sas_ata_strategy_handler(shost); @@ -660,43 +690,7 @@ out: enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) { - struct sas_task *task = TO_SAS_TASK(cmd); - unsigned long flags; - enum blk_eh_timer_return rtn; - - if (sas_ata_timed_out(cmd, task, &rtn)) - return rtn; - - if (!task) { - cmd->request->timeout /= 2; - SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n", - cmd, task, (cmd->request->timeout ? - "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED")); - if (!cmd->request->timeout) - return BLK_EH_NOT_HANDLED; - return BLK_EH_RESET_TIMER; - } - - spin_lock_irqsave(&task->task_state_lock, flags); - BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED); - if (task->task_state_flags & SAS_TASK_STATE_DONE) { - spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: " - "BLK_EH_HANDLED\n", cmd, task); - return BLK_EH_HANDLED; - } - if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) { - spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: " - "BLK_EH_RESET_TIMER\n", - cmd, task); - return BLK_EH_RESET_TIMER; - } - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - spin_unlock_irqrestore(&task->task_state_lock, flags); - - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n", - cmd, task); + scmd_printk(KERN_DEBUG, cmd, "command %p timed out\n", cmd); return BLK_EH_NOT_HANDLED; } @@ -861,9 +855,11 @@ static void sas_queue(struct sas_ha_struct *sas_ha) int res; struct sas_internal *i = to_sas_internal(core->shost->transportt); + mutex_lock(&core->task_queue_flush); spin_lock_irqsave(&core->task_queue_lock, flags); while (!kthread_should_stop() && - !list_empty(&core->task_queue)) { + !list_empty(&core->task_queue) && + !test_bit(SAS_HA_FROZEN, &sas_ha->state)) { can_queue = sas_ha->lldd_queue_size - core->task_queue_size; if (can_queue >= 0) { @@ -899,6 +895,7 @@ static void sas_queue(struct sas_ha_struct *sas_ha) } } spin_unlock_irqrestore(&core->task_queue_lock, flags); + mutex_unlock(&core->task_queue_flush); } /** @@ -925,6 +922,7 @@ int sas_init_queue(struct sas_ha_struct *sas_ha) struct scsi_core *core = &sas_ha->core; spin_lock_init(&core->task_queue_lock); + mutex_init(&core->task_queue_flush); core->task_queue_size = 0; INIT_LIST_HEAD(&core->task_queue); -- cgit v1.2.3 From 3944f50995f947558c35fb16ae0288354756762c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 29 Nov 2011 12:08:50 -0800 Subject: [SCSI] libsas: let libata handle command timeouts libsas-eh if it successfully aborts an ata command will hide the timeout condition (AC_ERR_TIMEOUT) from libata. The command likely completes with the all-zero task->task_status it started with. Instead, interpret a TMF_RESP_FUNC_COMPLETE as the end of the sas_task but keep the scmd around for libata-eh to handle. Tested-by: Andrzej Jakowski Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 0e3fdba7b510..e02ca3d570f5 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -265,6 +265,22 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q); } +static void sas_eh_defer_cmd(struct scsi_cmnd *cmd) +{ + struct sas_task *task = TO_SAS_TASK(cmd); + struct domain_device *dev = task->dev; + struct sas_ha_struct *ha = dev->port->ha; + + if (!dev_is_sata(dev)) { + sas_eh_finish_cmd(cmd); + return; + } + + /* report the timeout to libata */ + sas_end_task(cmd, task); + list_move_tail(&cmd->eh_entry, &ha->eh_ata_q); +} + static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd) { struct scsi_cmnd *cmd, *n; @@ -562,12 +578,12 @@ Again: case TASK_IS_DONE: SAS_DPRINTK("%s: task 0x%p is done\n", __func__, task); - sas_eh_finish_cmd(cmd); + sas_eh_defer_cmd(cmd); continue; case TASK_IS_ABORTED: SAS_DPRINTK("%s: task 0x%p is aborted\n", __func__, task); - sas_eh_finish_cmd(cmd); + sas_eh_defer_cmd(cmd); continue; case TASK_IS_AT_LU: SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task); @@ -635,12 +651,14 @@ Again: goto clear_q; } } + list_splice_tail_init(&ha->eh_ata_q, work_q); return list_empty(work_q); clear_q: SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__); list_for_each_entry_safe(cmd, n, work_q, eh_entry) sas_eh_finish_cmd(cmd); + list_splice_tail_init(&ha->eh_ata_q, work_q); return list_empty(work_q); } -- cgit v1.2.3 From 3a2cdf391b62919d3d2862cdce3d70b9a7a99673 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 29 Nov 2011 14:54:28 -0800 Subject: [SCSI] libsas: defer SAS_TASK_NEED_DEV_RESET commands to libata lldds use the SAS_TASK_NEED_DEV_RESET interface to request that eh perform a reset. In the sata device case defer the commands that triggered the reset to libata-eh context so it can perform its pre and post reset management. In the sas_ata_post_internal() case the reset request is falling on deaf ears as the sas_task is immediately destroyed without any reset action. Since it is currently a nop, and likely superfluous given the conversion to new-style libata-eh, just drop the request. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index e02ca3d570f5..af71a6d0edae 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -288,7 +288,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd list_for_each_entry_safe(cmd, n, error_q, eh_entry) { if (cmd->device->sdev_target == my_cmd->device->sdev_target && cmd->device->lun == my_cmd->device->lun) - sas_eh_finish_cmd(cmd); + sas_eh_defer_cmd(cmd); } } @@ -594,7 +594,7 @@ Again: "recovered\n", SAS_ADDR(task->dev), cmd->device->lun); - sas_eh_finish_cmd(cmd); + sas_eh_defer_cmd(cmd); sas_scsi_clear_queue_lu(work_q, cmd); goto Again; } -- cgit v1.2.3 From 2a559f4ba443265b4c58925b48296f1cf81b49f9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Dec 2011 00:06:57 -0800 Subject: [SCSI] libsas: sas_phy_enable via transport_sas_phy_reset Execute the link-reset triggered by sas_phy_enable via transport_sas_phy_reset so that it can be managed by libata. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index af71a6d0edae..5cc44fddfe95 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -1077,7 +1077,6 @@ EXPORT_SYMBOL_GPL(sas_change_queue_type); EXPORT_SYMBOL_GPL(sas_bios_param); EXPORT_SYMBOL_GPL(sas_task_abort); EXPORT_SYMBOL_GPL(sas_phy_reset); -EXPORT_SYMBOL_GPL(sas_phy_enable); EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler); EXPORT_SYMBOL_GPL(sas_eh_bus_reset_handler); EXPORT_SYMBOL_GPL(sas_slave_alloc); -- cgit v1.2.3 From f41a0c441c3fe43e79ebeb75584dbb5bfa83e5cd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Dec 2011 21:33:17 -0800 Subject: [SCSI] libsas: fix sas_find_local_phy(), take phy references In the direct-attached case this routine returns the phy on which this device was first discovered. Which is broken if we want to support wide-targets, as this phy reference can become stale even though the port is still active. In the expander-attached case this routine tries to lookup the phy by scanning the attached sas addresses of the parent expander, and BUG_ONs if it can't find it. However since eh and the libsas workqueue run independently we can still be attempting device recovery via eh after libsas has recorded the device as detached. This is even easier to hit now that eh is blocked while device domain rediscovery takes place, and that libata is fed more timed out commands increasing the chances that it will try to recover the ata device. Arrange for dev->phy to always point to a last known good phy, it may be stale after the port is torn down, but it will catch up for wide port reconfigurations, and never be NULL. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 38 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 5cc44fddfe95..94ef76316c31 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -439,30 +439,26 @@ static int sas_recover_I_T(struct domain_device *dev) return res; } -/* Find the sas_phy that's attached to this device */ -struct sas_phy *sas_find_local_phy(struct domain_device *dev) +/* take a reference on the last known good phy for this device */ +struct sas_phy *sas_get_local_phy(struct domain_device *dev) { - struct domain_device *pdev = dev->parent; - struct ex_phy *exphy = NULL; - int i; + struct sas_ha_struct *ha = dev->port->ha; + struct sas_phy *phy; + unsigned long flags; - /* Directly attached device */ - if (!pdev) - return dev->port->phy; + /* a published domain device always has a valid phy, it may be + * stale, but it is never NULL + */ + BUG_ON(!dev->phy); - /* Otherwise look in the expander */ - for (i = 0; i < pdev->ex_dev.num_phys; i++) - if (!memcmp(dev->sas_addr, - pdev->ex_dev.ex_phy[i].attached_sas_addr, - SAS_ADDR_SIZE)) { - exphy = &pdev->ex_dev.ex_phy[i]; - break; - } + spin_lock_irqsave(&ha->phy_port_lock, flags); + phy = dev->phy; + get_device(&phy->dev); + spin_unlock_irqrestore(&ha->phy_port_lock, flags); - BUG_ON(!exphy); - return exphy->phy; + return phy; } -EXPORT_SYMBOL_GPL(sas_find_local_phy); +EXPORT_SYMBOL_GPL(sas_get_local_phy); /* Attempt to send a LUN reset message to a device */ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) @@ -489,7 +485,7 @@ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd) { struct domain_device *dev = cmd_to_domain_dev(cmd); - struct sas_phy *phy = sas_find_local_phy(dev); + struct sas_phy *phy = sas_get_local_phy(dev); int res; res = sas_phy_reset(phy, 1); @@ -497,6 +493,8 @@ int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd) SAS_DPRINTK("Bus reset of %s failed 0x%x\n", kobject_name(&phy->dev.kobj), res); + sas_put_local_phy(phy); + if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) return SUCCESS; -- cgit v1.2.3 From 45c73b65194173e77030d5b95abe5b63a402d268 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Jan 2012 10:12:52 -0800 Subject: [SCSI] libsas: pre-clean commands that won the eh vs completion race When scrolling forward through the eh list (in a clear_q scenario) it is possible to encounter commands that won the completion vs eh race. Rather than sprinkle more "if (!task)" throughout the handler just make a pass through the list and delete the race winners before handling the rest. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 94ef76316c31..731c89250639 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -249,8 +249,8 @@ out_done: static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) { - struct sas_task *task = TO_SAS_TASK(cmd); struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host); + struct sas_task *task = TO_SAS_TASK(cmd); /* At this point, we only get called following an actual abort * of the task, so we should be guaranteed not to be racing with @@ -267,9 +267,9 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd) static void sas_eh_defer_cmd(struct scsi_cmnd *cmd) { - struct sas_task *task = TO_SAS_TASK(cmd); - struct domain_device *dev = task->dev; + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_ha_struct *ha = dev->port->ha; + struct sas_task *task = TO_SAS_TASK(cmd); if (!dev_is_sata(dev)) { sas_eh_finish_cmd(cmd); @@ -530,8 +530,9 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct sas_internal *i = to_sas_internal(shost->transportt); unsigned long flags; struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); + LIST_HEAD(done); -Again: + /* clean out any commands that won the completion vs eh race */ list_for_each_entry_safe(cmd, n, work_q, eh_entry) { struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_task *task; @@ -545,7 +546,12 @@ Again: spin_unlock_irqrestore(&dev->done_lock, flags); if (!task) - continue; + list_move_tail(&cmd->eh_entry, &done); + } + + Again: + list_for_each_entry_safe(cmd, n, work_q, eh_entry) { + struct sas_task *task = TO_SAS_TASK(cmd); list_del_init(&cmd->eh_entry); @@ -649,15 +655,16 @@ Again: goto clear_q; } } + out: + list_splice_tail(&done, work_q); list_splice_tail_init(&ha->eh_ata_q, work_q); return list_empty(work_q); -clear_q: + + clear_q: SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__); list_for_each_entry_safe(cmd, n, work_q, eh_entry) sas_eh_finish_cmd(cmd); - - list_splice_tail_init(&ha->eh_ata_q, work_q); - return list_empty(work_q); + goto out; } void sas_scsi_recover_host(struct Scsi_Host *shost) -- cgit v1.2.3 From 8abda4d28a55ecb91e39ceb5e3ee264c5a3cd1af Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 10 Jan 2012 15:14:09 -0800 Subject: [SCSI] libsas: close scsi_remove_target() vs libata-eh race ata_port lifetime in libata follows the host. In libsas it follows the scsi_target. Once scsi_remove_device() has caused all commands to be completed it allows scsi_remove_target() to immediately proceed to freeing the ata_port causing bug reports like: [ 848.393333] BUG: spinlock bad magic on CPU#4, kworker/u:2/5107 [ 848.400262] general protection fault: 0000 [#1] SMP [ 848.406244] CPU 4 [ 848.408310] Modules linked in: nls_utf8 ipv6 uinput i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support ioatdma dca sg sd_mod sr_mod cdrom ahci libahci isci libsas libata scsi_transport_sas [last unloaded: scsi_wait_scan] [ 848.432060] [ 848.434137] Pid: 5107, comm: kworker/u:2 Not tainted 3.2.0-isci+ #8 Intel Corporation S2600CP/S2600CP [ 848.445310] RIP: 0010:[] [] spin_dump+0x5e/0x8c [ 848.454787] RSP: 0018:ffff8807f868dca0 EFLAGS: 00010002 [ 848.461137] RAX: 0000000000000048 RBX: ffff8807fe86a630 RCX: ffffffff817d0be0 [ 848.469520] RDX: 0000000000000000 RSI: ffffffff814af1cf RDI: 0000000000000002 [ 848.477959] RBP: ffff8807f868dcb0 R08: 00000000ffffffff R09: 000000006b6b6b6b [ 848.486327] R10: 000000000003fb8c R11: ffffffff81a19448 R12: 6b6b6b6b6b6b6b6b [ 848.494699] R13: ffff8808027dc520 R14: 0000000000000000 R15: 000000000000001e [ 848.503067] FS: 0000000000000000(0000) GS:ffff88083fd00000(0000) knlGS:0000000000000000 [ 848.512899] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 848.519710] CR2: 00007ff77d001000 CR3: 00000007f7a5d000 CR4: 00000000000406e0 [ 848.528072] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 848.536446] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 848.544831] Process kworker/u:2 (pid: 5107, threadinfo ffff8807f868c000, task ffff8807ff348000) [ 848.555327] Stack: [ 848.557959] ffff8807fe86a630 ffff8807fe86a630 ffff8807f868dcd0 ffffffff8126a6e0 [ 848.567072] ffffffff817c142f ffff8807fe86a630 ffff8807f868dcf0 ffffffff8126a703 [ 848.576190] ffff8808027dc520 0000000000000286 ffff8807f868dd10 ffffffff814af1bb [ 848.585281] Call Trace: [ 848.588409] [] spin_bug+0x26/0x28 [ 848.594357] [] do_raw_spin_unlock+0x21/0x88 [ 848.601283] [] _raw_spin_unlock_irqrestore+0x2c/0x65 [ 848.609089] [] ata_scsi_port_error_handler+0x548/0x557 [libata] [ 848.618331] [] ? async_schedule+0x17/0x17 [ 848.625060] [] async_sas_ata_eh+0x45/0x69 [libsas] [ 848.632655] [] async_run_entry_fn+0x97/0x125 [ 848.639670] [] process_one_work+0x207/0x38d [ 848.646577] [] ? process_one_work+0x15a/0x38d [ 848.653681] [] worker_thread+0x138/0x21c [ 848.660305] [] ? process_one_work+0x38d/0x38d [ 848.667493] [] kthread+0x9d/0xa5 [ 848.673382] [] ? trace_hardirqs_on_caller+0x12f/0x166 [ 848.681304] [] kernel_thread_helper+0x4/0x10 [ 848.688324] [] ? retint_restore_args+0x13/0x13 [ 848.695530] [] ? __init_kthread_worker+0x5b/0x5b [ 848.702929] [] ? gs_change+0x13/0x13 [ 848.709155] Code: 00 00 48 8d 88 38 04 00 00 44 8b 80 84 02 00 00 31 c0 e8 cf 1b 24 00 41 83 c8 ff 44 8b 4b 08 48 c7 c1 e0 0b 7d 81 4d 85 e4 74 10 <45> 8b 84 24 84 02 00 00 49 8d 8c 24 38 04 00 00 8b 53 04 48 89 [ 848.732467] RIP [] spin_dump+0x5e/0x8c [ 848.738905] RSP [ 848.743743] ---[ end trace 143161646eee8caa ]--- ...so arrange for the ata_port to have the same end of life as the domain device. Reported-by: Marcin Tomczak Acked-by: Jeff Garzik Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 731c89250639..b563ff27626b 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -1028,9 +1028,6 @@ void sas_target_destroy(struct scsi_target *starget) if (!found_dev) return; - if (dev_is_sata(found_dev)) - ata_sas_port_destroy(found_dev->sata_dev.ap); - starget->hostdata = NULL; sas_put_device(found_dev); } -- cgit v1.2.3 From d230ce691c7712c4f56ba3378d6d2f44628a49f1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 11 Jan 2012 12:08:36 -0800 Subject: [SCSI] libsas: fix mixed topology recovery If we have a domain with sas and sata devices there may still be sas recovery actions to take after peeling off the commands to send to libata. Reported-by: Andrzej Jakowski Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index b563ff27626b..e58ca50517d5 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -678,7 +678,8 @@ void sas_scsi_recover_host(struct Scsi_Host *shost) shost->host_eh_scheduled = 0; spin_unlock_irqrestore(shost->host_lock, flags); - SAS_DPRINTK("Enter %s\n", __func__); + SAS_DPRINTK("Enter %s busy: %d failed: %d\n", + __func__, shost->host_busy, shost->host_failed); /* * Deal with commands that still have SAS tasks (i.e. they didn't * complete via the normal sas_task completion mechanism) @@ -693,9 +694,9 @@ void sas_scsi_recover_host(struct Scsi_Host *shost) * scsi_unjam_host does, but we skip scsi_eh_abort_cmds because any * command we see here has no sas_task and is thus unknown to the HA. */ - if (!sas_ata_eh(shost, &eh_work_q, &ha->eh_done_q)) - if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q)) - scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); + sas_ata_eh(shost, &eh_work_q, &ha->eh_done_q); + if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q)) + scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); out: clear_bit(SAS_HA_FROZEN, &ha->state); @@ -707,8 +708,8 @@ out: scsi_eh_flush_done_q(&ha->eh_done_q); - SAS_DPRINTK("--- Exit %s\n", __func__); - return; + SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n", + __func__, shost->host_busy, shost->host_failed); } enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) -- cgit v1.2.3 From 9508a66f898d46e726a318469312b45e0b1d078b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 Jan 2012 20:47:01 -0800 Subject: [SCSI] libsas: async ata scanning libsas ata error handling is already async but this does not help the scan case. Move initial link recovery out from under host->scan_mutex, and delay synchronization with eh until after all port probe/recovery work has been queued. Device ordering is maintained with scan order by still calling sas_rphy_add() in order of domain discovery. Since we now scan the domain list when invoking libata-eh we need to be careful to check for fully initialized ata ports. Acked-by: Jack Wang Acked-by: Jeff Garzik Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index e58ca50517d5..3701ff7e7267 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -762,17 +762,10 @@ int sas_target_alloc(struct scsi_target *starget) { struct sas_rphy *rphy = dev_to_rphy(starget->dev.parent); struct domain_device *found_dev = sas_find_dev_by_rphy(rphy); - int res; if (!found_dev) return -ENODEV; - if (dev_is_sata(found_dev)) { - res = sas_ata_init_host_and_port(found_dev, starget); - if (res) - return res; - } - kref_get(&found_dev->kref); starget->hostdata = found_dev; return 0; @@ -1012,16 +1005,6 @@ void sas_task_abort(struct sas_task *task) } } -int sas_slave_alloc(struct scsi_device *scsi_dev) -{ - struct domain_device *dev = sdev_to_domain_dev(scsi_dev); - - if (dev_is_sata(dev)) - return ata_sas_port_init(dev->sata_dev.ap); - - return 0; -} - void sas_target_destroy(struct scsi_target *starget) { struct domain_device *found_dev = starget->hostdata; @@ -1082,6 +1065,5 @@ EXPORT_SYMBOL_GPL(sas_task_abort); EXPORT_SYMBOL_GPL(sas_phy_reset); EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler); EXPORT_SYMBOL_GPL(sas_eh_bus_reset_handler); -EXPORT_SYMBOL_GPL(sas_slave_alloc); EXPORT_SYMBOL_GPL(sas_target_destroy); EXPORT_SYMBOL_GPL(sas_ioctl); -- cgit v1.2.3 From 840234745edaa82d514420dc1086e63536493a51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 20 Jan 2012 15:23:07 -0800 Subject: [SCSI] libsas: fix lifetime of SAS_HA_FROZEN Until all sas_tasks are known to no longer be in-flight this flag gates late completions from colliding with error handling. However, it must be cleared prior to the submission of scsi_send_eh_cmnd() requests, otherwise those commands will never be completed correctly. This was spotted by slub debug: ============================================================================= BUG sas_task: Objects remaining on kmem_cache_close() ----------------------------------------------------------------------------- INFO: Slab 0xffffea001f0eba00 objects=34 used=1 fp=0xffff8807c3aecb00 flags=0x8000000000004080 Pid: 22919, comm: modprobe Not tainted 3.2.0-isci+ #2 Call Trace: [] slab_err+0xb0/0xd2 [] ? free_percpu+0x31/0x117 [] ? kzalloc+0x14/0x16 [] ? kzalloc+0x14/0x16 [] kmem_cache_destroy+0x11d/0x270 [] sas_class_exit+0x10/0x12 [libsas] [] sys_delete_module+0x1c4/0x23c [] ? sysret_check+0x2e/0x69 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b INFO: Object 0xffff8807c3aed280 @offset=21120 INFO: Allocated in sas_alloc_task+0x22/0x90 [libsas] age=4615311 cpu=2 pid=12966 __slab_alloc.clone.3+0x1d1/0x234 kmem_cache_alloc+0x52/0x10d sas_alloc_task+0x22/0x90 [libsas] sas_queuecommand+0x20e/0x230 [libsas] scsi_send_eh_cmnd+0xd1/0x30c scsi_eh_try_stu+0x4f/0x6b scsi_eh_ready_devs+0xba/0x6ef sas_scsi_recover_host+0xa35/0xab1 [libsas] scsi_error_handler+0x14b/0x5fa kthread+0x9d/0xa5 kernel_thread_helper+0x4/0x10 Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 3701ff7e7267..fd3291337c1b 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -521,8 +521,7 @@ try_bus_reset: return FAILED; } -static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, - struct list_head *work_q) +static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *work_q) { struct scsi_cmnd *cmd, *n; enum task_disposition res = TASK_IS_DONE; @@ -658,7 +657,7 @@ static int sas_eh_handle_sas_errors(struct Scsi_Host *shost, out: list_splice_tail(&done, work_q); list_splice_tail_init(&ha->eh_ata_q, work_q); - return list_empty(work_q); + return; clear_q: SAS_DPRINTK("--- Exit %s -- clear_q\n", __func__); @@ -682,10 +681,13 @@ void sas_scsi_recover_host(struct Scsi_Host *shost) __func__, shost->host_busy, shost->host_failed); /* * Deal with commands that still have SAS tasks (i.e. they didn't - * complete via the normal sas_task completion mechanism) + * complete via the normal sas_task completion mechanism), + * SAS_HA_FROZEN gives eh dominion over all sas_task completion. */ set_bit(SAS_HA_FROZEN, &ha->state); - if (sas_eh_handle_sas_errors(shost, &eh_work_q)) + sas_eh_handle_sas_errors(shost, &eh_work_q); + clear_bit(SAS_HA_FROZEN, &ha->state); + if (list_empty(&eh_work_q)) goto out; /* @@ -699,7 +701,6 @@ void sas_scsi_recover_host(struct Scsi_Host *shost) scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q); out: - clear_bit(SAS_HA_FROZEN, &ha->state); if (ha->lldd_max_execute_num > 1) wake_up_process(ha->core.queue_thread); -- cgit v1.2.3 From 26a2e68f816ebd736a0484ca293457b280af4ef1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Jan 2012 21:40:45 -0800 Subject: [SCSI] libsas: don't recover end devices attached to disabled phys If userspace has decided to disable a phy the kernel should honor that and not inadvertantly re-enable the phy via error recovery. This is more straightforward in the sata case where link recovery (via libata-eh) is separate from sas_task cancelling in libsas-eh. Teach libsas to accept -ENODEV as a successful response from I_T_nexus_reset ('successful' in terms of not escalating further). This is a more comprehensive fix then "libsas: don't recover 'gone' devices in sas_ata_hard_reset()", as it is no longer sata-specific. aic94xx does check the return value from sas_phy_reset() so if the phy is disabled we proceed with clearing the I_T_nexus. Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/scsi/libsas/sas_scsi_host.c') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index fd3291337c1b..f0b9b7bf1882 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -607,7 +607,8 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head * SAS_DPRINTK("task 0x%p is not at LU: I_T recover\n", task); tmf_resp = sas_recover_I_T(task->dev); - if (tmf_resp == TMF_RESP_FUNC_COMPLETE) { + if (tmf_resp == TMF_RESP_FUNC_COMPLETE || + tmf_resp == -ENODEV) { struct domain_device *dev = task->dev; SAS_DPRINTK("I_T %016llx recovered\n", SAS_ADDR(task->dev->sas_addr)); -- cgit v1.2.3