summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Michael Reed <mdr@sgi.com> 2009-04-08 14:34:33 -0500
committer: James Bottomley <James.Bottomley@HansenPartnership.com> 2009-05-20 17:21:13 -0500
commit: 413e6e18b483de272bdafa56e5c086c75f11d681 (patch)
tree: 89a61e76f63b3c5c0b51e3115b7e5eaef78f2893
parent: fd65e5e93cbd9d2f34bbb0f0b2f46a30a1d20915 (diff)
download: talos-op-linux-413e6e18b483de272bdafa56e5c086c75f11d681.tar.gz
download: talos-op-linux-413e6e18b483de272bdafa56e5c086c75f11d681.zip
[SCSI] qla1280: error recovery rewrite
The driver now waits for the SCSI commands associated with a particular error recovery step to be returned to the mid-layer, and then returns the appropriate SUCCESS or FAILED status. The patch also removes the unneeded polling of the chip for interrupts and bumps the driver version number to 3.27.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
-rw-r--r-- drivers/scsi/qla1280.c | 290
-rw-r--r-- drivers/scsi/qla1280.h | 3
2 files changed, 159 insertions, 134 deletions
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 0cbad4982db9..8371d917a9a2 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -17,9 +17,12 @@
* General Public License for more details.
*
******************************************************************************/
-#define QLA1280_VERSION "3.26"
+#define QLA1280_VERSION "3.27"
/*****************************************************************************
Revision History:
+ Rev 3.27, February 10, 2009, Michael Reed
+ - General code cleanup.
+ - Improve error recovery.
Rev 3.26, January 16, 2006 Jes Sorensen
- Ditch all < 2.6 support
Rev 3.25.1, February 10, 2005 Christoph Hellwig
@@ -718,6 +721,8 @@ qla1280_queuecommand(struct scsi_cmnd *cmd, void (*fn)(struct scsi_cmnd *))
cmd->scsi_done = fn;
sp->cmd = cmd;
sp->flags = 0;
+ sp->wait = NULL;
+ CMD_HANDLE(cmd) = (unsigned char *)NULL;
qla1280_print_scsi_cmd(5, cmd);
@@ -742,14 +747,6 @@ enum action {
ADAPTER_RESET,
};
-/* timer action for error action processor */
-static void qla1280_error_wait_timeout(unsigned long __data)
-{
- struct scsi_cmnd *cmd = (struct scsi_cmnd *)__data;
- struct srb *sp = (struct srb *)CMD_SP(cmd);
-
- complete(sp->wait);
-}
static void qla1280_mailbox_timeout(unsigned long __data)
{
@@ -764,6 +761,65 @@ static void qla1280_mailbox_timeout(unsigned long __data)
complete(ha->mailbox_wait);
}
+static int
+_qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp,
+ struct completion *wait)
+{
+ int status = FAILED; /* stays FAILED unless the command is seen completed below */
+ struct scsi_cmnd *cmd = sp->cmd;
+
+ spin_unlock_irq(ha->host->host_lock); /* host_lock is not held across the sleep */
+ wait_for_completion_timeout(wait, 4*HZ); /* return value unused; outcome is decided by the handle check below */
+ spin_lock_irq(ha->host->host_lock);
+ sp->wait = NULL; /* detach the caller-supplied completion from the srb */
+ if(CMD_HANDLE(cmd) == COMPLETED_HANDLE) { /* handle was marked completed while we slept (or before) */
+ status = SUCCESS;
+ (*cmd->scsi_done)(cmd); /* qla1280_done() only calls complete() when sp->wait is set, so scsi_done is issued here */
+ }
+ return status; /* SUCCESS or FAILED */
+}
+
+static int
+qla1280_wait_for_single_command(struct scsi_qla_host *ha, struct srb *sp)
+{
+ DECLARE_COMPLETION_ONSTACK(wait); /* on-stack completion, valid only for this call */
+
+ sp->wait = &wait; /* with sp->wait set, qla1280_done() calls complete() instead of scsi_done */
+ return _qla1280_wait_for_single_command(ha, sp, &wait); /* helper clears sp->wait and returns SUCCESS or FAILED */
+}
+
+static int
+qla1280_wait_for_pending_commands(struct scsi_qla_host *ha, int bus, int target)
+{
+ int cnt;
+ int status;
+ struct srb *sp;
+ struct scsi_cmnd *cmd;
+
+ status = SUCCESS; /* returned unchanged when no outstanding command matches */
+
+ /*
+ * Wait for all commands with the designated bus/target
+ * to be completed by the firmware
+ */
+ for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { /* visit every outstanding-command slot */
+ sp = ha->outstanding_cmds[cnt];
+ if (sp) { /* empty slots are skipped */
+ cmd = sp->cmd;
+
+ if (bus >= 0 && SCSI_BUS_32(cmd) != bus) /* a negative bus matches any bus */
+ continue;
+ if (target >= 0 && SCSI_TCN_32(cmd) != target) /* a negative target matches any target */
+ continue;
+
+ status = qla1280_wait_for_single_command(ha, sp); /* releases and retakes host_lock while waiting */
+ if (status == FAILED) /* stop at the first command not seen completed */
+ break;
+ }
+ }
+ return status; /* SUCCESS, or FAILED from the first wait that failed */
+}
+
/**************************************************************************
* qla1280_error_action
* The function will attempt to perform a specified error action and
@@ -777,11 +833,6 @@ static void qla1280_mailbox_timeout(unsigned long __data)
* Returns:
* SUCCESS or FAILED
*
- * Note:
- * Resetting the bus always succeeds - is has to, otherwise the
- * kernel will panic! Try a surgical technique - sending a BUS
- * DEVICE RESET message - on the offending target before pulling
- * the SCSI bus reset line.
**************************************************************************/
static int
qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
@@ -789,15 +840,19 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
struct scsi_qla_host *ha;
int bus, target, lun;
struct srb *sp;
- uint16_t data;
- unsigned char *handle;
- int result, i;
+ int i, found;
+ int result=FAILED;
+ int wait_for_bus=-1;
+ int wait_for_target = -1;
DECLARE_COMPLETION_ONSTACK(wait);
- struct timer_list timer;
ENTER("qla1280_error_action");
ha = (struct scsi_qla_host *)(CMD_HOST(cmd)->hostdata);
+ sp = (struct srb *)CMD_SP(cmd);
+ bus = SCSI_BUS_32(cmd);
+ target = SCSI_TCN_32(cmd);
+ lun = SCSI_LUN_32(cmd);
dprintk(4, "error_action %i, istatus 0x%04x\n", action,
RD_REG_WORD(&ha->iobase->istatus));
@@ -811,73 +866,42 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
"Handle=0x%p, action=0x%x\n",
ha->host_no, cmd, CMD_HANDLE(cmd), action);
- sp = (struct srb *)CMD_SP(cmd);
- handle = CMD_HANDLE(cmd);
-
- /* Check for pending interrupts. */
- data = qla1280_debounce_register(&ha->iobase->istatus);
- /*
- * The io_request_lock is held when the reset handler is called, hence
- * the interrupt handler cannot be running in parallel as it also
- * grabs the lock. /Jes
- */
- if (data & RISC_INT)
- qla1280_isr(ha, &ha->done_q);
-
/*
- * Determine the suggested action that the mid-level driver wants
- * us to perform.
+ * Check to see if we have the command in the outstanding_cmds[]
+ * array. If not then it must have completed before this error
+ * action was initiated. If the error_action isn't ABORT_COMMAND
+ * then the driver must proceed with the requested action.
*/
- if (handle == (unsigned char *)INVALID_HANDLE || handle == NULL) {
- if(action == ABORT_COMMAND) {
- /* we never got this command */
- printk(KERN_INFO "qla1280: Aborting a NULL handle\n");
- return SUCCESS; /* no action - we don't have command */
+ found = -1;
+ for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+ if (sp == ha->outstanding_cmds[i]) {
+ found = i;
+ sp->wait = &wait; /* we'll wait for it to complete */
+ break;
}
- } else {
- sp->wait = &wait;
}
- bus = SCSI_BUS_32(cmd);
- target = SCSI_TCN_32(cmd);
- lun = SCSI_LUN_32(cmd);
+ if (found < 0) { /* driver doesn't have command */
+ result = SUCCESS;
+ if (qla1280_verbose) {
+ printk(KERN_INFO
+ "scsi(%ld:%d:%d:%d): specified command has "
+ "already completed.\n", ha->host_no, bus,
+ target, lun);
+ }
+ }
- /* Overloading result. Here it means the success or fail of the
- * *issue* of the action. When we return from the routine, it must
- * mean the actual success or fail of the action */
- result = FAILED;
switch (action) {
- case ABORT_COMMAND:
- if ((sp->flags & SRB_ABORT_PENDING)) {
- printk(KERN_WARNING
- "scsi(): Command has a pending abort "
- "message - ABORT_PENDING.\n");
- /* This should technically be impossible since we
- * now wait for abort completion */
- break;
- }
- for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
- if (sp == ha->outstanding_cmds[i]) {
- dprintk(1, "qla1280: RISC aborting command\n");
- if (qla1280_abort_command(ha, sp, i) == 0)
- result = SUCCESS;
- else {
- /*
- * Since we don't know what might
- * have happend to the command, it
- * is unsafe to remove it from the
- * device's queue at this point.
- * Wait and let the escalation
- * process take care of it.
- */
- printk(KERN_WARNING
- "scsi(%li:%i:%i:%i): Unable"
- " to abort command!\n",
- ha->host_no, bus, target, lun);
- }
- }
- }
+ case ABORT_COMMAND:
+ dprintk(1, "qla1280: RISC aborting command\n");
+ /*
+ * The abort might fail due to race when the host_lock
+ * is released to issue the abort. As such, we
+ * don't bother to check the return status.
+ */
+ if (found >= 0)
+ qla1280_abort_command(ha, sp, found);
break;
case DEVICE_RESET:
@@ -885,16 +909,21 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
printk(KERN_INFO
"scsi(%ld:%d:%d:%d): Queueing device reset "
"command.\n", ha->host_no, bus, target, lun);
- if (qla1280_device_reset(ha, bus, target) == 0)
- result = SUCCESS;
+ if (qla1280_device_reset(ha, bus, target) == 0) {
+ /* issued device reset, set wait conditions */
+ wait_for_bus = bus;
+ wait_for_target = target;
+ }
break;
case BUS_RESET:
if (qla1280_verbose)
printk(KERN_INFO "qla1280(%ld:%d): Issued bus "
"reset.\n", ha->host_no, bus);
- if (qla1280_bus_reset(ha, bus) == 0)
- result = SUCCESS;
+ if (qla1280_bus_reset(ha, bus) == 0) {
+ /* issued bus reset, set wait conditions */
+ wait_for_bus = bus;
+ }
break;
case ADAPTER_RESET:
@@ -907,55 +936,48 @@ qla1280_error_action(struct scsi_cmnd *cmd, enum action action)
"continue automatically\n", ha->host_no);
}
ha->flags.reset_active = 1;
- /*
- * We restarted all of the commands automatically, so the
- * mid-level code can expect completions momentitarily.
- */
- if (qla1280_abort_isp(ha) == 0)
- result = SUCCESS;
+
+ if (qla1280_abort_isp(ha) != 0) { /* it's dead */
+ result = FAILED;
+ }
ha->flags.reset_active = 0;
}
- if (!list_empty(&ha->done_q))
- qla1280_done(ha);
-
- /* If we didn't manage to issue the action, or we have no
- * command to wait for, exit here */
- if (result == FAILED || handle == NULL ||
- handle == (unsigned char *)INVALID_HANDLE) {
- /*
- * Clear completion queue to avoid qla1280_done() trying
- * to complete the command at a later stage after we
- * have exited the current context
- */
- sp->wait = NULL;
- goto leave;
- }
+ /*
+ * At this point, the host_lock has been released and retaken
+ * by the issuance of the mailbox command.
+ * Wait for the command passed in by the mid-layer if it
+ * was found by the driver. It might have been returned
+ * between eh recovery steps, hence the check of the "found"
+ * variable.
+ */
- /* set up a timer just in case we're really jammed */
- init_timer(&timer);
- timer.expires = jiffies + 4*HZ;
- timer.data = (unsigned long)cmd;
- timer.function = qla1280_error_wait_timeout;
- add_timer(&timer);
+ if (found >= 0)
+ result = _qla1280_wait_for_single_command(ha, sp, &wait);
- /* wait for the action to complete (or the timer to expire) */
- spin_unlock_irq(ha->host->host_lock);
- wait_for_completion(&wait);
- del_timer_sync(&timer);
- spin_lock_irq(ha->host->host_lock);
- sp->wait = NULL;
+ if (action == ABORT_COMMAND && result != SUCCESS) {
+ printk(KERN_WARNING
+ "scsi(%li:%i:%i:%i): "
+ "Unable to abort command!\n",
+ ha->host_no, bus, target, lun);
+ }
- /* the only action we might get a fail for is abort */
- if (action == ABORT_COMMAND) {
- if(sp->flags & SRB_ABORTED)
- result = SUCCESS;
- else
- result = FAILED;
+ /*
+ * If the command passed in by the mid-layer has been
+ * returned by the board, then wait for any additional
+ * commands which are supposed to complete based upon
+ * the error action.
+ *
+ * All commands are unconditionally returned during a
+ * call to qla1280_abort_isp(), ADAPTER_RESET. No need
+ * to wait for them.
+ */
+ if (result == SUCCESS && wait_for_bus >= 0) {
+ result = qla1280_wait_for_pending_commands(ha,
+ wait_for_bus, wait_for_target);
}
- leave:
dprintk(1, "RESET returning %d\n", result);
LEAVE("qla1280_error_action");
@@ -1258,7 +1280,8 @@ qla1280_done(struct scsi_qla_host *ha)
switch ((CMD_RESULT(cmd) >> 16)) {
case DID_RESET:
/* Issue marker command. */
- qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
+ if (!ha->flags.abort_isp_active)
+ qla1280_marker(ha, bus, target, 0, MK_SYNC_ID);
break;
case DID_ABORT:
sp->flags &= ~SRB_ABORT_PENDING;
@@ -1272,12 +1295,11 @@ qla1280_done(struct scsi_qla_host *ha)
scsi_dma_unmap(cmd);
/* Call the mid-level driver interrupt handler */
- CMD_HANDLE(sp->cmd) = (unsigned char *)INVALID_HANDLE;
ha->actthreads--;
- (*(cmd)->scsi_done)(cmd);
-
- if(sp->wait != NULL)
+ if (sp->wait == NULL)
+ (*(cmd)->scsi_done)(cmd);
+ else
complete(sp->wait);
}
LEAVE("qla1280_done");
@@ -3415,6 +3437,7 @@ qla1280_isr(struct scsi_qla_host *ha, struct list_head *done_q)
/* Save ISP completion status */
CMD_RESULT(sp->cmd) = 0;
+ CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
/* Place block on done queue */
list_add_tail(&sp->list, done_q);
@@ -3681,6 +3704,8 @@ qla1280_status_entry(struct scsi_qla_host *ha, struct response *pkt,
}
}
+ CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
+
/* Place command on done queue. */
list_add_tail(&sp->list, done_q);
out:
@@ -3736,6 +3761,8 @@ qla1280_error_entry(struct scsi_qla_host *ha, struct response *pkt,
CMD_RESULT(sp->cmd) = DID_ERROR << 16;
}
+ CMD_HANDLE(sp->cmd) = COMPLETED_HANDLE;
+
/* Place command on done queue. */
list_add_tail(&sp->list, done_q);
}
@@ -3786,19 +3813,16 @@ qla1280_abort_isp(struct scsi_qla_host *ha)
struct scsi_cmnd *cmd;
sp = ha->outstanding_cmds[cnt];
if (sp) {
-
cmd = sp->cmd;
CMD_RESULT(cmd) = DID_RESET << 16;
-
- sp->cmd = NULL;
+ CMD_HANDLE(cmd) = COMPLETED_HANDLE;
ha->outstanding_cmds[cnt] = NULL;
-
- (*cmd->scsi_done)(cmd);
-
- sp->flags = 0;
+ list_add_tail(&sp->list, &ha->done_q);
}
}
+ qla1280_done(ha);
+
status = qla1280_load_firmware(ha);
if (status)
goto out;
diff --git a/drivers/scsi/qla1280.h b/drivers/scsi/qla1280.h
index d7c44b8d2b4f..834884b9eed5 100644
--- a/drivers/scsi/qla1280.h
+++ b/drivers/scsi/qla1280.h
@@ -88,7 +88,8 @@
/* Maximum outstanding commands in ISP queues */
#define MAX_OUTSTANDING_COMMANDS 512
-#define INVALID_HANDLE (MAX_OUTSTANDING_COMMANDS + 2)
+#define COMPLETED_HANDLE ((unsigned char *) \
+ (MAX_OUTSTANDING_COMMANDS + 2))
/* ISP request and response entry counts (37-65535) */
#define REQUEST_ENTRY_CNT 255 /* Number of request entries. */
OpenPOWER on IntegriCloud