From cf131a81967583ae737df6383a0893b9fee75b4e Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 14 Jun 2019 12:32:26 -0400 Subject: IB/hfi1: Avoid hardlockup with flushlist_lock Heavy contention of the sde flushlist_lock can cause hard lockups at extreme scale when the flushing logic is under stress. Mitigate by replacing the item at a time copy to the local list with an O(1) list_splice_init() and using the high priority work queue to do the flushes. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Cc: Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1/sdma.c') diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index b0110728f541..70828de7436b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -410,10 +410,7 @@ static void sdma_flush(struct sdma_engine *sde) sdma_flush_descq(sde); spin_lock_irqsave(&sde->flushlist_lock, flags); /* copy flush list */ - list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) { - list_del_init(&txp->list); - list_add_tail(&txp->list, &flushlist); - } + list_splice_init(&sde->flushlist, &flushlist); spin_unlock_irqrestore(&sde->flushlist_lock, flags); /* flush from flush list */ list_for_each_entry_safe(txp, txp_next, &flushlist, list) @@ -2413,7 +2410,7 @@ unlock_noconn: list_add_tail(&tx->list, &sde->flushlist); spin_unlock(&sde->flushlist_lock); iowait_inc_wait_count(wait, tx->num_desc); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto unlock; nodesc: @@ -2511,7 +2508,7 @@ unlock_noconn: iowait_inc_wait_count(wait, tx->num_desc); } spin_unlock(&sde->flushlist_lock); - schedule_work(&sde->flush_worker); + queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker); ret = -ECOMM; goto update_tail; nodesc: -- cgit v1.2.3 From f972775b1cc0441ae22c9f8d06dd16b118463632 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 14 Jun 2019 12:32:50 -0400 Subject: IB/hfi1: Wakeup QPs orphaned on wait list after flush Once an SDMA engine is taken down due to a link failure, any waiting QPs that do not have outstanding descriptors in the ring will stay on the dmawait list as long as the port is down. Since there is no timer running, they will stay there for a long time. The fix is to wake up all iowaits linked to dmawait. The send engine will build and post packets that get flushed back. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/infiniband/hw/hfi1/sdma.c') diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 70828de7436b..28b66bd70b74 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -405,6 +405,7 @@ static void sdma_flush(struct sdma_engine *sde) struct sdma_txreq *txp, *txp_next; LIST_HEAD(flushlist); unsigned long flags; + uint seq; /* flush from head to tail */ sdma_flush_descq(sde); @@ -415,6 +416,22 @@ static void sdma_flush(struct sdma_engine *sde) /* flush from flush list */ list_for_each_entry_safe(txp, txp_next, &flushlist, list) complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED); + /* wakeup QPs orphaned on the dmawait list */ + do { + struct iowait *w, *nw; + + seq = read_seqbegin(&sde->waitlock); + if (!list_empty(&sde->dmawait)) { + write_seqlock(&sde->waitlock); + list_for_each_entry_safe(w, nw, &sde->dmawait, list) { + if (w->wakeup) { + w->wakeup(w, SDMA_AVAIL_REASON); + list_del_init(&w->list); + } + } + write_sequnlock(&sde->waitlock); + } + } while (read_seqretry(&sde->waitlock, seq)); } /* -- cgit v1.2.3