summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband/sw/rdmavt
diff options
context:
space:
mode:
author: Jianxin Xiong <jianxin.xiong@intel.com> 2016-07-25 13:39:45 -0700
committer: Doug Ledford <dledford@redhat.com> 2016-08-02 16:00:58 -0400
commit: d9b13c203003cfb78c1f216049a204d385ccaeff (patch)
tree: 9df5767ede1a28416cb31e8df3f9ab5eeca21f85 /drivers/infiniband/sw/rdmavt
parent: 856cc4c237add46510c8ae91764f4eda31a9e1cf (diff)
download: blackbird-op-linux-d9b13c203003cfb78c1f216049a204d385ccaeff.tar.gz
download: blackbird-op-linux-d9b13c203003cfb78c1f216049a204d385ccaeff.zip
IB/rdmavt, hfi1: Fix NFSoRDMA failure with FRMR enabled
Hanging has been observed while writing a file over NFSoRDMA. Dmesg on
the server contains messages like these:

[  931.992501] svcrdma: Error -22 posting RDMA_READ
[  952.076879] svcrdma: Error -22 posting RDMA_READ
[  982.154127] svcrdma: Error -22 posting RDMA_READ
[ 1012.235884] svcrdma: Error -22 posting RDMA_READ
[ 1042.319194] svcrdma: Error -22 posting RDMA_READ

Here is why:

With the base memory management extension enabled, FRMR is used instead
of FMR. The xprtrdma server issues each RDMA read request as the
following bundle:

(1) IB_WR_REG_MR, signaled;
(2) IB_WR_RDMA_READ, signaled;
(3) IB_WR_LOCAL_INV, signaled & fencing.

These requests are signaled. In order to generate completion, the fast
register work request is processed by the hfi1 send engine after being
posted to the work queue, and the corresponding lkey is not valid until
the request is processed. However, the rdmavt driver validates lkey when
the RDMA read request is posted and thus it fails immediately with error
-EINVAL (-22).

This patch changes the work flow of local operations (fast register and
local invalidate) so that fast register work requests are always
processed immediately to ensure that the corresponding lkey is valid
when subsequent work requests are posted. Local invalidate requests are
processed immediately if fencing is not required and no previous local
invalidate request is pending.

To allow completion generation for signaled local operations that have
been processed before posting to the work queue, an internal send flag
RVT_SEND_COMPLETION_ONLY is added. The hfi1 send engine checks this flag
and only generates completion for such requests.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/sw/rdmavt')
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c | 48
1 files changed, 32 insertions, 16 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 218494c6afe2..8ccf1b970b2c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1579,6 +1579,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
int ret;
size_t cplen;
bool reserved_op;
+ int local_ops_delayed = 0;
BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
@@ -1592,25 +1593,37 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
cplen = ret;
/*
- * Local operations including fast register and local invalidate
- * can be processed immediately w/o being posted to the send queue
- * if neither fencing nor completion generation is needed. However,
- * once fencing or completion is requested, direct processing of
- * following local operations must be disabled until all the local
- * operations posted to the send queue have completed. This is
- * necessary to ensure the correct ordering.
+ * Local operations include fast register and local invalidate.
+ * Fast register needs to be processed immediately because the
+ * registered lkey may be used by following work requests and the
+ * lkey needs to be valid at the time those requests are posted.
+ * Local invalidate can be processed immediately if fencing is
+ * not required and no previous local invalidate ops are pending.
+ * Signaled local operations that have been processed immediately
+ * need to have requests with "completion only" flags set posted
+ * to the send queue in order to generate completions.
*/
- if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) &&
- !(wr->send_flags & (IB_SEND_FENCE | IB_SEND_SIGNALED)) &&
- !atomic_read(&qp->local_ops_pending)) {
- struct ib_reg_wr *reg = reg_wr(wr);
-
+ if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
switch (wr->opcode) {
case IB_WR_REG_MR:
- return rvt_fast_reg_mr(qp, reg->mr, reg->key,
- reg->access);
+ ret = rvt_fast_reg_mr(qp,
+ reg_wr(wr)->mr,
+ reg_wr(wr)->key,
+ reg_wr(wr)->access);
+ if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
+ return ret;
+ break;
case IB_WR_LOCAL_INV:
- return rvt_invalidate_rkey(qp, wr->ex.invalidate_rkey);
+ if ((wr->send_flags & IB_SEND_FENCE) ||
+ atomic_read(&qp->local_ops_pending)) {
+ local_ops_delayed = 1;
+ } else {
+ ret = rvt_invalidate_rkey(
+ qp, wr->ex.invalidate_rkey);
+ if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
+ return ret;
+ }
+ break;
default:
return -EINVAL;
}
@@ -1675,7 +1688,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
}
if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
- atomic_inc(&qp->local_ops_pending);
+ if (local_ops_delayed)
+ atomic_inc(&qp->local_ops_pending);
+ else
+ wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
wqe->ssn = 0;
wqe->psn = 0;
wqe->lpsn = 0;
OpenPOWER on IntegriCloud