diff options
Diffstat (limited to 'net/sunrpc/xprtrdma/verbs.c')
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 255 |
1 files changed, 105 insertions, 150 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3ddba94c939f..7749a2bf6887 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -78,53 +78,25 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf); static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); +static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); -struct workqueue_struct *rpcrdma_receive_wq __read_mostly; - -int -rpcrdma_alloc_wq(void) -{ - struct workqueue_struct *recv_wq; - - recv_wq = alloc_workqueue("xprtrdma_receive", - WQ_MEM_RECLAIM | WQ_HIGHPRI, - 0); - if (!recv_wq) - return -ENOMEM; - - rpcrdma_receive_wq = recv_wq; - return 0; -} - -void -rpcrdma_destroy_wq(void) -{ - struct workqueue_struct *wq; - - if (rpcrdma_receive_wq) { - wq = rpcrdma_receive_wq; - rpcrdma_receive_wq = NULL; - destroy_workqueue(wq); - } -} - -/** - * rpcrdma_disconnect_worker - Force a disconnect - * @work: endpoint to be disconnected - * - * Provider callbacks can possibly run in an IRQ context. This function - * is invoked in a worker thread to guarantee that disconnect wake-up - * calls are always done in process context. +/* Wait for outstanding transport work to finish. */ -static void -rpcrdma_disconnect_worker(struct work_struct *work) +static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep, - rep_disconnect_worker.work); - struct rpcrdma_xprt *r_xprt = - container_of(ep, struct rpcrdma_xprt, rx_ep); + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; - xprt_force_disconnect(&r_xprt->rx_xprt); + /* Flush Receives, then wait for deferred Reply work + * to complete. + */ + ib_drain_qp(ia->ri_id->qp); + drain_workqueue(buf->rb_completion_wq); + + /* Deferred Reply processing might have scheduled + * local invalidations. + */ + ib_drain_sq(ia->ri_id->qp); } /** @@ -143,15 +115,6 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context) rx_ep); trace_xprtrdma_qp_event(r_xprt, event); - pr_err("rpcrdma: %s on device %s connected to %s:%s\n", - ib_event_msg(event->event), event->device->name, - rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); - - if (ep->rep_connected == 1) { - ep->rep_connected = -EIO; - schedule_delayed_work(&ep->rep_disconnect_worker, 0); - wake_up_all(&ep->rep_connect_wait); - } } /** @@ -189,11 +152,13 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, rr_cqe); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; - /* WARNING: Only wr_id and status are reliable at this point */ + /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_receive(wc); + --r_xprt->rx_ep.rep_receive_count; if (wc->status != IB_WC_SUCCESS) - goto out_fail; + goto out_flushed; /* status == SUCCESS means all fields in wc are trustworthy */ rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); @@ -204,17 +169,16 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) rdmab_addr(rep->rr_rdmabuf), wc->byte_len, DMA_FROM_DEVICE); -out_schedule: + rpcrdma_post_recvs(r_xprt, false); rpcrdma_reply_handler(rep); return; -out_fail: +out_flushed: if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), wc->status, wc->vendor_err); - rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); - goto out_schedule; + rpcrdma_recv_buffer_put(rep); } static void @@ -316,7 +280,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->rep_connected = -EAGAIN; goto disconnected; case RDMA_CM_EVENT_DISCONNECTED: - ++xprt->connect_cookie; ep->rep_connected = -ECONNABORTED; disconnected: xprt_force_disconnect(xprt); @@ -326,10 +289,9 @@ disconnected: break; } - dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__, + dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), - ia->ri_device->name, ia->ri_ops->ro_displayname, - rdma_event_msg(event->event)); + ia->ri_device->name, rdma_event_msg(event->event)); return 0; } @@ -347,22 +309,15 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, xprt, RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(id)) { - rc = PTR_ERR(id); - dprintk("RPC: %s: rdma_create_id() failed %i\n", - __func__, rc); + if (IS_ERR(id)) return id; - } ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->rx_xprt.addr, RDMA_RESOLVE_TIMEOUT); - if (rc) { - dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", - __func__, rc); + if (rc) goto out; - } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { trace_xprtrdma_conn_tout(xprt); @@ -375,11 +330,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); - if (rc) { - dprintk("RPC: %s: rdma_resolve_route() failed %i\n", - __func__, rc); + if (rc) goto out; - } rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); if (rc < 0) { trace_xprtrdma_conn_tout(xprt); @@ -429,16 +381,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) switch (xprt_rdma_memreg_strategy) { case RPCRDMA_FRWR: - if (frwr_is_supported(ia)) { - ia->ri_ops = &rpcrdma_frwr_memreg_ops; - break; - } - /*FALLTHROUGH*/ - case RPCRDMA_MTHCAFMR: - if (fmr_is_supported(ia)) { - ia->ri_ops = &rpcrdma_fmr_memreg_ops; + if (frwr_is_supported(ia)) break; - } /*FALLTHROUGH*/ default: pr_err("rpcrdma: Device %s does not support memreg mode %d\n", @@ -481,7 +425,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) * connection is already gone. */ if (ia->ri_id->qp) { - ib_drain_qp(ia->ri_id->qp); + rpcrdma_xprt_drain(r_xprt); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } @@ -552,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ia->ri_max_send_sges = max_sge; - rc = ia->ri_ops->ro_open(ia, ep, cdata); + rc = frwr_open(ia, ep, cdata); if (rc) return rc; @@ -579,16 +523,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, cdata->max_requests >> 2); ep->rep_send_count = ep->rep_send_batch; init_waitqueue_head(&ep->rep_connect_wait); - INIT_DELAYED_WORK(&ep->rep_disconnect_worker, - rpcrdma_disconnect_worker); + ep->rep_receive_count = 0; sendcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_send_wr + 1, 1, IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); - dprintk("RPC: %s: failed to create send CQ: %i\n", - __func__, rc); goto out1; } @@ -597,8 +538,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, 0, IB_POLL_WORKQUEUE); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); - dprintk("RPC: %s: failed to create recv CQ: %i\n", - __func__, rc); goto out2; } @@ -611,7 +550,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Prepare RDMA-CM private message */ pmsg->cp_magic = rpcrdma_cmp_magic; pmsg->cp_version = RPCRDMA_CMP_VERSION; - pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; + pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); ep->rep_remote_cma.private_data = pmsg; @@ -653,8 +592,6 @@ out1: void rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { - cancel_delayed_work_sync(&ep->rep_disconnect_worker); - if (ia->ri_id && ia->ri_id->qp) { rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); @@ -740,11 +677,8 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, } err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); - if (err) { - dprintk("RPC: %s: rdma_create_qp returned %d\n", - __func__, err); + if (err) goto out_destroy; - } /* Atomically replace the transport's ID and QP. */ rc = 0; @@ -775,8 +709,6 @@ retry: dprintk("RPC: %s: connecting...\n", __func__); rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); if (rc) { - dprintk("RPC: %s: rdma_create_qp failed %i\n", - __func__, rc); rc = -ENETUNREACH; goto out_noupdate; } @@ -798,11 +730,8 @@ retry: rpcrdma_post_recvs(r_xprt, true); rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); - if (rc) { - dprintk("RPC: %s: rdma_connect() failed with %i\n", - __func__, rc); + if (rc) goto out; - } wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); if (ep->rep_connected <= 0) { @@ -822,8 +751,10 @@ out_noupdate: return rc; } -/* - * rpcrdma_ep_disconnect +/** + * rpcrdma_ep_disconnect - Disconnect underlying transport + * @ep: endpoint to disconnect + * @ia: associated interface adapter * * This is separate from destroy to facilitate the ability * to reconnect without recreating the endpoint. @@ -834,19 +765,20 @@ out_noupdate: void rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { + struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, + rx_ep); int rc; + /* returns without wait if ID is not connected */ rc = rdma_disconnect(ia->ri_id); if (!rc) - /* returns without wait if not connected */ wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 1); else ep->rep_connected = rc; - trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt, - rx_ep), rc); + trace_xprtrdma_disconnect(r_xprt, rc); - ib_drain_qp(ia->ri_id->qp); + rpcrdma_xprt_drain(r_xprt); } /* Fixed-size circular FIFO queue. This implementation is wait-free and @@ -1034,7 +966,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) if (!mr) break; - rc = ia->ri_ops->ro_init_mr(ia, mr); + rc = frwr_init_mr(ia, mr); if (rc) { kfree(mr); break; @@ -1089,9 +1021,9 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) req->rl_buffer = buffer; INIT_LIST_HEAD(&req->rl_registered); - spin_lock(&buffer->rb_reqslock); + spin_lock(&buffer->rb_lock); list_add(&req->rl_all, &buffer->rb_allreqs); - spin_unlock(&buffer->rb_reqslock); + spin_unlock(&buffer->rb_lock); return req; } @@ -1134,8 +1066,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp) out_free: kfree(rep); out: - dprintk("RPC: %s: reply buffer %d alloc failed\n", - __func__, rc); return rc; } @@ -1159,7 +1089,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_send_bufs); INIT_LIST_HEAD(&buf->rb_allreqs); - spin_lock_init(&buf->rb_reqslock); for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; @@ -1174,13 +1103,19 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } buf->rb_credits = 1; - buf->rb_posted_receives = 0; INIT_LIST_HEAD(&buf->rb_recv_bufs); rc = rpcrdma_sendctxs_create(r_xprt); if (rc) goto out; + buf->rb_completion_wq = alloc_workqueue("rpcrdma-%s", + WQ_MEM_RECLAIM | WQ_HIGHPRI, + 0, + r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]); + if (!buf->rb_completion_wq) + goto out; + return 0; out: rpcrdma_buffer_destroy(buf); @@ -1194,9 +1129,18 @@ rpcrdma_destroy_rep(struct rpcrdma_rep *rep) kfree(rep); } +/** + * rpcrdma_req_destroy - Destroy an rpcrdma_req object + * @req: unused object to be destroyed + * + * This function assumes that the caller prevents concurrent device + * unload and transport tear-down. + */ void -rpcrdma_destroy_req(struct rpcrdma_req *req) +rpcrdma_req_destroy(struct rpcrdma_req *req) { + list_del(&req->rl_all); + rpcrdma_free_regbuf(req->rl_recvbuf); rpcrdma_free_regbuf(req->rl_sendbuf); rpcrdma_free_regbuf(req->rl_rdmabuf); @@ -1208,7 +1152,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); - struct rpcrdma_ia *ia = rdmab_to_ia(buf); struct rpcrdma_mr *mr; unsigned int count; @@ -1224,7 +1167,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) if (!list_empty(&mr->mr_list)) list_del(&mr->mr_list); - ia->ri_ops->ro_release_mr(mr); + frwr_release_mr(mr); count++; spin_lock(&buf->rb_mrlock); } @@ -1234,11 +1177,24 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) dprintk("RPC: %s: released %u MRs\n", __func__, count); } +/** + * rpcrdma_buffer_destroy - Release all hw resources + * @buf: root control block for resources + * + * ORDERING: relies on a prior ib_drain_qp : + * - No more Send or Receive completions can occur + * - All MRs, reps, and reqs are returned to their free lists + */ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { cancel_delayed_work_sync(&buf->rb_refresh_worker); + if (buf->rb_completion_wq) { + destroy_workqueue(buf->rb_completion_wq); + buf->rb_completion_wq = NULL; + } + rpcrdma_sendctxs_destroy(buf); while (!list_empty(&buf->rb_recv_bufs)) { @@ -1250,19 +1206,14 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rpcrdma_destroy_rep(rep); } - spin_lock(&buf->rb_reqslock); - while (!list_empty(&buf->rb_allreqs)) { + while (!list_empty(&buf->rb_send_bufs)) { struct rpcrdma_req *req; - req = list_first_entry(&buf->rb_allreqs, - struct rpcrdma_req, rl_all); - list_del(&req->rl_all); - - spin_unlock(&buf->rb_reqslock); - rpcrdma_destroy_req(req); - spin_lock(&buf->rb_reqslock); + req = list_first_entry(&buf->rb_send_bufs, + struct rpcrdma_req, rl_list); + list_del(&req->rl_list); + rpcrdma_req_destroy(req); } - spin_unlock(&buf->rb_reqslock); rpcrdma_mrs_destroy(buf); } @@ -1329,9 +1280,12 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr) { struct rpcrdma_xprt *r_xprt = mr->mr_xprt; - trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, - mr->mr_sg, mr->mr_nents, mr->mr_dir); + if (mr->mr_dir != DMA_NONE) { + trace_xprtrdma_mr_unmap(mr); + ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, + mr->mr_sg, mr->mr_nents, mr->mr_dir); + mr->mr_dir = DMA_NONE; + } __rpcrdma_mr_put(&r_xprt->rx_buf, mr); } @@ -1410,7 +1364,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) * * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for * receiving the payload of RDMA RECV operations. During Long Calls - * or Replies they may be registered externally via ro_map. + * or Replies they may be registered externally via frwr_map. */ struct rpcrdma_regbuf * rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, @@ -1446,8 +1400,10 @@ __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) (void *)rb->rg_base, rdmab_length(rb), rb->rg_direction); - if (ib_dma_mapping_error(device, rdmab_addr(rb))) + if (ib_dma_mapping_error(device, rdmab_addr(rb))) { + trace_xprtrdma_dma_maperr(rdmab_addr(rb)); return false; + } rb->rg_device = device; rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; @@ -1479,10 +1435,14 @@ rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) kfree(rb); } -/* - * Prepost any receive buffer, then post send. +/** + * rpcrdma_ep_post - Post WRs to a transport's Send Queue + * @ia: transport's device information + * @ep: transport's RDMA endpoint information + * @req: rpcrdma_req containing the Send WR to post * - * Receive buffer is donated to hardware, reclaimed upon recv completion. + * Returns 0 if the post was successful, otherwise -ENOTCONN + * is returned. */ int rpcrdma_ep_post(struct rpcrdma_ia *ia, @@ -1501,32 +1461,27 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, --ep->rep_send_count; } - rc = ia->ri_ops->ro_send(ia, req); + rc = frwr_send(ia, req); trace_xprtrdma_post_send(req, rc); if (rc) return -ENOTCONN; return 0; } -/** - * rpcrdma_post_recvs - Maybe post some Receive buffers - * @r_xprt: controlling transport - * @temp: when true, allocate temp rpcrdma_rep objects - * - */ -void +static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct ib_recv_wr *wr, *bad_wr; int needed, count, rc; rc = 0; count = 0; needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); - if (buf->rb_posted_receives > needed) + if (ep->rep_receive_count > needed) goto out; - needed -= buf->rb_posted_receives; + needed -= ep->rep_receive_count; count = 0; wr = NULL; @@ -1574,7 +1529,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) --count; } } - buf->rb_posted_receives += count; + ep->rep_receive_count += count; out: trace_xprtrdma_post_recvs(r_xprt, count, rc); } |