diff options
Diffstat (limited to 'net/sunrpc')
-rw-r--r-- | net/sunrpc/clnt.c | 18 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 106 | ||||
-rw-r--r-- | net/sunrpc/svcsock.c | 32 | ||||
-rw-r--r-- | net/sunrpc/xprt.c | 25 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 86 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/verbs.c | 53 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/xprt_rdma.h | 1 | ||||
-rw-r--r-- | net/sunrpc/xprtsock.c | 3 |
8 files changed, 202 insertions, 122 deletions
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 57d344cf2256..e7a96e478f63 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -436,7 +436,9 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) if (!(rovr->tk_flags & RPC_TASK_KILLED)) { rovr->tk_flags |= RPC_TASK_KILLED; rpc_exit(rovr, -EIO); - rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); + if (RPC_IS_QUEUED(rovr)) + rpc_wake_up_queued_task(rovr->tk_waitqueue, + rovr); } } spin_unlock(&clnt->cl_lock); @@ -597,6 +599,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) } } +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + rpc_task_release_client(task); + rpc_task_set_client(task, clnt); +} +EXPORT_SYMBOL_GPL(rpc_task_reset_client); + + static void rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) { @@ -636,12 +646,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) rpc_task_set_client(task, task_setup_data->rpc_client); rpc_task_set_rpc_message(task, task_setup_data->rpc_message); - if (task->tk_status != 0) { - int ret = task->tk_status; - rpc_put_task(task); - return ERR_PTR(ret); - } - if (task->tk_action == NULL) rpc_call_start(task); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 243fc09b164e..ffb687671da0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task) /* * Mark an RPC call as having completed by clearing the 'active' bit + * and then waking up all tasks that were sleeping. */ -static void rpc_mark_complete_task(struct rpc_task *task) +static int rpc_complete_task(struct rpc_task *task) { - smp_mb__before_clear_bit(); + void *m = &task->tk_runstate; + wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE); + struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE); + unsigned long flags; + int ret; + + spin_lock_irqsave(&wq->lock, flags); clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); - smp_mb__after_clear_bit(); - wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); + ret = atomic_dec_and_test(&task->tk_count); + if (waitqueue_active(wq)) + __wake_up_locked_key(wq, TASK_NORMAL, &k); + spin_unlock_irqrestore(&wq->lock, flags); + return ret; } /* * Allow callers to wait for completion of an RPC call + * + * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit() + * to enforce taking of the wq->lock and hence avoid races with + * rpc_complete_task(). */ int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) { if (action == NULL) action = rpc_wait_bit_killable; - return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, action, TASK_KILLABLE); } EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); @@ -285,15 +299,8 @@ static void rpc_make_runnable(struct rpc_task *task) if (rpc_test_and_set_running(task)) return; if (RPC_IS_ASYNC(task)) { - int status; - INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(rpciod_workqueue, &task->u.tk_work); - if (status < 0) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - return; - } + queue_work(rpciod_workqueue, &task->u.tk_work); } else wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } @@ -623,14 +630,12 @@ static void __rpc_execute(struct rpc_task *task) save_callback = task->tk_callback; task->tk_callback = NULL; save_callback(task); - } - - /* - * Perform the next FSM step. - * tk_action may be NULL when the task has been killed - * by someone else. - */ - if (!RPC_IS_QUEUED(task)) { + } else { + /* + * Perform the next FSM step. + * tk_action may be NULL when the task has been killed + * by someone else. + */ if (task->tk_action == NULL) break; task->tk_action(task); @@ -829,12 +834,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) } rpc_init_task(task, setup_data); - if (task->tk_status < 0) { - int err = task->tk_status; - rpc_put_task(task); - return ERR_PTR(err); - } - task->tk_flags |= flags; dprintk("RPC: allocated task %p\n", task); return task; @@ -857,34 +856,67 @@ static void rpc_async_release(struct work_struct *work) rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); } -void rpc_put_task(struct rpc_task *task) +static void rpc_release_resources_task(struct rpc_task *task) { - if (!atomic_dec_and_test(&task->tk_count)) - return; - /* Release resources */ if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) put_rpccred(task->tk_msg.rpc_cred); rpc_task_release_client(task); - if (task->tk_workqueue != NULL) { +} + +static void rpc_final_put_task(struct rpc_task *task, + struct workqueue_struct *q) +{ + if (q != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); - queue_work(task->tk_workqueue, &task->u.tk_work); + queue_work(q, &task->u.tk_work); } else rpc_free_task(task); } + +static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q) +{ + if (atomic_dec_and_test(&task->tk_count)) { + rpc_release_resources_task(task); + rpc_final_put_task(task, q); + } +} + +void rpc_put_task(struct rpc_task *task) +{ + rpc_do_put_task(task, NULL); +} EXPORT_SYMBOL_GPL(rpc_put_task); +void rpc_put_task_async(struct rpc_task *task) +{ + rpc_do_put_task(task, task->tk_workqueue); +} +EXPORT_SYMBOL_GPL(rpc_put_task_async); + static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); BUG_ON (RPC_IS_QUEUED(task)); - /* Wake up anyone who is waiting for task completion */ - rpc_mark_complete_task(task); + rpc_release_resources_task(task); - rpc_put_task(task); + /* + * Note: at this point we have been removed from rpc_clnt->cl_tasks, + * so it should be safe to use task->tk_count as a test for whether + * or not any other processes still hold references to our rpc_task. + */ + if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) { + /* Wake up anyone who may be waiting for task completion */ + if (!rpc_complete_task(task)) + return; + } else { + if (!atomic_dec_and_test(&task->tk_count)) + return; + } + rpc_final_put_task(task, task->tk_workqueue); } int rpciod_up(void) @@ -908,7 +940,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_RESCUER, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d802e941d365..b7d435c3f19e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, static void svc_udp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", @@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count) static void svc_write_space(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", @@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { + if (wq && waitqueue_active(wq)) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk_sleep(sk)); + wake_up_interruptible(wq); } } @@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq; dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); @@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } /* @@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", sk, sk->sk_state, sk->sk_user_data); @@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } static void svc_tcp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->sk_user_data); @@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; + wait_queue_head_t *wq; dprintk("svc: svc_sock_detach(%p)\n", svsk); @@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 856274d7e85c..9494c3767356 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -202,10 +202,9 @@ int xprt_reserve_xprt(struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; + return 1; out_sleep: @@ -213,7 +212,7 @@ out_sleep: task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) + if (req->rq_ntrans) rpc_sleep_on(&xprt->resend, task, NULL); else rpc_sleep_on(&xprt->sending, task, NULL); @@ -965,7 +964,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; - kref_init(&xprt->kref); + atomic_set(&xprt->count, 1); xprt->max_reqs = max_req; xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); @@ -1145,13 +1144,11 @@ found: /** * xprt_destroy - destroy an RPC transport, killing off all requests. - * @kref: kref for the transport to destroy + * @xprt: transport to destroy * */ -static void xprt_destroy(struct kref *kref) +static void xprt_destroy(struct rpc_xprt *xprt) { - struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref); - dprintk("RPC: destroying transport %p\n", xprt); xprt->shutdown = 1; del_timer_sync(&xprt->timer); @@ -1175,7 +1172,8 @@ static void xprt_destroy(struct kref *kref) */ void xprt_put(struct rpc_xprt *xprt) { - kref_put(&xprt->kref, xprt_destroy); + if (atomic_dec_and_test(&xprt->count)) + xprt_destroy(xprt); } /** @@ -1185,6 +1183,7 @@ void xprt_put(struct rpc_xprt *xprt) */ struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) { - kref_get(&xprt->kref); - return xprt; + if (atomic_inc_not_zero(&xprt->count)) + return xprt; + return NULL; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2ac3f6e8adff..554d0814c875 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) { int len, n = 0, p; + int page_base; + struct page **ppages; if (pos == 0 && xdrbuf->head[0].iov_len) { seg[n].mr_page = NULL; @@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, ++n; } - if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { - if (n == nsegs) - return 0; - seg[n].mr_page = xdrbuf->pages[0]; - seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; - seg[n].mr_len = min_t(u32, - PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); - len = xdrbuf->page_len - seg[n].mr_len; + len = xdrbuf->page_len; + ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); + page_base = xdrbuf->page_base & ~PAGE_MASK; + p = 0; + while (len && n < nsegs) { + seg[n].mr_page = ppages[p]; + seg[n].mr_offset = (void *)(unsigned long) page_base; + seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); + BUG_ON(seg[n].mr_len > PAGE_SIZE); + len -= seg[n].mr_len; ++n; - p = 1; - while (len > 0) { - if (n == nsegs) - return 0; - seg[n].mr_page = xdrbuf->pages[p]; - seg[n].mr_offset = NULL; - seg[n].mr_len = min_t(u32, PAGE_SIZE, len); - len -= seg[n].mr_len; - ++n; - ++p; - } + ++p; + page_base = 0; /* page offset only applies to first page */ } + /* Message overflows the seg array */ + if (len && n == nsegs) + return 0; + if (xdrbuf->tail[0].iov_len) { /* the rpcrdma protocol allows us to omit any trailing * xdr pad bytes, saving the server an RDMA operation. */ if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) return n; if (n == nsegs) + /* Tail remains, but we're out of segments */ return 0; seg[n].mr_page = NULL; seg[n].mr_offset = xdrbuf->tail[0].iov_base; @@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) int copy_len; unsigned char *srcp, *destp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + int page_base; + struct page **ppages; destp = rqst->rq_svec[0].iov_base; curlen = rqst->rq_svec[0].iov_len; @@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) __func__, destp + copy_len, curlen); rqst->rq_svec[0].iov_len += curlen; } - r_xprt->rx_stats.pullup_copy_count += copy_len; - npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; + + page_base = rqst->rq_snd_buf.page_base; + ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT); + page_base &= ~PAGE_MASK; + npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT; for (i = 0; copy_len && i < npages; i++) { - if (i == 0) - curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; - else - curlen = PAGE_SIZE; + curlen = PAGE_SIZE - page_base; if (curlen > copy_len) curlen = copy_len; dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", __func__, i, destp, copy_len, curlen); - srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], - KM_SKB_SUNRPC_DATA); - if (i == 0) - memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); - else - memcpy(destp, srcp, curlen); + srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA); + memcpy(destp, srcp+page_base, curlen); kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); rqst->rq_svec[0].iov_len += curlen; destp += curlen; copy_len -= curlen; + page_base = 0; } /* header now contains entire send message */ return pad; @@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) { int i, npages, curlen, olen; char *destp; + struct page **ppages; + int page_base; curlen = rqst->rq_rcv_buf.head[0].iov_len; if (curlen > copy_len) { /* write chunk header fixup */ @@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) olen = copy_len; i = 0; rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; + page_base = rqst->rq_rcv_buf.page_base; + ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT); + page_base &= ~PAGE_MASK; + if (copy_len && rqst->rq_rcv_buf.page_len) { - npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + + npages = PAGE_ALIGN(page_base + rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; for (; i < npages; i++) { - if (i == 0) - curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base; - else - curlen = PAGE_SIZE; + curlen = PAGE_SIZE - page_base; if (curlen > copy_len) curlen = copy_len; dprintk("RPC: %s: page %d" " srcp 0x%p len %d curlen %d\n", __func__, i, srcp, copy_len, curlen); - destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], - KM_SKB_SUNRPC_DATA); - if (i == 0) - memcpy(destp + rqst->rq_rcv_buf.page_base, - srcp, curlen); - else - memcpy(destp, srcp, curlen); - flush_dcache_page(rqst->rq_rcv_buf.pages[i]); + destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA); + memcpy(destp + page_base, srcp, curlen); + flush_dcache_page(ppages[i]); kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); srcp += curlen; copy_len -= curlen; if (copy_len == 0) break; + page_base = 0; } rqst->rq_rcv_buf.page_len = olen - copy_len; } else diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 5f4c7b3bc711..d4297dc43dc4 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) static inline void rpcrdma_event_process(struct ib_wc *wc) { + struct rpcrdma_mw *frmr; struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long) wc->wr_id; @@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc) return; if (IB_WC_SUCCESS != wc->status) { - dprintk("RPC: %s: %s WC status %X, connection lost\n", - __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", - wc->status); + dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", + __func__, wc->opcode, wc->status); rep->rr_len = ~0U; - rpcrdma_schedule_tasklet(rep); + if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) + rpcrdma_schedule_tasklet(rep); return; } switch (wc->opcode) { + case IB_WC_FAST_REG_MR: + frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + frmr->r.frmr.state = FRMR_IS_VALID; + break; + case IB_WC_LOCAL_INV: + frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + frmr->r.frmr.state = FRMR_IS_INVALID; + break; case IB_WC_RECV: rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu( @@ -1450,6 +1459,12 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) seg->mr_dma = ib_dma_map_single(ia->ri_id->device, seg->mr_offset, seg->mr_dmalen, seg->mr_dir); + if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { + dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", + __func__, + (unsigned long long)seg->mr_dma, + seg->mr_offset, seg->mr_dmalen); + } } static void @@ -1469,7 +1484,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_mr_seg *seg1 = seg; - struct ib_send_wr frmr_wr, *bad_wr; + struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr; + u8 key; int len, pageoff; int i, rc; @@ -1484,6 +1500,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, rpcrdma_map_one(ia, seg, writing); seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; len += seg->mr_len; + BUG_ON(seg->mr_len > PAGE_SIZE); ++seg; ++i; /* Check for holes */ @@ -1494,26 +1511,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, dprintk("RPC: %s: Using frmr %p to map %d segments\n", __func__, seg1->mr_chunk.rl_mw, i); + if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) { + dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n", + __func__, + seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey); + /* Invalidate before using. */ + memset(&invalidate_wr, 0, sizeof invalidate_wr); + invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + invalidate_wr.next = &frmr_wr; + invalidate_wr.opcode = IB_WR_LOCAL_INV; + invalidate_wr.send_flags = IB_SEND_SIGNALED; + invalidate_wr.ex.invalidate_rkey = + seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + post_wr = &invalidate_wr; + } else + post_wr = &frmr_wr; + /* Bump the key */ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); /* Prepare FRMR WR */ memset(&frmr_wr, 0, sizeof frmr_wr); + frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; frmr_wr.opcode = IB_WR_FAST_REG_MR; - frmr_wr.send_flags = 0; /* unsignaled */ + frmr_wr.send_flags = IB_SEND_SIGNALED; frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; + BUG_ON(frmr_wr.wr.fast_reg.length < len); frmr_wr.wr.fast_reg.access_flags = (writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); if (rc) { dprintk("RPC: %s: failed ib_post_send for register," @@ -1542,8 +1578,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, rpcrdma_unmap_one(ia, seg++); memset(&invalidate_wr, 0, sizeof invalidate_wr); + invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.send_flags = 0; /* unsignaled */ + invalidate_wr.send_flags = IB_SEND_SIGNALED; invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c7a7eba991bc..cae761a8536c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ struct { struct ib_fast_reg_page_list *fr_pgl; struct ib_mr *fr_mr; + enum { FRMR_IS_INVALID, FRMR_IS_VALID } state; } frmr; } r; struct list_head mw_list; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c431f5a57960..be96d429b475 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, } xs_reclassify_socket(family, sock); - if (xs_bind(transport, sock)) { + err = xs_bind(transport, sock); + if (err) { sock_release(sock); goto out; } |