summaryrefslogtreecommitdiffstats
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c3
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c5
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c4
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c42
-rw-r--r--net/sunrpc/clnt.c14
-rw-r--r--net/sunrpc/debugfs.c6
-rw-r--r--net/sunrpc/rpc_pipe.c42
-rw-r--r--net/sunrpc/sched.c10
-rw-r--r--net/sunrpc/stats.c16
-rw-r--r--net/sunrpc/sunrpc.h6
-rw-r--r--net/sunrpc/svc.c118
-rw-r--r--net/sunrpc/svc_xprt.c34
-rw-r--r--net/sunrpc/svcsock.c21
-rw-r--r--net/sunrpc/xdr.c82
-rw-r--r--net/sunrpc/xprt.c34
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c7
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c13
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c53
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c32
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c33
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c38
-rw-r--r--net/sunrpc/xprtrdma/transport.c43
-rw-r--r--net/sunrpc/xprtrdma/verbs.c44
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h4
-rw-r--r--net/sunrpc/xprtsock.c7
27 files changed, 471 insertions, 246 deletions
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 12649c9fedab..8654494b4d0a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
- err = crypto_ahash_init(req);
- if (err)
- goto out;
err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
if (err)
goto out;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 1d74d653e6c0..94a2b3f082a8 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
+ __be64 seq_send_be64;
dprintk("RPC: %s\n", __func__);
@@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock);
- *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+
+ seq_send_be64 = cpu_to_be64(seq_send);
+ memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) {
cksumkey = ctx->initiator_sign;
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index dcf9515d9aef..b601a73cc9db 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
u8 flags;
int i;
unsigned int cksum_usage;
+ __be16 be16_ptr;
dprintk("RPC: %s\n", __func__);
- if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
+ memcpy(&be16_ptr, (char *) ptr, 2);
+ if (be16_to_cpu(be16_ptr) != KG2_TOK_MIC)
return GSS_S_DEFECTIVE_TOKEN;
flags = ptr[2];
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 26531193fce4..5089dbb96d58 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1375,7 +1375,7 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
struct proc_dir_entry **p = &sn->use_gssp_proc;
sn->use_gss_proxy = -1;
- *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
+ *p = proc_create_data("use-gss-proxy", S_IFREG | 0600,
sn->proc_net_rpc,
&use_gss_proxy_ops, net);
if (!*p)
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8a7e1c774f9c..cdda4744c9b1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
struct cache_detail *cd)
{
char tbuf[20];
- char *bp, *ep;
- time_t then, now;
+ char *ep;
+ time_t now;
if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
simple_strtoul(tbuf, &ep, 0);
if (*ep && *ep != '\n')
return -EINVAL;
+ /* Note that while we check that 'buf' holds a valid number,
+ * we always ignore the value and just flush everything.
+ * Making use of the number leads to races.
+ */
- bp = tbuf;
- then = get_expiry(&bp);
now = seconds_since_boot();
- cd->nextcheck = now;
- /* Can only set flush_time to 1 second beyond "now", or
- * possibly 1 second beyond flushtime. This is because
- * flush_time never goes backwards so it mustn't get too far
- * ahead of time.
+ /* Always flush everything, so behave like cache_purge()
+ * Do this by advancing flush_time to the current time,
+ * or by one second if it has already reached the current time.
+ * Newly added cache entries will always have ->last_refresh greater
+ * that ->flush_time, so they don't get flushed prematurely.
*/
- if (then >= now) {
- /* Want to flush everything, so behave like cache_purge() */
- if (cd->flush_time >= now)
- now = cd->flush_time + 1;
- then = now;
- }
- cd->flush_time = then;
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+
+ cd->flush_time = now;
+ cd->nextcheck = now;
cache_flush();
*ppos += count;
@@ -1621,20 +1621,20 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
if (cd->procfs == NULL)
goto out_nomem;
- p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
+ p = proc_create_data("flush", S_IFREG | 0600,
cd->procfs, &cache_flush_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
if (cd->cache_request || cd->cache_parse) {
- p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
- cd->procfs, &cache_file_operations_procfs, cd);
+ p = proc_create_data("channel", S_IFREG | 0600, cd->procfs,
+ &cache_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
if (cd->cache_show) {
- p = proc_create_data("content", S_IFREG|S_IRUSR,
- cd->procfs, &content_file_operations_procfs, cd);
+ p = proc_create_data("content", S_IFREG | 0400, cd->procfs,
+ &content_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6e432ecd7f99..c2266f387213 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1231,7 +1231,7 @@ static const struct sockaddr_in6 rpc_in6addr_loopback = {
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
- struct sockaddr *buf, int buflen)
+ struct sockaddr *buf)
{
struct socket *sock;
int err;
@@ -1269,7 +1269,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
goto out_release;
}
- err = kernel_getsockname(sock, buf, &buflen);
+ err = kernel_getsockname(sock, buf);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
@@ -1353,7 +1353,7 @@ int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
rcu_read_unlock();
rpc_set_port(sap, 0);
- err = rpc_sockname(net, sap, salen, buf, buflen);
+ err = rpc_sockname(net, sap, salen, buf);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
@@ -1887,7 +1887,7 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
- trace_rpc_connect_status(task, status);
+ trace_rpc_connect_status(task);
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
@@ -2014,6 +2014,9 @@ call_transmit_status(struct rpc_task *task)
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
xprt_end_transmit(task);
+ if (!task->tk_msg.rpc_proc->p_proc)
+ trace_xprt_ping(task->tk_xprt,
+ task->tk_status);
rpc_exit(task, task->tk_status);
break;
}
@@ -2112,6 +2115,9 @@ call_status(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
int status;
+ if (!task->tk_msg.rpc_proc->p_proc)
+ trace_xprt_ping(task->tk_xprt, task->tk_status);
+
if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
task->tk_status = req->rq_reply_bytes_recvd;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e980d2a493de..45a033329cd4 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -139,7 +139,7 @@ rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
return;
/* make tasks file */
- if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
+ if (!debugfs_create_file("tasks", S_IFREG | 0400, clnt->cl_debugfs,
clnt, &tasks_fops))
goto out_err;
@@ -241,7 +241,7 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
return;
/* make tasks file */
- if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
+ if (!debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs,
xprt, &xprt_info_fops)) {
debugfs_remove_recursive(xprt->debugfs);
xprt->debugfs = NULL;
@@ -317,7 +317,7 @@ inject_fault_dir(struct dentry *topdir)
if (!faultdir)
return NULL;
- if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir,
+ if (!debugfs_create_file("disconnect", S_IFREG | 0400, faultdir,
NULL, &fault_disconnect_fops))
return NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fc97fc3ed637..0f08934b2cea 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -820,13 +820,13 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
{
struct dentry *dentry;
struct inode *dir = d_inode(parent);
- umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
+ umode_t umode = S_IFIFO | 0600;
int err;
if (pipe->ops->upcall == NULL)
- umode &= ~S_IRUGO;
+ umode &= ~0444;
if (pipe->ops->downcall == NULL)
- umode &= ~S_IWUGO;
+ umode &= ~0222;
inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
@@ -1035,7 +1035,7 @@ static const struct rpc_filelist authfiles[] = {
[RPCAUTH_info] = {
.name = "info",
.i_fop = &rpc_info_operations,
- .mode = S_IFREG | S_IRUSR,
+ .mode = S_IFREG | 0400,
},
};
@@ -1068,8 +1068,8 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
{
struct dentry *ret;
- ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
- rpc_clntdir_populate, rpc_client);
+ ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
+ rpc_clntdir_populate, rpc_client);
if (!IS_ERR(ret)) {
rpc_client->cl_pipedir_objects.pdh_dentry = ret;
rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
@@ -1096,17 +1096,17 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
[0] = {
.name = "channel",
.i_fop = &cache_file_operations_pipefs,
- .mode = S_IFREG|S_IRUSR|S_IWUSR,
+ .mode = S_IFREG | 0600,
},
[1] = {
.name = "content",
.i_fop = &content_file_operations_pipefs,
- .mode = S_IFREG|S_IRUSR,
+ .mode = S_IFREG | 0400,
},
[2] = {
.name = "flush",
.i_fop = &cache_flush_operations_pipefs,
- .mode = S_IFREG|S_IRUSR|S_IWUSR,
+ .mode = S_IFREG | 0600,
},
};
@@ -1164,39 +1164,39 @@ enum {
static const struct rpc_filelist files[] = {
[RPCAUTH_lockd] = {
.name = "lockd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_mount] = {
.name = "mount",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfs] = {
.name = "nfs",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_portmap] = {
.name = "portmap",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_statd] = {
.name = "statd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfsd4_cb] = {
.name = "nfsd4_cb",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_cache] = {
.name = "cache",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_nfsd] = {
.name = "nfsd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
[RPCAUTH_gssd] = {
.name = "gssd",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
};
@@ -1261,7 +1261,7 @@ EXPORT_SYMBOL_GPL(rpc_put_sb_net);
static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
[0] = {
.name = "clntXX",
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .mode = S_IFDIR | 0555,
},
};
@@ -1310,7 +1310,7 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
[0] = {
.name = "info",
.i_fop = &rpc_dummy_info_operations,
- .mode = S_IFREG | S_IRUSR,
+ .mode = S_IFREG | 0400,
},
};
@@ -1397,7 +1397,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
sb->s_d_op = &simple_dentry_operations;
sb->s_time_gran = 1;
- inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
+ inode = rpc_get_inode(sb, S_IFDIR | 0555);
sb->s_root = root = d_make_root(inode);
if (!root)
return -ENOMEM;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d9db2eab3a8d..3fe5d60ab0e2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,7 +276,7 @@ static void rpc_set_active(struct rpc_task *task)
{
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
- trace_rpc_task_begin(task->tk_client, task, NULL);
+ trace_rpc_task_begin(task, NULL);
}
/*
@@ -291,7 +291,7 @@ static int rpc_complete_task(struct rpc_task *task)
unsigned long flags;
int ret;
- trace_rpc_task_complete(task->tk_client, task, NULL);
+ trace_rpc_task_complete(task, NULL);
spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
@@ -358,7 +358,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
- trace_rpc_task_sleep(task->tk_client, task, q);
+ trace_rpc_task_sleep(task, q);
__rpc_add_wait_queue(q, task, queue_priority);
@@ -428,7 +428,7 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
return;
}
- trace_rpc_task_wakeup(task->tk_client, task, queue);
+ trace_rpc_task_wakeup(task, queue);
__rpc_remove_wait_queue(queue, task);
@@ -780,7 +780,7 @@ static void __rpc_execute(struct rpc_task *task)
}
if (!do_action)
break;
- trace_rpc_task_run_action(task->tk_client, task, do_action);
+ trace_rpc_task_run_action(task, do_action);
do_action(task);
/*
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 1e671333c3d5..f68aa46c9dd7 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -24,6 +24,8 @@
#include <linux/sunrpc/metrics.h>
#include <linux/rcupdate.h>
+#include <trace/events/sunrpc.h>
+
#include "netns.h"
#define RPCDBG_FACILITY RPCDBG_MISC
@@ -148,7 +150,7 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
struct rpc_iostats *op_metrics)
{
struct rpc_rqst *req = task->tk_rqstp;
- ktime_t delta, now;
+ ktime_t backlog, execute, now;
if (!op_metrics || !req)
return;
@@ -164,16 +166,20 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
+ backlog = 0;
if (ktime_to_ns(req->rq_xtime)) {
- delta = ktime_sub(req->rq_xtime, task->tk_start);
- op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
+ backlog = ktime_sub(req->rq_xtime, task->tk_start);
+ op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog);
}
+
op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
- delta = ktime_sub(now, task->tk_start);
- op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+ execute = ktime_sub(now, task->tk_start);
+ op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
spin_unlock(&op_metrics->om_lock);
+
+ trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute);
}
EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics);
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index f2b7cb540e61..09a0315ea77b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -37,12 +37,6 @@ struct rpc_buffer {
char data[];
};
-static inline int rpc_reply_expected(struct rpc_task *task)
-{
- return (task->tk_msg.rpc_proc != NULL) &&
- (task->tk_msg.rpc_proc->p_decode != NULL);
-}
-
static inline int sock_is_loopback(struct sock *sk)
{
struct dst_entry *dst;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 387cc4add6f6..30a4226baf03 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
/* Syntactic check complete */
serv->sv_stats->rpccnt++;
+ trace_svc_process(rqstp, progp->pg_name);
/* Build the reply header. */
statp = resv->iov_base +resv->iov_len;
@@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp)
}
/* Returns 1 for send, 0 for drop */
- if (likely(svc_process_common(rqstp, argv, resv))) {
- int ret = svc_send(rqstp);
+ if (likely(svc_process_common(rqstp, argv, resv)))
+ return svc_send(rqstp);
- trace_svc_process(rqstp, ret);
- return ret;
- }
out_drop:
- trace_svc_process(rqstp, 0);
svc_drop(rqstp);
return 0;
}
@@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
return max;
}
EXPORT_SYMBOL_GPL(svc_max_payload);
+
+/**
+ * svc_fill_write_vector - Construct data argument for VFS write call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of write payload
+ * @total: total number of bytes of write payload
+ *
+ * Returns the number of elements populated in the data argument array.
+ */
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
+ size_t total)
+{
+ struct kvec *vec = rqstp->rq_vec;
+ struct page **pages;
+ unsigned int i;
+
+ /* Some types of transport can present the write payload
+ * entirely in rq_arg.pages. In this case, @first is empty.
+ */
+ i = 0;
+ if (first->iov_len) {
+ vec[i].iov_base = first->iov_base;
+ vec[i].iov_len = min_t(size_t, total, first->iov_len);
+ total -= vec[i].iov_len;
+ ++i;
+ }
+
+ WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
+ pages = rqstp->rq_arg.pages;
+ while (total) {
+ vec[i].iov_base = page_address(*pages);
+ vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
+ total -= vec[i].iov_len;
+ ++i;
+
+ ++pages;
+ }
+
+ WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
+ return i;
+}
+EXPORT_SYMBOL_GPL(svc_fill_write_vector);
+
+/**
+ * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of pathname
+ * @total: total length of the pathname argument
+ *
+ * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
+ * released automatically when @rqstp is recycled.
+ */
+char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
+ size_t total)
+{
+ struct xdr_buf *arg = &rqstp->rq_arg;
+ struct page **pages;
+ char *result;
+
+ /* VFS API demands a NUL-terminated pathname. This function
+ * uses a page from @rqstp as the pathname buffer, to enable
+ * direct placement. Thus the total buffer size is PAGE_SIZE.
+ * Space in this buffer for NUL-termination requires that we
+ * cap the size of the returned symlink pathname just a
+ * little early.
+ */
+ if (total > PAGE_SIZE - 1)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ /* Some types of transport can present the pathname entirely
+ * in rq_arg.pages. If not, then copy the pathname into one
+ * page.
+ */
+ pages = arg->pages;
+ WARN_ON_ONCE(arg->page_base != 0);
+ if (first->iov_base == 0) {
+ result = page_address(*pages);
+ result[total] = '\0';
+ } else {
+ size_t len, remaining;
+ char *dst;
+
+ result = page_address(*(rqstp->rq_next_page++));
+ dst = result;
+ remaining = total;
+
+ len = min_t(size_t, total, first->iov_len);
+ memcpy(dst, first->iov_base, len);
+ dst += len;
+ remaining -= len;
+
+ /* No more than one page left */
+ if (remaining) {
+ len = min_t(size_t, remaining, PAGE_SIZE);
+ memcpy(dst, page_address(*pages), len);
+ dst += len;
+ }
+
+ *dst = '\0';
+ }
+
+ /* Sanity check: we don't allow the pathname argument to
+ * contain a NUL byte.
+ */
+ if (strlen(result) != total)
+ return ERR_PTR(-EINVAL);
+ return result;
+}
+EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index f9307bd6644b..5185efb9027b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
xprt->xpt_net = get_net(net);
+ strcpy(xprt->xpt_remotebuf, "uninitialized");
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
@@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
int cpu;
if (!svc_xprt_has_something_to_do(xprt))
- goto out;
+ return;
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
* the transport twice.
*/
- if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
- /* Don't enqueue transport while already enqueued */
- dprintk("svc: transport %p busy, not enqueued\n", xprt);
- goto out;
- }
+ if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+ return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
atomic_long_inc(&pool->sp_stats.packets);
- dprintk("svc: transport %p put into queue\n", xprt);
spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
@@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
atomic_long_inc(&pool->sp_stats.threads_woken);
+ rqstp->rq_qtime = ktime_get();
wake_up_process(rqstp->rq_task);
goto out_unlock;
}
@@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
out_unlock:
rcu_read_unlock();
put_cpu();
-out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
}
EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
@@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
struct svc_xprt, xpt_ready);
list_del_init(&xprt->xpt_ready);
svc_xprt_get(xprt);
-
- dprintk("svc: transport %p dequeued, inuse=%d\n",
- xprt, kref_read(&xprt->xpt_ref));
}
spin_unlock_bh(&pool->sp_lock);
out:
- trace_svc_xprt_dequeue(xprt);
return xprt;
}
@@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
- rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_rqst(rqstp);
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
@@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv)
if (test_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
rcu_read_unlock();
- dprintk("svc: daemon %p woken up.\n", rqstp);
wake_up_process(rqstp->rq_task);
trace_svc_wake_up(rqstp->rq_task->pid);
return;
@@ -734,6 +726,7 @@ out_found:
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
+ trace_svc_xprt_dequeue(rqstp);
return rqstp->rq_xprt;
}
@@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
- dprintk("svc: got len=%d\n", len);
+ rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
@@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- if (xprt->xpt_ops->xpo_secure_port(rqstp))
- set_bit(RQ_SECURE, &rqstp->rq_flags);
- else
- clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
@@ -859,7 +849,6 @@ out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
out:
- trace_svc_recv(rqstp, err);
return err;
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp)
goto out;
/* release the receive skb before sending the reply */
- rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_rqst(rqstp);
/* calculate over-all length */
xb = &rqstp->rq_res;
@@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp)
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
+ trace_svc_stats_latency(rqstp);
if (test_bit(XPT_DEAD, &xprt->xpt_flags)
|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN;
@@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp)
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
rpc_wake_up(&xprt->xpt_bc_pending);
+ trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
len = 0;
out:
- trace_svc_send(rqstp, len);
return len;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 943f2a745cd5..5445145e639c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
}
-static int svc_sock_secure_port(struct svc_rqst *rqstp)
+static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
- return svc_port_is_privileged(svc_addr(rqstp));
+ if (svc_port_is_privileged(svc_addr(rqstp)))
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
+ else
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -832,12 +835,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- err = kernel_getpeername(newsock, sin, &slen);
+ err = kernel_getpeername(newsock, sin);
if (err < 0) {
net_warn_ratelimited("%s: peername failed (err %d)!\n",
serv->sv_name, -err);
goto failed; /* aborted connection or whatever */
}
+ slen = err;
/* Ideally, we would want to reject connections from unauthorized
* hosts here, but when we get encryption, the IP of the host won't
@@ -866,7 +870,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
if (IS_ERR(newsvsk))
goto failed;
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
- err = kernel_getsockname(newsock, sin, &slen);
+ err = kernel_getsockname(newsock, sin);
+ slen = err;
if (unlikely(err < 0)) {
dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
slen = offsetof(struct sockaddr, sa_data);
@@ -1307,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
+ strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
@@ -1465,7 +1471,8 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
err = PTR_ERR(svsk);
goto out;
}
- if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+ salen = kernel_getsockname(svsk->sk_sock, sin);
+ if (salen >= 0)
svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
return svc_one_sock_name(svsk, name_return, len);
@@ -1539,10 +1546,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
if (error < 0)
goto bummer;
- newlen = len;
- error = kernel_getsockname(sock, newsin, &newlen);
+ error = kernel_getsockname(sock, newsin);
if (error < 0)
goto bummer;
+ newlen = error;
if (protocol == IPPROTO_TCP) {
if ((error = kernel_listen(sock, 64)) < 0)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index e34f4ee7f2b6..30afbd236656 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1519,6 +1519,88 @@ out:
EXPORT_SYMBOL_GPL(xdr_process_buf);
/**
+ * xdr_stream_decode_opaque - Decode variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store opaque data
+ * @size: size of storage buffer @ptr
+ *
+ * Return values:
+ * On success, returns size of object stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE on overflow of storage buffer @ptr
+ */
+ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
+ if (ret <= 0)
+ return ret;
+ memcpy(ptr, p, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque);
+
+/**
+ * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store pointer to opaque data
+ * @maxlen: maximum acceptable object size
+ * @gfp_flags: GFP mask to use
+ *
+ * Return values:
+ * On success, returns size of object stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE if the size of the object would exceed @maxlen
+ * %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
+ size_t maxlen, gfp_t gfp_flags)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
+ if (ret > 0) {
+ *ptr = kmemdup(p, ret, gfp_flags);
+ if (*ptr != NULL)
+ return ret;
+ ret = -ENOMEM;
+ }
+ *ptr = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup);
+
+/**
+ * xdr_stream_decode_string - Decode variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store string
+ * @size: size of storage buffer @str
+ *
+ * Return values:
+ * On success, returns length of NUL-terminated string stored in *@str
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE on overflow of storage buffer @str
+ */
+ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size)
+{
+ ssize_t ret;
+ void *p;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
+ if (ret > 0) {
+ memcpy(str, p, ret);
+ str[ret] = '\0';
+ return strlen(str);
+ }
+ *str = '\0';
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string);
+
+/**
* xdr_stream_decode_string_dup - Decode and duplicate variable length string
* @xdr: pointer to xdr_stream
* @str: location to store pointer to string
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8f0ad4f268da..70f005044f06 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,6 +826,7 @@ static void xprt_connect_status(struct rpc_task *task)
* @xprt: transport on which the original request was transmitted
* @xid: RPC XID of incoming reply
*
+ * Caller holds xprt->recv_lock.
*/
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
@@ -834,6 +835,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
list_for_each_entry(entry, &xprt->recv, rq_list)
if (entry->rq_xid == xid) {
trace_xprt_lookup_rqst(xprt, xid, 0);
+ entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
return entry;
}
@@ -889,7 +891,13 @@ __must_hold(&req->rq_xprt->recv_lock)
}
}
-static void xprt_update_rtt(struct rpc_task *task)
+/**
+ * xprt_update_rtt - Update RPC RTT statistics
+ * @task: RPC request that recently completed
+ *
+ * Caller holds xprt->recv_lock.
+ */
+void xprt_update_rtt(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_rtt *rtt = task->tk_client->cl_rtt;
@@ -902,13 +910,14 @@ static void xprt_update_rtt(struct rpc_task *task)
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
}
}
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
/**
* xprt_complete_rqst - called when reply processing is complete
* @task: RPC request that recently completed
* @copied: actual number of bytes received from the transport
*
- * Caller holds transport lock.
+ * Caller holds xprt->recv_lock.
*/
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
@@ -920,9 +929,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
xprt->stat.recvs++;
- req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
- if (xprt->ops->timer != NULL)
- xprt_update_rtt(task);
list_del_init(&req->rq_list);
req->rq_private_buf.len = copied;
@@ -1003,7 +1009,7 @@ void xprt_transmit(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
unsigned int connect_cookie;
- int status, numreqs;
+ int status;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
@@ -1027,7 +1033,6 @@ void xprt_transmit(struct rpc_task *task)
return;
connect_cookie = xprt->connect_cookie;
- req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
@@ -1042,9 +1047,6 @@ void xprt_transmit(struct rpc_task *task)
xprt->ops->set_retrans_timeout(task);
- numreqs = atomic_read(&xprt->num_reqs);
- if (numreqs > xprt->stat.max_slots)
- xprt->stat.max_slots = numreqs;
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1106,14 +1108,15 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
{
struct rpc_rqst *req = ERR_PTR(-EAGAIN);
- if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+ if (xprt->num_reqs >= xprt->max_reqs)
goto out;
+ ++xprt->num_reqs;
spin_unlock(&xprt->reserve_lock);
req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
spin_lock(&xprt->reserve_lock);
if (req != NULL)
goto out;
- atomic_dec(&xprt->num_reqs);
+ --xprt->num_reqs;
req = ERR_PTR(-ENOMEM);
out:
return req;
@@ -1121,7 +1124,8 @@ out:
static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+ if (xprt->num_reqs > xprt->min_reqs) {
+ --xprt->num_reqs;
kfree(req);
return true;
}
@@ -1157,6 +1161,8 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
spin_unlock(&xprt->reserve_lock);
return;
out_init_req:
+ xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
+ xprt->num_reqs);
task->tk_status = 0;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
@@ -1224,7 +1230,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
else
xprt->max_reqs = num_prealloc;
xprt->min_reqs = num_prealloc;
- atomic_set(&xprt->num_reqs, num_prealloc);
+ xprt->num_reqs = num_prealloc;
return xprt;
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ed1a4a3065ee..47ebac949769 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -44,13 +44,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
if (IS_ERR(req))
return PTR_ERR(req);
- rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
- DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb))
- goto out_fail;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
-
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index d5f95bb39300..5cc68a824f45 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -191,7 +191,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
- return ERR_PTR(-ENOBUFS);
+ return ERR_PTR(-EAGAIN);
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
@@ -251,6 +251,16 @@ out_maperr:
return ERR_PTR(-EIO);
}
+/* Post Send WR containing the RPC Call message.
+ */
+static int
+fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *bad_wr;
+
+ return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
@@ -305,6 +315,7 @@ out_reset:
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map,
+ .ro_send = fmr_op_send,
.ro_unmap_sync = fmr_op_unmap_sync,
.ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 90f688f19783..c5743a0960be 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -357,8 +357,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mr *mr;
struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
- struct ib_send_wr *bad_wr;
- int rc, i, n;
+ int i, n;
u8 key;
mr = NULL;
@@ -367,7 +366,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_mr_defer_recovery(mr);
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
- return ERR_PTR(-ENOBUFS);
+ return ERR_PTR(-EAGAIN);
} while (mr->frwr.fr_state != FRWR_IS_INVALID);
frwr = &mr->frwr;
frwr->fr_state = FRWR_IS_VALID;
@@ -407,22 +406,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
ib_update_fast_reg_key(ibmr, ++key);
reg_wr = &frwr->fr_regwr;
- reg_wr->wr.next = NULL;
- reg_wr->wr.opcode = IB_WR_REG_MR;
- frwr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frwr->fr_cqe;
- reg_wr->wr.num_sge = 0;
- reg_wr->wr.send_flags = 0;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
- rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
- if (rc)
- goto out_senderr;
-
mr->mr_handle = ibmr->rkey;
mr->mr_length = ibmr->length;
mr->mr_offset = ibmr->iova;
@@ -442,11 +431,40 @@ out_mapmr_err:
frwr->fr_mr, n, mr->mr_nents);
rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
+}
-out_senderr:
- pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
- rpcrdma_mr_defer_recovery(mr);
- return ERR_PTR(-ENOTCONN);
+/* Post Send WR containing the RPC Call message.
+ *
+ * For FRMR, chain any FastReg WRs to the Send WR. Only a
+ * single ib_post_send call is needed to register memory
+ * and then post the Send WR.
+ */
+static int
+frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_send_wr *post_wr, *bad_wr;
+ struct rpcrdma_mr *mr;
+
+ post_wr = &req->rl_sendctx->sc_wr;
+ list_for_each_entry(mr, &req->rl_registered, mr_list) {
+ struct rpcrdma_frwr *frwr;
+
+ frwr = &mr->frwr;
+
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ frwr->fr_regwr.wr.next = post_wr;
+ frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
+ frwr->fr_regwr.wr.num_sge = 0;
+ frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
+ frwr->fr_regwr.wr.send_flags = 0;
+
+ post_wr = &frwr->fr_regwr.wr;
+ }
+
+ /* If ib_post_send fails, the next ->send_request for
+ * @req will queue these MWs for recovery.
+ */
+ return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
}
/* Handle a remotely invalidated mr on the @mrs list
@@ -561,6 +579,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_send = frwr_op_send,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f0855a959a27..e8adad33d0bb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,6 +377,11 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} while (nsegs);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -423,7 +428,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -440,6 +445,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -481,7 +491,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- return PTR_ERR(seg);
+ goto out_maperr;
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -498,6 +508,11 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
+
+out_maperr:
+ if (PTR_ERR(seg) == -EAGAIN)
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ return PTR_ERR(seg);
}
/**
@@ -724,8 +739,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
* Returns:
* %0 if the RPC was sent successfully,
* %-ENOTCONN if the connection was lost,
- * %-EAGAIN if not enough pages are available for on-demand reply buffer,
- * %-ENOBUFS if no MRs are available to register chunks,
+ * %-EAGAIN if the caller should call again with the same arguments,
+ * %-ENOBUFS if the caller should call again after a delay,
* %-EMSGSIZE if the transport header is too small,
* %-EIO if a permanent problem occurred while marshaling.
*/
@@ -868,10 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
- if (ret != -ENOBUFS) {
- pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
- r_xprt->rx_stats.failed_marshal_count++;
- }
+ r_xprt->rx_stats.failed_marshal_count++;
return ret;
}
@@ -1366,7 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
- queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
+ queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
out_badstatus:
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index a4a8f6989ee7..dd8a431dc2ae 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -51,9 +51,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* RPC/RDMA parameters */
-unsigned int svcrdma_ord = RPCRDMA_ORD;
+unsigned int svcrdma_ord = 16; /* historical default */
static unsigned int min_ord = 1;
-static unsigned int max_ord = 4096;
+static unsigned int max_ord = 255;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 19e9c6b33042..3d45015dca97 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -110,15 +110,16 @@
* the RDMA_RECV completion. The SGL should contain full pages up until the
* last one.
*/
-static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *ctxt,
- u32 byte_count)
+static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *ctxt)
{
struct page *page;
- u32 bc;
int sge_no;
+ u32 len;
- /* Swap the page in the SGE with the page in argpages */
+ /* The reply path assumes the Call's transport header resides
+ * in rqstp->rq_pages[0].
+ */
page = ctxt->pages[0];
put_page(rqstp->rq_pages[0]);
rqstp->rq_pages[0] = page;
@@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
rqstp->rq_arg.head[0].iov_len =
- min_t(size_t, byte_count, ctxt->sge[0].length);
- rqstp->rq_arg.len = byte_count;
- rqstp->rq_arg.buflen = byte_count;
+ min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
+ rqstp->rq_arg.len = ctxt->byte_len;
+ rqstp->rq_arg.buflen = ctxt->byte_len;
/* Compute bytes past head in the SGL */
- bc = byte_count - rqstp->rq_arg.head[0].iov_len;
+ len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
/* If data remains, store it in the pagelist */
- rqstp->rq_arg.page_len = bc;
+ rqstp->rq_arg.page_len = len;
rqstp->rq_arg.page_base = 0;
sge_no = 1;
- while (bc && sge_no < ctxt->count) {
+ while (len && sge_no < ctxt->count) {
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
- bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
+ len -= min_t(u32, len, ctxt->sge[sge_no].length);
sge_no++;
}
rqstp->rq_respages = &rqstp->rq_pages[sge_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* If not all pages were used from the SGL, free the remaining ones */
- bc = sge_no;
+ len = sge_no;
while (sge_no < ctxt->count) {
page = ctxt->pages[sge_no++];
put_page(page);
}
- ctxt->count = bc;
+ ctxt->count = len;
/* Set up tail */
rqstp->rq_arg.tail[0].iov_base = NULL;
@@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv);
- /* Build up the XDR from the receive buffers. */
- rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
+ svc_rdma_build_arg_xdr(rqstp, ctxt);
- /* Decode the RDMA header. */
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
if (ret < 0)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9ad12a215b51..96cc8f6597d3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
-static int svc_rdma_secure_port(struct svc_rqst *);
+static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_ops = {
@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
+ pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
+ ib_wc_status_msg(wc->status),
+ wc->status, wc->vendor_err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
@@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
*/
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
- if (listener)
+ if (listener) {
+ strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
+ }
return cma_xprt;
}
@@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!svc_rdma_prealloc_ctxts(newxprt))
goto errout;
- /*
- * Limit ORD based on client limit, local device limit, and
- * configured svcrdma limit.
- */
- newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
- newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
-
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
@@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 0;
- conn_param.initiator_depth = newxprt->sc_ord;
+ conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
+ dev->attrs.max_qp_init_rd_atom);
+ if (!conn_param.initiator_depth) {
+ dprintk("svcrdma: invalid ORD setting\n");
+ ret = -EINVAL;
+ goto errout;
+ }
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
- if (ret) {
- dprintk("svcrdma: failed to accept new connection, ret=%d\n",
- ret);
+ if (ret)
goto errout;
- }
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
@@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
- dprintk(" ord : %d\n", newxprt->sc_ord);
+ dprintk(" ord : %d\n", conn_param.initiator_depth);
return &newxprt->sc_xprt;
@@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
-static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
- return 1;
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
}
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 4b1ecfe979cf..cc1aad325496 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,7 +52,6 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
-#include <linux/smp.h>
#include "xprt_rdma.h"
@@ -237,8 +236,6 @@ rpcrdma_connect_worker(struct work_struct *work)
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
spin_lock_bh(&xprt->transport_lock);
- if (++xprt->connect_cookie == 0) /* maintain a reserved value */
- ++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
@@ -540,29 +537,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-/* Allocate a fixed-size buffer in which to construct and send the
- * RPC-over-RDMA header for this request.
- */
-static bool
-rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
- gfp_t flags)
-{
- size_t size = RPCRDMA_HDRBUF_SIZE;
- struct rpcrdma_regbuf *rb;
-
- if (req->rl_rdmabuf)
- return true;
-
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
- if (IS_ERR(rb))
- return false;
-
- r_xprt->rx_stats.hardway_register_count += size;
- req->rl_rdmabuf = rb;
- xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
- return true;
-}
-
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
@@ -644,15 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task)
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
- if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
- goto out_fail;
if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
goto out_fail;
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- req->rl_cpu = smp_processor_id();
- req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
@@ -694,7 +664,8 @@ xprt_rdma_free(struct rpc_task *task)
* Returns:
* %0 if the RPC message has been sent
* %-ENOTCONN if the caller should reconnect and call again
- * %-ENOBUFS if the caller should call again later
+ * %-EAGAIN if the caller should call again
+ * %-ENOBUFS if the caller should call again after a delay
* %-EIO if a permanent error occurred and the request was not
* sent. Do not try to send this message again.
*/
@@ -723,9 +694,9 @@ xprt_rdma_send_request(struct rpc_task *task)
rpcrdma_recv_buffer_get(req);
/* Must suppress retransmit to maintain credits */
- if (req->rl_connect_cookie == xprt->connect_cookie)
+ if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection;
- req->rl_connect_cookie = xprt->connect_cookie;
+ rqst->rq_xtime = ktime_get();
__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
@@ -733,6 +704,12 @@ xprt_rdma_send_request(struct rpc_task *task)
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
+
+ /* An RPC with no reply will throw off credit accounting,
+ * so drop the connection to reset the credit grant.
+ */
+ if (!rpc_reply_expected(task))
+ goto drop_connection;
return 0;
failed_marshal:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e6f84a6434a0..fe5eaca2d197 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -250,11 +250,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
- ia->ri_pd = NULL;
ia->ri_device = NULL;
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
+ ++xprt->rx_xprt.connect_cookie;
connstate = 1;
rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
@@ -273,6 +273,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -EAGAIN;
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
+ ++xprt->rx_xprt.connect_cookie;
connstate = -ECONNABORTED;
connected:
xprt->rx_buf.rb_credits = 1;
@@ -445,7 +446,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
ia->ri_id->qp = NULL;
}
ib_free_cq(ep->rep_attr.recv_cq);
+ ep->rep_attr.recv_cq = NULL;
ib_free_cq(ep->rep_attr.send_cq);
+ ep->rep_attr.send_cq = NULL;
/* The ULP is responsible for ensuring all DMA
* mappings and MRs are gone.
@@ -458,6 +461,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
}
rpcrdma_mrs_destroy(buf);
+ ib_dealloc_pd(ia->ri_pd);
+ ia->ri_pd = NULL;
/* Allow waiters to continue */
complete(&ia->ri_remove_done);
@@ -589,11 +594,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
- ep->rep_remote_cma.responder_resources = 32;
- else
- ep->rep_remote_cma.responder_resources =
- ia->ri_device->attrs.max_qp_rd_atom;
+ ep->rep_remote_cma.responder_resources =
+ min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
/* Limit transport retries so client can detect server
* GID changes quickly. RPC layer handles re-establishing
@@ -628,14 +630,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
cancel_delayed_work_sync(&ep->rep_connect_worker);
- if (ia->ri_id->qp) {
+ if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
- ib_free_cq(ep->rep_attr.recv_cq);
- ib_free_cq(ep->rep_attr.send_cq);
+ if (ep->rep_attr.recv_cq)
+ ib_free_cq(ep->rep_attr.recv_cq);
+ if (ep->rep_attr.send_cq)
+ ib_free_cq(ep->rep_attr.send_cq);
}
/* Re-establish a connection after a device removal event.
@@ -1024,7 +1028,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(free);
LIST_HEAD(all);
- for (count = 0; count < 32; count++) {
+ for (count = 0; count < 3; count++) {
struct rpcrdma_mr *mr;
int rc;
@@ -1049,8 +1053,9 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count;
spin_unlock(&buf->rb_mrlock);
-
trace_xprtrdma_createmrs(r_xprt, count);
+
+ xprt_write_space(&r_xprt->rx_xprt);
}
static void
@@ -1068,17 +1073,27 @@ struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
+ struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (req == NULL)
return ERR_PTR(-ENOMEM);
+ rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
+ DMA_TO_DEVICE, GFP_KERNEL);
+ if (IS_ERR(rb)) {
+ kfree(req);
+ return ERR_PTR(-ENOMEM);
+ }
+ req->rl_rdmabuf = rb;
+ xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
+ req->rl_buffer = buffer;
+ INIT_LIST_HEAD(&req->rl_registered);
+
spin_lock(&buffer->rb_reqslock);
list_add(&req->rl_all, &buffer->rb_allreqs);
spin_unlock(&buffer->rb_reqslock);
- req->rl_buffer = &r_xprt->rx_buf;
- INIT_LIST_HEAD(&req->rl_registered);
return req;
}
@@ -1535,7 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_req *req)
{
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
- struct ib_send_wr *send_wr_fail;
int rc;
if (req->rl_reply) {
@@ -1554,7 +1568,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
--ep->rep_send_count;
}
- rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+ rc = ia->ri_ops->ro_send(ia, req);
trace_xprtrdma_post_send(req, rc);
if (rc)
return -ENOTCONN;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 69883a960a3f..3d3b423fa9c1 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -334,8 +334,6 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- int rl_cpu;
- unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
@@ -474,6 +472,8 @@ struct rpcrdma_memreg_ops {
(*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mr **);
+ int (*ro_send)(struct rpcrdma_ia *ia,
+ struct rpcrdma_req *req);
void (*ro_reminv)(struct rpcrdma_rep *rep,
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a6b8c1f8f92a..c8902f11efdd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -527,6 +527,7 @@ static int xs_local_send_request(struct rpc_task *task)
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
+ req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
true, &sent);
dprintk("RPC: %s(%u) = %d\n",
@@ -589,6 +590,7 @@ static int xs_udp_send_request(struct rpc_task *task)
if (!xprt_bound(xprt))
return -ENOTCONN;
+ req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
xdr, req->rq_bytes_sent, true, &sent);
@@ -678,6 +680,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
+ req->rq_xtime = ktime_get();
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
@@ -1060,6 +1063,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
if (!rovr)
goto out_unlock;
xprt_pin_rqst(rovr);
+ xprt_update_rtt(rovr->rq_task);
spin_unlock(&xprt->recv_lock);
task = rovr->rq_task;
@@ -1794,10 +1798,9 @@ static void xs_sock_set_reuseport(struct socket *sock)
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
- int buflen;
unsigned short port = 0;
- if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+ if (kernel_getsockname(sock, (struct sockaddr *)&buf) < 0)
goto out;
switch (buf.ss_family) {
case AF_INET6:
OpenPOWER on IntegriCloud