summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/9p/protocol.c 11
-rw-r--r-- net/9p/trans_virtio.c 3
-rw-r--r-- net/atm/mpc.c 2
-rw-r--r-- net/bluetooth/hci_core.c 2
-rw-r--r-- net/bluetooth/l2cap_core.c 2
-rw-r--r-- net/bridge/br_multicast.c 2
-rw-r--r-- net/bridge/netfilter/ebtables.c 11
-rw-r--r-- net/can/bcm.c 13
-rw-r--r-- net/ceph/messenger.c 31
-rw-r--r-- net/ceph/osd_client.c 216
-rw-r--r-- net/ceph/osdmap.c 24
-rw-r--r-- net/ceph/pagevec.c 4
-rw-r--r-- net/core/dev.c 2
-rw-r--r-- net/core/ethtool.c 12
-rw-r--r-- net/core/pktgen.c 3
-rw-r--r-- net/dcb/dcbnl.c 3
-rw-r--r-- net/dccp/ccids/ccid2.c 3
-rw-r--r-- net/ieee802154/nl-phy.c 2
-rw-r--r-- net/ipv4/fib_frontend.c 2
-rw-r--r-- net/ipv4/route.c 5
-rw-r--r-- net/ipv6/icmp.c 2
-rw-r--r-- net/ipv6/ila/ila_xlat.c 3
-rw-r--r-- net/mac80211/chan.c 2
-rw-r--r-- net/mac80211/main.c 2
-rw-r--r-- net/mac80211/rc80211_minstrel.c 4
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.c 4
-rw-r--r-- net/mac80211/scan.c 2
-rw-r--r-- net/mac80211/util.c 5
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 3
-rw-r--r-- net/netfilter/nf_conntrack_proto.c 3
-rw-r--r-- net/netfilter/nf_nat_core.c 5
-rw-r--r-- net/netfilter/nf_tables_api.c 6
-rw-r--r-- net/netfilter/nfnetlink_cthelper.c 5
-rw-r--r-- net/netfilter/x_tables.c 2
-rw-r--r-- net/netlink/genetlink.c 10
-rw-r--r-- net/netrom/af_netrom.c 2
-rw-r--r-- net/openvswitch/datapath.c 5
-rw-r--r-- net/openvswitch/vport.c 2
-rw-r--r-- net/packet/af_packet.c 2
-rw-r--r-- net/rds/ib.c 3
-rw-r--r-- net/rds/ib_cm.c 6
-rw-r--r-- net/rds/info.c 2
-rw-r--r-- net/rose/af_rose.c 3
-rw-r--r-- net/rxrpc/rxkad.c 2
-rw-r--r-- net/sched/sch_fq_codel.c 7
-rw-r--r-- net/sched/sch_hhf.c 9
-rw-r--r-- net/sctp/auth.c 5
-rw-r--r-- net/sctp/protocol.c 2
-rw-r--r-- net/smc/smc_wr.c 6
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c 3
-rw-r--r-- net/sunrpc/auth_gss/gss_rpc_upcall.c 6
-rw-r--r-- net/sunrpc/cache.c 2
-rw-r--r-- net/sunrpc/clnt.c 1
-rw-r--r-- net/sunrpc/xprt.c 17
-rw-r--r-- net/sunrpc/xprtrdma/backchannel.c 107
-rw-r--r-- net/sunrpc/xprtrdma/fmr_ops.c 26
-rw-r--r-- net/sunrpc/xprtrdma/frwr_ops.c 33
-rw-r--r-- net/sunrpc/xprtrdma/module.c 5
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c 73
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma.c 3
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c 55
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 439
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c 133
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c 510
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c 481
-rw-r--r-- net/sunrpc/xprtrdma/transport.c 68
-rw-r--r-- net/sunrpc/xprtrdma/verbs.c 292
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h 28
-rw-r--r-- net/sunrpc/xprtsock.c 4
-rw-r--r-- net/tipc/netlink_compat.c 5
-rw-r--r-- net/wireless/nl80211.c 4
71 files changed, 1401 insertions, 1361 deletions
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 16e10680518c..931ea00c4fed 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -242,8 +242,9 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
"w", nwname);
if (!errcode) {
*wnames =
- kmalloc(sizeof(char *) * *nwname,
- GFP_NOFS);
+ kmalloc_array(*nwname,
+ sizeof(char *),
+ GFP_NOFS);
if (!*wnames)
errcode = -ENOMEM;
}
@@ -285,9 +286,9 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
p9pdu_readf(pdu, proto_version, "w", nwqid);
if (!errcode) {
*wqids =
- kmalloc(*nwqid *
- sizeof(struct p9_qid),
- GFP_NOFS);
+ kmalloc_array(*nwqid,
+ sizeof(struct p9_qid),
+ GFP_NOFS);
if (*wqids == NULL)
errcode = -ENOMEM;
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 4d0372263e5d..05006cbb3361 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -360,7 +360,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
(unsigned long)p / PAGE_SIZE;
- *pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+ *pages = kmalloc_array(nr_pages, sizeof(struct page *),
+ GFP_NOFS);
if (!*pages)
return -ENOMEM;
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 31e0dcb970f8..75620c2f2617 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -472,7 +472,7 @@ static const uint8_t *copy_macs(struct mpoa_client *mpc,
if (mpc->number_of_mps_macs != 0)
kfree(mpc->mps_macs);
mpc->number_of_mps_macs = 0;
- mpc->mps_macs = kmalloc(num_macs * ETH_ALEN, GFP_KERNEL);
+ mpc->mps_macs = kmalloc_array(ETH_ALEN, num_macs, GFP_KERNEL);
if (mpc->mps_macs == NULL) {
pr_info("(%s) out of mem\n", mpc->dev->name);
return NULL;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1dec33790198..ee8ef1228263 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1281,7 +1281,7 @@ int hci_inquiry(void __user *arg)
/* cache_dump can't sleep. Therefore we allocate temp buffer and then
* copy it to the user space.
*/
- buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL);
+ buf = kmalloc_array(max_rsp, sizeof(struct inquiry_info), GFP_KERNEL);
if (!buf) {
err = -ENOMEM;
goto done;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9b7907ebfa01..d17a4736e47c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -331,7 +331,7 @@ static int l2cap_seq_list_init(struct l2cap_seq_list *seq_list, u16 size)
*/
alloc_size = roundup_pow_of_two(size);
- seq_list->list = kmalloc(sizeof(u16) * alloc_size, GFP_KERNEL);
+ seq_list->list = kmalloc_array(alloc_size, sizeof(u16), GFP_KERNEL);
if (!seq_list->list)
return -ENOMEM;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index cb4729539b82..920665dd92db 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -333,7 +333,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
mdb->max = max;
mdb->old = old;
- mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC);
+ mdb->mhash = kcalloc(max, sizeof(*mdb->mhash), GFP_ATOMIC);
if (!mdb->mhash) {
kfree(mdb);
return -ENOMEM;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bcec377b07e7..491828713e0b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -918,12 +918,13 @@ static int translate_table(struct net *net, const char *name,
* if an error occurs
*/
newinfo->chainstack =
- vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack)));
+ vmalloc(array_size(nr_cpu_ids,
+ sizeof(*(newinfo->chainstack))));
if (!newinfo->chainstack)
return -ENOMEM;
for_each_possible_cpu(i) {
newinfo->chainstack[i] =
- vmalloc(udc_cnt * sizeof(*(newinfo->chainstack[0])));
+ vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0]))));
if (!newinfo->chainstack[i]) {
while (i)
vfree(newinfo->chainstack[--i]);
@@ -933,7 +934,7 @@ static int translate_table(struct net *net, const char *name,
}
}
- cl_s = vmalloc(udc_cnt * sizeof(*cl_s));
+ cl_s = vmalloc(array_size(udc_cnt, sizeof(*cl_s)));
if (!cl_s)
return -ENOMEM;
i = 0; /* the i'th udc */
@@ -1308,7 +1309,7 @@ static int do_update_counters(struct net *net, const char *name,
if (num_counters == 0)
return -EINVAL;
- tmp = vmalloc(num_counters * sizeof(*tmp));
+ tmp = vmalloc(array_size(num_counters, sizeof(*tmp)));
if (!tmp)
return -ENOMEM;
@@ -1449,7 +1450,7 @@ static int copy_counters_to_user(struct ebt_table *t,
return -EINVAL;
}
- counterstmp = vmalloc(nentries * sizeof(*counterstmp));
+ counterstmp = vmalloc(array_size(nentries, sizeof(*counterstmp)));
if (!counterstmp)
return -ENOMEM;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 97fedff3f0c4..9393f25df08d 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -923,8 +923,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
/* create array for CAN frames and copy the data */
if (msg_head->nframes > 1) {
- op->frames = kmalloc(msg_head->nframes * op->cfsiz,
- GFP_KERNEL);
+ op->frames = kmalloc_array(msg_head->nframes,
+ op->cfsiz,
+ GFP_KERNEL);
if (!op->frames) {
kfree(op);
return -ENOMEM;
@@ -1095,15 +1096,17 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (msg_head->nframes > 1) {
/* create array for CAN frames and copy the data */
- op->frames = kmalloc(msg_head->nframes * op->cfsiz,
- GFP_KERNEL);
+ op->frames = kmalloc_array(msg_head->nframes,
+ op->cfsiz,
+ GFP_KERNEL);
if (!op->frames) {
kfree(op);
return -ENOMEM;
}
/* create and init array for received CAN frames */
- op->last_frames = kzalloc(msg_head->nframes * op->cfsiz,
+ op->last_frames = kcalloc(msg_head->nframes,
+ op->cfsiz,
GFP_KERNEL);
if (!op->last_frames) {
kfree(op->frames);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3b3d33ea9ed8..c6413c360771 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -168,12 +168,6 @@ static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
static struct lock_class_key socket_class;
#endif
-/*
- * When skipping (ignoring) a block of input we read it into a "skip
- * buffer," which is this many bytes in size.
- */
-#define SKIP_BUF_SIZE 1024
-
static void queue_con(struct ceph_connection *con);
static void cancel_con(struct ceph_connection *con);
static void ceph_con_workfn(struct work_struct *);
@@ -520,12 +514,18 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return 0;
}
+/*
+ * If @buf is NULL, discard up to @len bytes.
+ */
static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
{
struct kvec iov = {buf, len};
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
int r;
+ if (!buf)
+ msg.msg_flags |= MSG_TRUNC;
+
iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
@@ -2575,9 +2575,6 @@ static int try_write(struct ceph_connection *con)
con->state != CON_STATE_OPEN)
return 0;
-more:
- dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
-
/* open the socket first? */
if (con->state == CON_STATE_PREOPEN) {
BUG_ON(con->sock);
@@ -2598,7 +2595,8 @@ more:
}
}
-more_kvec:
+more:
+ dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
BUG_ON(!con->sock);
/* kvec data queued? */
@@ -2623,7 +2621,7 @@ more_kvec:
ret = write_partial_message_data(con);
if (ret == 1)
- goto more_kvec; /* we need to send the footer, too! */
+ goto more; /* we need to send the footer, too! */
if (ret == 0)
goto out;
if (ret < 0) {
@@ -2659,8 +2657,6 @@ out:
return ret;
}
-
-
/*
* Read what we can from the socket.
*/
@@ -2721,16 +2717,11 @@ more:
if (con->in_base_pos < 0) {
/*
* skipping + discarding content.
- *
- * FIXME: there must be a better way to do this!
*/
- static char buf[SKIP_BUF_SIZE];
- int skip = min((int) sizeof (buf), -con->in_base_pos);
-
- dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
- ret = ceph_tcp_recvmsg(con->sock, buf, skip);
+ ret = ceph_tcp_recvmsg(con->sock, NULL, -con->in_base_pos);
if (ret <= 0)
goto out;
+ dout("skipped %d / %d bytes\n", ret, -con->in_base_pos);
con->in_base_pos += ret;
if (con->in_base_pos)
goto more;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 69a2581ddbba..a00c74f1154e 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -766,7 +766,7 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_extent_dup_last);
-void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode, const char *class, const char *method)
{
struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
@@ -778,7 +778,9 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
BUG_ON(opcode != CEPH_OSD_OP_CALL);
pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
- BUG_ON(!pagelist);
+ if (!pagelist)
+ return -ENOMEM;
+
ceph_pagelist_init(pagelist);
op->cls.class_name = class;
@@ -798,6 +800,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
op->indata_len = payload_len;
+ return 0;
}
EXPORT_SYMBOL(osd_req_op_cls_init);
@@ -1026,7 +1029,6 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
truncate_size, truncate_seq);
}
- req->r_abort_on_full = true;
req->r_flags = flags;
req->r_base_oloc.pool = layout->pool_id;
req->r_base_oloc.pool_ns = ceph_try_get_string(layout->pool_ns);
@@ -1054,6 +1056,38 @@ EXPORT_SYMBOL(ceph_osdc_new_request);
DEFINE_RB_FUNCS(request, struct ceph_osd_request, r_tid, r_node)
DEFINE_RB_FUNCS(request_mc, struct ceph_osd_request, r_tid, r_mc_node)
+/*
+ * Call @fn on each OSD request as long as @fn returns 0.
+ */
+static void for_each_request(struct ceph_osd_client *osdc,
+ int (*fn)(struct ceph_osd_request *req, void *arg),
+ void *arg)
+{
+ struct rb_node *n, *p;
+
+ for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+ struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+ for (p = rb_first(&osd->o_requests); p; ) {
+ struct ceph_osd_request *req =
+ rb_entry(p, struct ceph_osd_request, r_node);
+
+ p = rb_next(p);
+ if (fn(req, arg))
+ return;
+ }
+ }
+
+ for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
+ struct ceph_osd_request *req =
+ rb_entry(p, struct ceph_osd_request, r_node);
+
+ p = rb_next(p);
+ if (fn(req, arg))
+ return;
+ }
+}
+
static bool osd_homeless(struct ceph_osd *osd)
{
return osd->o_osd == CEPH_HOMELESS_OSD;
@@ -1395,7 +1429,6 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
bool recovery_deletes = ceph_osdmap_flag(osdc,
CEPH_OSDMAP_RECOVERY_DELETES);
enum calc_target_result ct_res;
- int ret;
t->epoch = osdc->osdmap->epoch;
pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
@@ -1431,14 +1464,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
}
}
- ret = __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc,
- &pgid);
- if (ret) {
- WARN_ON(ret != -ENOENT);
- t->osd = CEPH_HOMELESS_OSD;
- ct_res = CALC_TARGET_POOL_DNE;
- goto out;
- }
+ __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc, &pgid);
last_pgid.pool = pgid.pool;
last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
@@ -2161,9 +2187,9 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
struct ceph_osd_client *osdc = req->r_osdc;
struct ceph_osd *osd;
enum calc_target_result ct_res;
+ int err = 0;
bool need_send = false;
bool promoted = false;
- bool need_abort = false;
WARN_ON(req->r_tid);
dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
@@ -2179,7 +2205,10 @@ again:
goto promote;
}
- if (osdc->osdmap->epoch < osdc->epoch_barrier) {
+ if (osdc->abort_err) {
+ dout("req %p abort_err %d\n", req, osdc->abort_err);
+ err = osdc->abort_err;
+ } else if (osdc->osdmap->epoch < osdc->epoch_barrier) {
dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch,
osdc->epoch_barrier);
req->r_t.paused = true;
@@ -2200,11 +2229,13 @@ again:
(ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
pool_full(osdc, req->r_t.base_oloc.pool))) {
dout("req %p full/pool_full\n", req);
- pr_warn_ratelimited("FULL or reached pool quota\n");
- req->r_t.paused = true;
- maybe_request_map(osdc);
- if (req->r_abort_on_full)
- need_abort = true;
+ if (osdc->abort_on_full) {
+ err = -ENOSPC;
+ } else {
+ pr_warn_ratelimited("FULL or reached pool quota\n");
+ req->r_t.paused = true;
+ maybe_request_map(osdc);
+ }
} else if (!osd_homeless(osd)) {
need_send = true;
} else {
@@ -2221,11 +2252,11 @@ again:
link_request(osd, req);
if (need_send)
send_request(req);
- else if (need_abort)
- complete_request(req, -ENOSPC);
+ else if (err)
+ complete_request(req, err);
mutex_unlock(&osd->lock);
- if (ct_res == CALC_TARGET_POOL_DNE)
+ if (!err && ct_res == CALC_TARGET_POOL_DNE)
send_map_check(req);
if (promoted)
@@ -2281,11 +2312,21 @@ static void finish_request(struct ceph_osd_request *req)
static void __complete_request(struct ceph_osd_request *req)
{
- if (req->r_callback) {
- dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
- req->r_tid, req->r_callback, req->r_result);
+ dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
+ req->r_tid, req->r_callback, req->r_result);
+
+ if (req->r_callback)
req->r_callback(req);
- }
+ complete_all(&req->r_completion);
+ ceph_osdc_put_request(req);
+}
+
+static void complete_request_workfn(struct work_struct *work)
+{
+ struct ceph_osd_request *req =
+ container_of(work, struct ceph_osd_request, r_complete_work);
+
+ __complete_request(req);
}
/*
@@ -2297,9 +2338,9 @@ static void complete_request(struct ceph_osd_request *req, int err)
req->r_result = err;
finish_request(req);
- __complete_request(req);
- complete_all(&req->r_completion);
- ceph_osdc_put_request(req);
+
+ INIT_WORK(&req->r_complete_work, complete_request_workfn);
+ queue_work(req->r_osdc->completion_wq, &req->r_complete_work);
}
static void cancel_map_check(struct ceph_osd_request *req)
@@ -2336,6 +2377,28 @@ static void abort_request(struct ceph_osd_request *req, int err)
complete_request(req, err);
}
+static int abort_fn(struct ceph_osd_request *req, void *arg)
+{
+ int err = *(int *)arg;
+
+ abort_request(req, err);
+ return 0; /* continue iteration */
+}
+
+/*
+ * Abort all in-flight requests with @err and arrange for all future
+ * requests to be failed immediately.
+ */
+void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err)
+{
+ dout("%s osdc %p err %d\n", __func__, osdc, err);
+ down_write(&osdc->lock);
+ for_each_request(osdc, abort_fn, &err);
+ osdc->abort_err = err;
+ up_write(&osdc->lock);
+}
+EXPORT_SYMBOL(ceph_osdc_abort_requests);
+
static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
{
if (likely(eb > osdc->epoch_barrier)) {
@@ -2363,6 +2426,30 @@ void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
/*
+ * We can end up releasing caps as a result of abort_request().
+ * In that case, we probably want to ensure that the cap release message
+ * has an updated epoch barrier in it, so set the epoch barrier prior to
+ * aborting the first request.
+ */
+static int abort_on_full_fn(struct ceph_osd_request *req, void *arg)
+{
+ struct ceph_osd_client *osdc = req->r_osdc;
+ bool *victims = arg;
+
+ if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+ (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
+ pool_full(osdc, req->r_t.base_oloc.pool))) {
+ if (!*victims) {
+ update_epoch_barrier(osdc, osdc->osdmap->epoch);
+ *victims = true;
+ }
+ abort_request(req, -ENOSPC);
+ }
+
+ return 0; /* continue iteration */
+}
+
+/*
* Drop all pending requests that are stalled waiting on a full condition to
* clear, and complete them with ENOSPC as the return code. Set the
* osdc->epoch_barrier to the latest map epoch that we've seen if any were
@@ -2370,61 +2457,11 @@ EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
*/
static void ceph_osdc_abort_on_full(struct ceph_osd_client *osdc)
{
- struct rb_node *n;
bool victims = false;
- dout("enter abort_on_full\n");
-
- if (!ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !have_pool_full(osdc))
- goto out;
-
- /* Scan list and see if there is anything to abort */
- for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
- struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
- struct rb_node *m;
-
- m = rb_first(&osd->o_requests);
- while (m) {
- struct ceph_osd_request *req = rb_entry(m,
- struct ceph_osd_request, r_node);
- m = rb_next(m);
-
- if (req->r_abort_on_full) {
- victims = true;
- break;
- }
- }
- if (victims)
- break;
- }
-
- if (!victims)
- goto out;
-
- /*
- * Update the barrier to current epoch if it's behind that point,
- * since we know we have some calls to be aborted in the tree.
- */
- update_epoch_barrier(osdc, osdc->osdmap->epoch);
-
- for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
- struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
- struct rb_node *m;
-
- m = rb_first(&osd->o_requests);
- while (m) {
- struct ceph_osd_request *req = rb_entry(m,
- struct ceph_osd_request, r_node);
- m = rb_next(m);
-
- if (req->r_abort_on_full &&
- (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
- pool_full(osdc, req->r_t.target_oloc.pool)))
- abort_request(req, -ENOSPC);
- }
- }
-out:
- dout("return abort_on_full barrier=%u\n", osdc->epoch_barrier);
+ if (osdc->abort_on_full &&
+ (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc)))
+ for_each_request(osdc, abort_on_full_fn, &victims);
}
static void check_pool_dne(struct ceph_osd_request *req)
@@ -3541,8 +3578,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
up_read(&osdc->lock);
__complete_request(req);
- complete_all(&req->r_completion);
- ceph_osdc_put_request(req);
return;
fail_request:
@@ -4927,7 +4962,10 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+ ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+ if (ret)
+ goto out_put_req;
+
if (req_page)
osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
0, false, false);
@@ -4996,6 +5034,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (!osdc->notify_wq)
goto out_msgpool_reply;
+ osdc->completion_wq = create_singlethread_workqueue("ceph-completion");
+ if (!osdc->completion_wq)
+ goto out_notify_wq;
+
schedule_delayed_work(&osdc->timeout_work,
osdc->client->options->osd_keepalive_timeout);
schedule_delayed_work(&osdc->osds_timeout_work,
@@ -5003,6 +5045,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
return 0;
+out_notify_wq:
+ destroy_workqueue(osdc->notify_wq);
out_msgpool_reply:
ceph_msgpool_destroy(&osdc->msgpool_op_reply);
out_msgpool:
@@ -5017,7 +5061,7 @@ out:
void ceph_osdc_stop(struct ceph_osd_client *osdc)
{
- flush_workqueue(osdc->notify_wq);
+ destroy_workqueue(osdc->completion_wq);
destroy_workqueue(osdc->notify_wq);
cancel_delayed_work_sync(&osdc->timeout_work);
cancel_delayed_work_sync(&osdc->osds_timeout_work);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9645ffd6acfb..98c0ff3d6441 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1299,8 +1299,9 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
if (!map->osd_primary_affinity) {
int i;
- map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32),
- GFP_NOFS);
+ map->osd_primary_affinity = kmalloc_array(map->max_osd,
+ sizeof(u32),
+ GFP_NOFS);
if (!map->osd_primary_affinity)
return -ENOMEM;
@@ -2145,10 +2146,10 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
* Should only be called with target_oid and target_oloc (as opposed to
* base_oid and base_oloc), since tiering isn't taken into account.
*/
-int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
- const struct ceph_object_id *oid,
- const struct ceph_object_locator *oloc,
- struct ceph_pg *raw_pgid)
+void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+ const struct ceph_object_id *oid,
+ const struct ceph_object_locator *oloc,
+ struct ceph_pg *raw_pgid)
{
WARN_ON(pi->id != oloc->pool);
@@ -2164,11 +2165,8 @@ int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
int nsl = oloc->pool_ns->len;
size_t total = nsl + 1 + oid->name_len;
- if (total > sizeof(stack_buf)) {
- buf = kmalloc(total, GFP_NOIO);
- if (!buf)
- return -ENOMEM;
- }
+ if (total > sizeof(stack_buf))
+ buf = kmalloc(total, GFP_NOIO | __GFP_NOFAIL);
memcpy(buf, oloc->pool_ns->str, nsl);
buf[nsl] = '\037';
memcpy(buf + nsl + 1, oid->name, oid->name_len);
@@ -2180,7 +2178,6 @@ int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
oid->name, nsl, oloc->pool_ns->str,
raw_pgid->pool, raw_pgid->seed);
}
- return 0;
}
int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
@@ -2194,7 +2191,8 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
if (!pi)
return -ENOENT;
- return __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
+ __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
+ return 0;
}
EXPORT_SYMBOL(ceph_object_locator_to_pg);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index a3d0adc828e6..e560d3975f41 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -20,7 +20,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
int got = 0;
int rc = 0;
- pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
+ pages = kmalloc_array(num_pages, sizeof(*pages), GFP_NOFS);
if (!pages)
return ERR_PTR(-ENOMEM);
@@ -74,7 +74,7 @@ struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
struct page **pages;
int i;
- pages = kmalloc(sizeof(*pages) * num_pages, flags);
+ pages = kmalloc_array(num_pages, sizeof(*pages), flags);
if (!pages)
return ERR_PTR(-ENOMEM);
for (i = 0; i < num_pages; i++) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 6e18242a1cae..57b7bab5f70b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8823,7 +8823,7 @@ static struct hlist_head * __net_init netdev_create_hash(void)
int i;
struct hlist_head *hash;
- hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
+ hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL);
if (hash != NULL)
for (i = 0; i < NETDEV_HASHENTRIES; i++)
INIT_HLIST_HEAD(&hash[i]);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c15075dc7572..e677a20180cf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -911,7 +911,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GSSET_INFO;
- info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
+ info_buf = kcalloc(n_bits, sizeof(u32), GFP_USER);
if (!info_buf)
return -ENOMEM;
@@ -1017,7 +1017,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (info.cmd == ETHTOOL_GRXCLSRLALL) {
if (info.rule_cnt > 0) {
if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
- rule_buf = kzalloc(info.rule_cnt * sizeof(u32),
+ rule_buf = kcalloc(info.rule_cnt, sizeof(u32),
GFP_USER);
if (!rule_buf)
return -ENOMEM;
@@ -1816,7 +1816,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
return -EFAULT;
test.len = test_len;
- data = kmalloc(test_len * sizeof(u64), GFP_USER);
+ data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -1852,7 +1852,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
WARN_ON_ONCE(!ret);
gstrings.len = ret;
- data = vzalloc(gstrings.len * ETH_GSTRING_LEN);
+ data = vzalloc(array_size(gstrings.len, ETH_GSTRING_LEN));
if (gstrings.len && !data)
return -ENOMEM;
@@ -1952,7 +1952,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return -EFAULT;
stats.n_stats = n_stats;
- data = vzalloc(n_stats * sizeof(u64));
+ data = vzalloc(array_size(n_stats, sizeof(u64)));
if (n_stats && !data)
return -ENOMEM;
@@ -1996,7 +1996,7 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
return -EFAULT;
stats.n_stats = n_stats;
- data = vzalloc(n_stats * sizeof(u64));
+ data = vzalloc(array_size(n_stats, sizeof(u64)));
if (n_stats && !data)
return -ENOMEM;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7e4ede34cc52..49368e21d228 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3603,7 +3603,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
return -ENOMEM;
strcpy(pkt_dev->odevname, ifname);
- pkt_dev->flows = vzalloc_node(MAX_CFLOWS * sizeof(struct flow_state),
+ pkt_dev->flows = vzalloc_node(array_size(MAX_CFLOWS,
+ sizeof(struct flow_state)),
node);
if (pkt_dev->flows == NULL) {
kfree(pkt_dev);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d2f4e0c1faaf..2589a6b78aa1 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -984,7 +984,8 @@ static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb,
*/
err = ops->peer_getappinfo(netdev, &info, &app_count);
if (!err && app_count) {
- table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL);
+ table = kmalloc_array(app_count, sizeof(struct dcb_app),
+ GFP_KERNEL);
if (!table)
return -ENOMEM;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 385f153fe031..2b75df469220 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -46,7 +46,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
return -ENOMEM;
/* allocate buffer and initialize linked list */
- seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any());
+ seqp = kmalloc_array(CCID2_SEQBUF_LEN, sizeof(struct ccid2_seq),
+ gfp_any());
if (seqp == NULL)
return -ENOMEM;
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index dc2960be51e0..b231e40f006a 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -38,7 +38,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
{
void *hdr;
int i, pages = 0;
- uint32_t *buf = kzalloc(32 * sizeof(uint32_t), GFP_KERNEL);
+ uint32_t *buf = kcalloc(32, sizeof(uint32_t), GFP_KERNEL);
pr_debug("%s\n", __func__);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 63aa39b3af03..b21833651394 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -567,7 +567,7 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
struct nlattr *mx;
int len = 0;
- mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
+ mx = kcalloc(3, nla_total_size(4), GFP_KERNEL);
if (!mx)
return -ENOMEM;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bf4e4adc2d00..1df6e97106d7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -649,7 +649,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
hash = rcu_dereference(nh->nh_exceptions);
if (!hash) {
- hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
+ hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
if (!hash)
goto out_unlock;
rcu_assign_pointer(nh->nh_exceptions, hash);
@@ -3146,7 +3146,8 @@ int __init ip_rt_init(void)
{
int cpu;
- ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+ ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
+ GFP_KERNEL);
if (!ip_idents)
panic("IP: failed to allocate ip_idents\n");
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index d8c4b6374377..be491bf6ab6e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -956,7 +956,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
int err, i, j;
net->ipv6.icmp_sk =
- kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL);
+ kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
if (!net->ipv6.icmp_sk)
return -ENOMEM;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 44c39c5f0638..10ae13560b40 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -42,7 +42,8 @@ static int alloc_ila_locks(struct ila_net *ilan)
size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
if (sizeof(spinlock_t) != 0) {
- ilan->locks = kvmalloc(size * sizeof(spinlock_t), GFP_KERNEL);
+ ilan->locks = kvmalloc_array(size, sizeof(spinlock_t),
+ GFP_KERNEL);
if (!ilan->locks)
return -ENOMEM;
for (i = 0; i < size; i++)
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 89178b46b32f..d9558ffb8acf 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1186,7 +1186,7 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local,
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
- vif_chsw = kzalloc(sizeof(vif_chsw[0]) * n_vifs, GFP_KERNEL);
+ vif_chsw = kcalloc(n_vifs, sizeof(vif_chsw[0]), GFP_KERNEL);
if (!vif_chsw)
return -ENOMEM;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 722f3d9fb416..fb73451ed85e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -772,7 +772,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
if (have_mfp)
n_suites += 4;
- suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL);
+ suites = kmalloc_array(n_suites, sizeof(u32), GFP_KERNEL);
if (!suites)
return -ENOMEM;
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 8221bc5582ab..76048b53c5b2 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -592,11 +592,11 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
max_rates = sband->n_bitrates;
}
- mi->r = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp);
+ mi->r = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
if (!mi->r)
goto error;
- mi->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp);
+ mi->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
if (!mi->sample_table)
goto error1;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index fb586b6e5d49..67ebdeaffbbc 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1313,11 +1313,11 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
if (!msp)
return NULL;
- msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp);
+ msp->ratelist = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
if (!msp->ratelist)
goto error;
- msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp);
+ msp->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
if (!msp->sample_table)
goto error1;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a3b1bcc2b461..2e917a6d239d 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1157,7 +1157,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
}
}
- ie = kzalloc(num_bands * iebufsz, GFP_KERNEL);
+ ie = kcalloc(iebufsz, num_bands, GFP_KERNEL);
if (!ie) {
ret = -ENOMEM;
goto out;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 2d82c88efd0b..5e2e511c4a6f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1803,8 +1803,9 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(res))
return res;
- funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) *
- sizeof(*funcs), GFP_KERNEL);
+ funcs = kcalloc(sdata->local->hw.max_nan_de_entries + 1,
+ sizeof(*funcs),
+ GFP_KERNEL);
if (!funcs)
return -ENOMEM;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 61c3a389da89..99e0aa350dc5 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1380,7 +1380,8 @@ int __init ip_vs_conn_init(void)
/*
* Allocate the connection hash table and initialize its list heads
*/
- ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(*ip_vs_conn_tab));
+ ip_vs_conn_tab = vmalloc(array_size(ip_vs_conn_tab_size,
+ sizeof(*ip_vs_conn_tab)));
if (!ip_vs_conn_tab)
return -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index afdeca53e88b..d88841fbc560 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -402,7 +402,8 @@ int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
struct nf_conntrack_l4proto __rcu **proto_array;
int i;
- proto_array = kmalloc(MAX_NF_CT_PROTO *
+ proto_array =
+ kmalloc_array(MAX_NF_CT_PROTO,
sizeof(struct nf_conntrack_l4proto *),
GFP_KERNEL);
if (proto_array == NULL) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b7df32a56e7e..46f9df99d276 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -691,8 +691,9 @@ int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
mutex_lock(&nf_nat_proto_mutex);
if (nf_nat_l4protos[l3proto] == NULL) {
- l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
- GFP_KERNEL);
+ l4protos = kmalloc_array(IPPROTO_MAX,
+ sizeof(struct nf_nat_l4proto *),
+ GFP_KERNEL);
if (l4protos == NULL) {
ret = -ENOMEM;
goto out;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d23a5c269c44..896d4a36081d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5304,7 +5304,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
if (err < 0)
return err;
- ops = kzalloc(sizeof(struct nf_hook_ops) * n, GFP_KERNEL);
+ ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
if (!ops)
return -ENOMEM;
@@ -7178,8 +7178,8 @@ static int __init nf_tables_module_init(void)
nft_chain_filter_init();
- info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
- GFP_KERNEL);
+ info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info),
+ GFP_KERNEL);
if (info == NULL) {
err = -ENOMEM;
goto err1;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index cb5b5f207777..e5d27b2e4eba 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -190,8 +190,9 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
if (class_max > NF_CT_MAX_EXPECT_CLASSES)
return -EOVERFLOW;
- expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) *
- class_max, GFP_KERNEL);
+ expect_policy = kcalloc(class_max,
+ sizeof(struct nf_conntrack_expect_policy),
+ GFP_KERNEL);
if (expect_policy == NULL)
return -ENOMEM;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index df9ab71b0ed9..d0d8397c9588 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1904,7 +1904,7 @@ static int __init xt_init(void)
seqcount_init(&per_cpu(xt_recseq, i));
}
- xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
+ xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
if (!xt)
return -ENOMEM;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index b9ce82c9440f..25eeb6d2a75a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -352,8 +352,9 @@ int genl_register_family(struct genl_family *family)
}
if (family->maxattr && !family->parallel_ops) {
- family->attrbuf = kmalloc((family->maxattr+1) *
- sizeof(struct nlattr *), GFP_KERNEL);
+ family->attrbuf = kmalloc_array(family->maxattr + 1,
+ sizeof(struct nlattr *),
+ GFP_KERNEL);
if (family->attrbuf == NULL) {
err = -ENOMEM;
goto errout_locked;
@@ -566,8 +567,9 @@ static int genl_family_rcv_msg(const struct genl_family *family,
return -EOPNOTSUPP;
if (family->maxattr && family->parallel_ops) {
- attrbuf = kmalloc((family->maxattr+1) *
- sizeof(struct nlattr *), GFP_KERNEL);
+ attrbuf = kmalloc_array(family->maxattr + 1,
+ sizeof(struct nlattr *),
+ GFP_KERNEL);
if (attrbuf == NULL)
return -ENOMEM;
} else
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b97eb766a1d5..93fbcafbf388 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1395,7 +1395,7 @@ static int __init nr_proto_init(void)
return -1;
}
- dev_nr = kzalloc(nr_ndevs * sizeof(struct net_device *), GFP_KERNEL);
+ dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
if (dev_nr == NULL) {
printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
return -1;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a61818e94396..0f5ce77460d4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1578,8 +1578,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_table;
}
- dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
- GFP_KERNEL);
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
if (!dp->ports) {
err = -ENOMEM;
goto err_destroy_percpu;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f81c1d0ddff4..19f6765566e7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -47,7 +47,7 @@ static struct hlist_head *dev_table;
*/
int ovs_vport_init(void)
{
- dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+ dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
GFP_KERNEL);
if (!dev_table)
return -ENOMEM;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ee018564b2b4..50809748c127 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4161,7 +4161,7 @@ static char *alloc_one_pg_vec_page(unsigned long order)
return buffer;
/* __get_free_pages failed, fall back to vmalloc */
- buffer = vzalloc((1 << order) * PAGE_SIZE);
+ buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
if (buffer)
return buffer;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 02deee29e7f1..b6ad38e48f62 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -163,7 +163,8 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
- rds_ibdev->vector_load = kzalloc(sizeof(int) * device->num_comp_vectors,
+ rds_ibdev->vector_load = kcalloc(device->num_comp_vectors,
+ sizeof(int),
GFP_KERNEL);
if (!rds_ibdev->vector_load) {
pr_err("RDS/IB: %s failed to allocate vector memory\n",
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 13b38ad0fa4a..f1684ae6abfd 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -526,7 +526,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto recv_hdrs_dma_out;
}
- ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
+ ic->i_sends = vzalloc_node(array_size(sizeof(struct rds_ib_send_work),
+ ic->i_send_ring.w_nr),
ibdev_to_node(dev));
if (!ic->i_sends) {
ret = -ENOMEM;
@@ -534,7 +535,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
goto ack_dma_out;
}
- ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
+ ic->i_recvs = vzalloc_node(array_size(sizeof(struct rds_ib_recv_work),
+ ic->i_recv_ring.w_nr),
ibdev_to_node(dev));
if (!ic->i_recvs) {
ret = -ENOMEM;
diff --git a/net/rds/info.c b/net/rds/info.c
index 140a44a5f7b7..e367a97a18c8 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK))
>> PAGE_SHIFT;
- pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+ pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;
goto out;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5b73fea849df..ebe42e7eb456 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1514,7 +1514,8 @@ static int __init rose_proto_init(void)
rose_callsign = null_ax25_address;
- dev_rose = kzalloc(rose_ndevs * sizeof(struct net_device *), GFP_KERNEL);
+ dev_rose = kcalloc(rose_ndevs, sizeof(struct net_device *),
+ GFP_KERNEL);
if (dev_rose == NULL) {
printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate device structure\n");
rc = -ENOMEM;
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 6c0ae27fff84..278ac0807a60 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -432,7 +432,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
sg = _sg;
if (unlikely(nsg > 4)) {
- sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO);
+ sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
if (!sg)
goto nomem;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 22fa13cf5d8b..cd2e0e342fb6 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -489,11 +489,12 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
return err;
if (!q->flows) {
- q->flows = kvzalloc(q->flows_cnt *
- sizeof(struct fq_codel_flow), GFP_KERNEL);
+ q->flows = kvcalloc(q->flows_cnt,
+ sizeof(struct fq_codel_flow),
+ GFP_KERNEL);
if (!q->flows)
return -ENOMEM;
- q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
+ q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
if (!q->backlogs)
return -ENOMEM;
for (i = 0; i < q->flows_cnt; i++) {
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index bce2632212d3..c3a8388dcdf6 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -599,8 +599,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
if (!q->hh_flows) {
/* Initialize heavy-hitter flow table. */
- q->hh_flows = kvzalloc(HH_FLOWS_CNT *
- sizeof(struct list_head), GFP_KERNEL);
+ q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head),
+ GFP_KERNEL);
if (!q->hh_flows)
return -ENOMEM;
for (i = 0; i < HH_FLOWS_CNT; i++)
@@ -614,8 +614,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
/* Initialize heavy-hitter filter arrays. */
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN *
- sizeof(u32), GFP_KERNEL);
+ q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN,
+ sizeof(u32),
+ GFP_KERNEL);
if (!q->hhf_arrays[i]) {
/* Note: hhf_destroy() will be called
* by our caller.
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e64630cd3331..5b537613946f 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -482,8 +482,9 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
return 0;
/* Allocated the array of pointers to transorms */
- ep->auth_hmacs = kzalloc(sizeof(struct crypto_shash *) *
- SCTP_AUTH_NUM_HMACS, gfp);
+ ep->auth_hmacs = kcalloc(SCTP_AUTH_NUM_HMACS,
+ sizeof(struct crypto_shash *),
+ gfp);
if (!ep->auth_hmacs)
return -ENOMEM;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 11d93377ba5e..5dffbc493008 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1438,7 +1438,7 @@ static __init int sctp_init(void)
/* Allocate and initialize the endpoint hash table. */
sctp_ep_hashsize = 64;
sctp_ep_hashtable =
- kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
+ kmalloc_array(64, sizeof(struct sctp_hashbucket), GFP_KERNEL);
if (!sctp_ep_hashtable) {
pr_err("Failed endpoint_hash alloc\n");
status = -ENOMEM;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index cc7c1bb60fe8..dbd2605d1962 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -584,9 +584,9 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
GFP_KERNEL);
if (!link->wr_rx_sges)
goto no_mem_wr_tx_sges;
- link->wr_tx_mask = kzalloc(
- BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*link->wr_tx_mask),
- GFP_KERNEL);
+ link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
+ sizeof(*link->wr_tx_mask),
+ GFP_KERNEL);
if (!link->wr_tx_mask)
goto no_mem_wr_rx_sges;
link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 9463af4b32e8..be8f103d22fd 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1753,7 +1753,8 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT;
rqstp->rq_enc_pages_num = last - first + 1 + 1;
rqstp->rq_enc_pages
- = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
+ = kmalloc_array(rqstp->rq_enc_pages_num,
+ sizeof(struct page *),
GFP_NOFS);
if (!rqstp->rq_enc_pages)
goto out;
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 46b295e4f2b8..1c7c49dbf8ba 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -224,7 +224,7 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
- arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
+ arg->pages = kcalloc(arg->npages, sizeof(struct page *), GFP_KERNEL);
/*
* XXX: actual pages are allocated by xdr layer in
* xdr_partial_copy_from_skb.
@@ -298,9 +298,11 @@ int gssp_accept_sec_context_upcall(struct net *net,
if (res.context_handle) {
data->out_handle = rctxh.exported_context_token;
data->mech_oid.len = rctxh.mech.len;
- if (rctxh.mech.data)
+ if (rctxh.mech.data) {
memcpy(data->mech_oid.data, rctxh.mech.data,
data->mech_oid.len);
+ kfree(rctxh.mech.data);
+ }
client_name = rctxh.src_name.display_name;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index cdda4744c9b1..109fbe591e7b 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1683,7 +1683,7 @@ struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct ne
if (cd == NULL)
return ERR_PTR(-ENOMEM);
- cd->hash_table = kzalloc(cd->hash_size * sizeof(struct hlist_head),
+ cd->hash_table = kcalloc(cd->hash_size, sizeof(struct hlist_head),
GFP_KERNEL);
if (cd->hash_table == NULL) {
kfree(cd);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c2266f387213..d839c33ae7d9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1546,6 +1546,7 @@ call_reserveresult(struct rpc_task *task)
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
+ xprt_request_init(task);
task->tk_action = call_refresh;
return;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 70f005044f06..3c85af058227 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -66,7 +66,7 @@
* Local functions
*/
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
-static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
+static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
@@ -987,6 +987,8 @@ bool xprt_prepare_transmit(struct rpc_task *task)
task->tk_status = -EAGAIN;
goto out_unlock;
}
+ if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent)
+ req->rq_xid = xprt_alloc_xid(xprt);
ret = true;
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
@@ -1163,10 +1165,10 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
out_init_req:
xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
xprt->num_reqs);
+ spin_unlock(&xprt->reserve_lock);
+
task->tk_status = 0;
task->tk_rqstp = req;
- xprt_request_init(task, xprt);
- spin_unlock(&xprt->reserve_lock);
}
EXPORT_SYMBOL_GPL(xprt_alloc_slot);
@@ -1184,7 +1186,7 @@ void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
spin_lock(&xprt->reserve_lock);
if (!xprt_dynamic_free_slot(xprt, req)) {
@@ -1194,6 +1196,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
xprt_wake_up_backlog(xprt);
spin_unlock(&xprt->reserve_lock);
}
+EXPORT_SYMBOL_GPL(xprt_free_slot);
static void xprt_free_all_slots(struct rpc_xprt *xprt)
{
@@ -1303,8 +1306,9 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt)
xprt->xid = prandom_u32();
}
-static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
+void xprt_request_init(struct rpc_task *task)
{
+ struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
INIT_LIST_HEAD(&req->rq_list);
@@ -1312,7 +1316,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_task = task;
req->rq_xprt = xprt;
req->rq_buffer = NULL;
- req->rq_xid = xprt_alloc_xid(xprt);
req->rq_connect_cookie = xprt->connect_cookie - 1;
req->rq_bytes_sent = 0;
req->rq_snd_buf.len = 0;
@@ -1373,7 +1376,7 @@ void xprt_release(struct rpc_task *task)
dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
if (likely(!bc_prealloc(req)))
- xprt_free_slot(xprt, req);
+ xprt->ops->free_slot(xprt, req);
else
xprt_free_bc_request(req);
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 47ebac949769..90adeff4c06b 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -9,8 +9,10 @@
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
+#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -29,29 +31,41 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
spin_unlock(&buf->rb_reqslock);
rpcrdma_destroy_req(req);
-
- kfree(rqst);
}
-static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
- struct rpc_rqst *rqst)
+static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
+ unsigned int count)
{
- struct rpcrdma_regbuf *rb;
- struct rpcrdma_req *req;
- size_t size;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+ struct rpc_rqst *rqst;
+ unsigned int i;
+
+ for (i = 0; i < (count << 1); i++) {
+ struct rpcrdma_regbuf *rb;
+ struct rpcrdma_req *req;
+ size_t size;
+
+ req = rpcrdma_create_req(r_xprt);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ rqst = &req->rl_slot;
+
+ rqst->rq_xprt = xprt;
+ INIT_LIST_HEAD(&rqst->rq_list);
+ INIT_LIST_HEAD(&rqst->rq_bc_list);
+ __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+ spin_lock_bh(&xprt->bc_pa_lock);
+ list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
+ spin_unlock_bh(&xprt->bc_pa_lock);
- req = rpcrdma_create_req(r_xprt);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- size = r_xprt->rx_data.inline_rsize;
- rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
- if (IS_ERR(rb))
- goto out_fail;
- req->rl_sendbuf = rb;
- xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
- min_t(size_t, size, PAGE_SIZE));
- rpcrdma_set_xprtdata(rqst, req);
+ size = r_xprt->rx_data.inline_rsize;
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
+ if (IS_ERR(rb))
+ goto out_fail;
+ req->rl_sendbuf = rb;
+ xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
+ min_t(size_t, size, PAGE_SIZE));
+ }
return 0;
out_fail:
@@ -59,23 +73,6 @@ out_fail:
return -ENOMEM;
}
-/* Allocate and add receive buffers to the rpcrdma_buffer's
- * existing list of rep's. These are released when the
- * transport is destroyed.
- */
-static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
- unsigned int count)
-{
- int rc = 0;
-
- while (count--) {
- rc = rpcrdma_create_rep(r_xprt);
- if (rc)
- break;
- }
- return rc;
-}
-
/**
* xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
* @xprt: transport associated with these backchannel resources
@@ -86,9 +83,6 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
- struct rpc_rqst *rqst;
- unsigned int i;
int rc;
/* The backchannel reply path returns each rpc_rqst to the
@@ -103,35 +97,11 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
goto out_err;
- for (i = 0; i < (reqs << 1); i++) {
- rqst = kzalloc(sizeof(*rqst), GFP_KERNEL);
- if (!rqst)
- goto out_free;
-
- dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
-
- rqst->rq_xprt = &r_xprt->rx_xprt;
- INIT_LIST_HEAD(&rqst->rq_list);
- INIT_LIST_HEAD(&rqst->rq_bc_list);
- __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-
- if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
- goto out_free;
-
- spin_lock_bh(&xprt->bc_pa_lock);
- list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
- }
-
- rc = rpcrdma_bc_setup_reps(r_xprt, reqs);
+ rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
if (rc)
goto out_free;
- rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs);
- if (rc)
- goto out_free;
-
- buffer->rb_bc_srv_max_requests = reqs;
+ r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
request_module("svcrdma");
trace_xprtrdma_cb_setup(r_xprt, reqs);
return 0;
@@ -235,6 +205,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (rc < 0)
goto failed_marshal;
+ rpcrdma_post_recvs(r_xprt, true);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
return 0;
@@ -275,10 +246,14 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
*/
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpc_xprt *xprt = rqst->rq_xprt;
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
- __func__, rqst, rpcr_to_rdmar(rqst));
+ __func__, rqst, req);
+
+ rpcrdma_recv_buffer_put(req->rl_reply);
+ req->rl_reply = NULL;
spin_lock_bh(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f2f63959fddd..17fb1e025654 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -20,7 +20,10 @@
* verb (fmr_op_unmap).
*/
+#include <linux/sunrpc/svc_rdma.h>
+
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -156,10 +159,32 @@ out_release:
fmr_op_release_mr(mr);
}
+/* On success, sets:
+ * ep->rep_attr.cap.max_send_wr
+ * ep->rep_attr.cap.max_recv_wr
+ * cdata->max_requests
+ * ia->ri_max_segs
+ */
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
+ int max_qp_wr;
+
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+ max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+ max_qp_wr -= 1;
+ if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+ return -ENOMEM;
+ if (cdata->max_requests > max_qp_wr)
+ cdata->max_requests = max_qp_wr;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+ ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
+
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES);
return 0;
@@ -219,6 +244,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
+ trace_xprtrdma_dma_map(mr);
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c59c5c788db0..c040de196e13 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -71,8 +71,10 @@
*/
#include <linux/sunrpc/rpc_rdma.h>
+#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -202,12 +204,22 @@ out_release:
frwr_op_release_mr(mr);
}
+/* On success, sets:
+ * ep->rep_attr.cap.max_send_wr
+ * ep->rep_attr.cap.max_recv_wr
+ * cdata->max_requests
+ * ia->ri_max_segs
+ *
+ * And these FRWR-related fields:
+ * ia->ri_max_frwr_depth
+ * ia->ri_mrtype
+ */
static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr *attrs = &ia->ri_device->attrs;
- int depth, delta;
+ int max_qp_wr, depth, delta;
ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
@@ -241,14 +253,26 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} while (delta > 0);
}
- ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > attrs->max_qp_wr) {
- cdata->max_requests = attrs->max_qp_wr / depth;
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+ max_qp_wr -= RPCRDMA_BACKWARD_WRS;
+ max_qp_wr -= 1;
+ if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
+ return -ENOMEM;
+ if (cdata->max_requests > max_qp_wr)
+ cdata->max_requests = max_qp_wr;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+ if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
+ cdata->max_requests = max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
depth;
}
+ ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+ ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frwr_depth);
@@ -393,6 +417,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
+ trace_xprtrdma_dma_map(mr);
ibmr = frwr->fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index a762d192372b..620327c01302 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/
@@ -13,9 +14,11 @@
#include <asm/swab.h>
-#define CREATE_TRACE_POINTS
#include "xprt_rdma.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpcrdma.h>
+
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e8adad33d0bb..c8ae983c6cc0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -46,22 +47,17 @@
* to the Linux RPC framework lives.
*/
-#include "xprt_rdma.h"
-
#include <linux/highmem.h>
+#include <linux/sunrpc/svc_rdma.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-static const char transfertypes[][12] = {
- "inline", /* no chunks */
- "read list", /* some argument via rdma read */
- "*read list", /* entire request via rdma read */
- "write list", /* some result via rdma write */
- "reply chunk" /* entire reply via rdma write */
-};
-
/* Returns size of largest RPC-over-RDMA header in a Call message
*
* The largest Call header contains a full-size Read list and a
@@ -230,7 +226,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
*/
*ppages = alloc_page(GFP_ATOMIC);
if (!*ppages)
- return -EAGAIN;
+ return -ENOBUFS;
}
seg->mr_page = *ppages;
seg->mr_offset = (char *)page_base;
@@ -365,7 +361,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mr);
if (IS_ERR(seg))
- goto out_maperr;
+ return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,11 +373,6 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} while (nsegs);
return 0;
-
-out_maperr:
- if (PTR_ERR(seg) == -EAGAIN)
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
- return PTR_ERR(seg);
}
/* Register and XDR encode the Write list. Supports encoding a list
@@ -428,7 +419,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- goto out_maperr;
+ return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -445,11 +436,6 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
-
-out_maperr:
- if (PTR_ERR(seg) == -EAGAIN)
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
- return PTR_ERR(seg);
}
/* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -491,7 +477,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mr);
if (IS_ERR(seg))
- goto out_maperr;
+ return PTR_ERR(seg);
rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mr) < 0)
@@ -508,11 +494,6 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
*segcount = cpu_to_be32(nchunks);
return 0;
-
-out_maperr:
- if (PTR_ERR(seg) == -EAGAIN)
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
- return PTR_ERR(seg);
}
/**
@@ -709,7 +690,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
{
req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
if (!req->rl_sendctx)
- return -ENOBUFS;
+ return -EAGAIN;
req->rl_sendctx->sc_wr.num_sge = 0;
req->rl_sendctx->sc_unmap_count = 0;
req->rl_sendctx->sc_req = req;
@@ -883,7 +864,15 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
return 0;
out_err:
- r_xprt->rx_stats.failed_marshal_count++;
+ switch (ret) {
+ case -EAGAIN:
+ xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ break;
+ case -ENOBUFS:
+ break;
+ default:
+ r_xprt->rx_stats.failed_marshal_count++;
+ }
return ret;
}
@@ -1026,8 +1015,6 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
out_short:
pr_warn("RPC/RDMA short backward direction call\n");
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
- xprt_disconnect_done(&r_xprt->rx_xprt);
return true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
@@ -1333,13 +1320,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
+ --buf->rb_posted_receives;
+
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
+ /* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
rep->rr_hdrbuf.head[0].iov_base);
-
- /* Fixed transport header fields */
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;
@@ -1378,17 +1366,10 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
+ rpcrdma_post_recvs(r_xprt, false);
queue_work(rpcrdma_receive_wq, &rep->rr_work);
return;
-out_badstatus:
- rpcrdma_recv_buffer_put(rep);
- if (r_xprt->rx_ep.rep_connected == 1) {
- r_xprt->rx_ep.rep_connected = -EIO;
- rpcrdma_conn_func(&r_xprt->rx_ep);
- }
- return;
-
out_badversion:
trace_xprtrdma_reply_vers(rep);
goto repost;
@@ -1408,7 +1389,7 @@ out_shortreply:
* receive buffer before returning.
*/
repost:
- r_xprt->rx_stats.bad_reply_count++;
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_post_recvs(r_xprt, false);
+out_badstatus:
+ rpcrdma_recv_buffer_put(rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index dd8a431dc2ae..357ba90c382d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -1,4 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
+ * Copyright (c) 2015-2018 Oracle. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -46,7 +48,6 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/svc_rdma.h>
-#include "xprt_rdma.h"
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a73632ca9048..a68180090554 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -1,13 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015-2018 Oracle. All rights reserved.
*
* Support for backward direction RPCs on RPC/RDMA (server-side).
*/
#include <linux/module.h>
+
#include <linux/sunrpc/svc_rdma.h>
+
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
@@ -112,39 +115,21 @@ out_notfound:
* the adapter has a small maximum SQ depth.
*/
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
- struct rpc_rqst *rqst)
+ struct rpc_rqst *rqst,
+ struct svc_rdma_send_ctxt *ctxt)
{
- struct svc_rdma_op_ctxt *ctxt;
int ret;
- ctxt = svc_rdma_get_context(rdma);
-
- /* rpcrdma_bc_send_request builds the transport header and
- * the backchannel RPC message in the same buffer. Thus only
- * one SGE is needed to send both.
- */
- ret = svc_rdma_map_reply_hdr(rdma, ctxt, rqst->rq_buffer,
- rqst->rq_snd_buf.len);
+ ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL);
if (ret < 0)
- goto out_err;
+ return -EIO;
/* Bump page refcnt so Send completion doesn't release
* the rq_buffer before all retransmits are complete.
*/
get_page(virt_to_page(rqst->rq_buffer));
- ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
- if (ret)
- goto out_unmap;
-
-out_err:
- dprintk("svcrdma: %s returns %d\n", __func__, ret);
- return ret;
-
-out_unmap:
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- ret = -EIO;
- goto out_err;
+ ctxt->sc_send_wr.opcode = IB_WR_SEND;
+ return svc_rdma_send(rdma, &ctxt->sc_send_wr);
}
/* Server-side transport endpoint wants a whole page for its send
@@ -191,13 +176,15 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct svc_rdma_send_ctxt *ctxt;
__be32 *p;
int rc;
- /* Space in the send buffer for an RPC/RDMA header is reserved
- * via xprt->tsh_size.
- */
- p = rqst->rq_buffer;
+ ctxt = svc_rdma_send_ctxt_get(rdma);
+ if (!ctxt)
+ goto drop_connection;
+
+ p = ctxt->sc_xprt_buf;
*p++ = rqst->rq_xid;
*p++ = rpcrdma_version;
*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
@@ -205,14 +192,17 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p++ = xdr_zero;
*p = xdr_zero;
+ svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
#endif
- rc = svc_rdma_bc_sendto(rdma, rqst);
- if (rc)
+ rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
+ if (rc) {
+ svc_rdma_send_ctxt_put(rdma, ctxt);
goto drop_connection;
+ }
return rc;
drop_connection:
@@ -273,6 +263,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot,
+ .free_slot = xprt_free_slot,
.release_request = xprt_release_rqst_cong,
.buf_alloc = xprt_rdma_bc_allocate,
.buf_free = xprt_rdma_bc_free,
@@ -320,7 +311,7 @@ xprt_setup_rdma_bc(struct xprt_create *args)
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->prot = XPRT_TRANSPORT_BC_RDMA;
- xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+ xprt->tsh_size = 0;
xprt->ops = &xprt_rdma_bc_procs;
memcpy(&xprt->addr, args->dstaddr, args->addrlen);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 3d45015dca97..841fca143804 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (c) 2016, 2017 Oracle. All rights reserved.
+ * Copyright (c) 2016-2018 Oracle. All rights reserved.
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
@@ -60,7 +61,7 @@
* svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
* data payload from the client. svc_rdma_recvfrom sets up the
* RDMA Reads using pages in svc_rqst::rq_pages, which are
- * transferred to an svc_rdma_op_ctxt for the duration of the
+ * transferred to an svc_rdma_recv_ctxt for the duration of the
* I/O. svc_rdma_recvfrom then returns zero, since the RPC message
* is still not yet ready.
*
@@ -69,18 +70,18 @@
* svc_rdma_recvfrom again. This second call may use a different
* svc_rqst than the first one, thus any information that needs
* to be preserved across these two calls is kept in an
- * svc_rdma_op_ctxt.
+ * svc_rdma_recv_ctxt.
*
* The second call to svc_rdma_recvfrom performs final assembly
* of the RPC Call message, using the RDMA Read sink pages kept in
- * the svc_rdma_op_ctxt. The xdr_buf is copied from the
- * svc_rdma_op_ctxt to the second svc_rqst. The second call returns
+ * the svc_rdma_recv_ctxt. The xdr_buf is copied from the
+ * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns
* the length of the completed RPC Call message.
*
* Page Management
*
* Pages under I/O must be transferred from the first svc_rqst to an
- * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns.
+ * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns.
*
* The first svc_rqst supplies pages for RDMA Reads. These are moved
* from rqstp::rq_pages into ctxt::pages. The consumed elements of
@@ -88,78 +89,286 @@
* svc_rdma_recvfrom call returns.
*
* During the second svc_rdma_recvfrom call, RDMA Read sink pages
- * are transferred from the svc_rdma_op_ctxt to the second svc_rqst
+ * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst
* (see rdma_read_complete() below).
*/
+#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
-#include <linux/spinlock.h>
-
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-/*
- * Replace the pages in the rq_argpages array with the pages from the SGE in
- * the RDMA_RECV completion. The SGL should contain full pages up until the
- * last one.
+static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
+
+static inline struct svc_rdma_recv_ctxt *
+svc_rdma_next_recv_ctxt(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt,
+ rc_list);
+}
+
+static struct svc_rdma_recv_ctxt *
+svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_recv_ctxt *ctxt;
+ dma_addr_t addr;
+ void *buffer;
+
+ ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (!ctxt)
+ goto fail0;
+ buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+ if (!buffer)
+ goto fail1;
+ addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
+ rdma->sc_max_req_size, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
+ goto fail2;
+
+ ctxt->rc_recv_wr.next = NULL;
+ ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
+ ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
+ ctxt->rc_recv_wr.num_sge = 1;
+ ctxt->rc_cqe.done = svc_rdma_wc_receive;
+ ctxt->rc_recv_sge.addr = addr;
+ ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
+ ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
+ ctxt->rc_recv_buf = buffer;
+ ctxt->rc_temp = false;
+ return ctxt;
+
+fail2:
+ kfree(buffer);
+fail1:
+ kfree(ctxt);
+fail0:
+ return NULL;
+}
+
+static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
+ ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
+ kfree(ctxt->rc_recv_buf);
+ kfree(ctxt);
+}
+
+/**
+ * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
+ * @rdma: svcxprt_rdma being torn down
+ *
*/
-static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *ctxt)
+void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
{
- struct page *page;
- int sge_no;
- u32 len;
+ struct svc_rdma_recv_ctxt *ctxt;
- /* The reply path assumes the Call's transport header resides
- * in rqstp->rq_pages[0].
- */
- page = ctxt->pages[0];
- put_page(rqstp->rq_pages[0]);
- rqstp->rq_pages[0] = page;
-
- /* Set up the XDR head */
- rqstp->rq_arg.head[0].iov_base = page_address(page);
- rqstp->rq_arg.head[0].iov_len =
- min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
- rqstp->rq_arg.len = ctxt->byte_len;
- rqstp->rq_arg.buflen = ctxt->byte_len;
-
- /* Compute bytes past head in the SGL */
- len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
-
- /* If data remains, store it in the pagelist */
- rqstp->rq_arg.page_len = len;
- rqstp->rq_arg.page_base = 0;
-
- sge_no = 1;
- while (len && sge_no < ctxt->count) {
- page = ctxt->pages[sge_no];
- put_page(rqstp->rq_pages[sge_no]);
- rqstp->rq_pages[sge_no] = page;
- len -= min_t(u32, len, ctxt->sge[sge_no].length);
- sge_no++;
+ while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
+ list_del(&ctxt->rc_list);
+ svc_rdma_recv_ctxt_destroy(rdma, ctxt);
}
- rqstp->rq_respages = &rqstp->rq_pages[sge_no];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
+}
+
+static struct svc_rdma_recv_ctxt *
+svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ spin_lock(&rdma->sc_recv_lock);
+ ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
+ if (!ctxt)
+ goto out_empty;
+ list_del(&ctxt->rc_list);
+ spin_unlock(&rdma->sc_recv_lock);
+
+out:
+ ctxt->rc_page_count = 0;
+ return ctxt;
+
+out_empty:
+ spin_unlock(&rdma->sc_recv_lock);
+
+ ctxt = svc_rdma_recv_ctxt_alloc(rdma);
+ if (!ctxt)
+ return NULL;
+ goto out;
+}
+
+/**
+ * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list
+ * @rdma: controlling svcxprt_rdma
+ * @ctxt: object to return to the free list
+ *
+ */
+void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ unsigned int i;
+
+ for (i = 0; i < ctxt->rc_page_count; i++)
+ put_page(ctxt->rc_pages[i]);
+
+ if (!ctxt->rc_temp) {
+ spin_lock(&rdma->sc_recv_lock);
+ list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
+ spin_unlock(&rdma->sc_recv_lock);
+ } else
+ svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+}
+
+static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct ib_recv_wr *bad_recv_wr;
+ int ret;
+
+ svc_xprt_get(&rdma->sc_xprt);
+ ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
+ trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
+ if (ret)
+ goto err_post;
+ return 0;
+
+err_post:
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ svc_xprt_put(&rdma->sc_xprt);
+ return ret;
+}
- /* If not all pages were used from the SGL, free the remaining ones */
- len = sge_no;
- while (sge_no < ctxt->count) {
- page = ctxt->pages[sge_no++];
- put_page(page);
+static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ ctxt = svc_rdma_recv_ctxt_get(rdma);
+ if (!ctxt)
+ return -ENOMEM;
+ return __svc_rdma_post_recv(rdma, ctxt);
+}
+
+/**
+ * svc_rdma_post_recvs - Post initial set of Recv WRs
+ * @rdma: fresh svcxprt_rdma
+ *
+ * Returns true if successful, otherwise false.
+ */
+bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_recv_ctxt *ctxt;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < rdma->sc_max_requests; i++) {
+ ctxt = svc_rdma_recv_ctxt_get(rdma);
+ if (!ctxt)
+ return false;
+ ctxt->rc_temp = true;
+ ret = __svc_rdma_post_recv(rdma, ctxt);
+ if (ret) {
+ pr_err("svcrdma: failure posting recv buffers: %d\n",
+ ret);
+ return false;
+ }
}
- ctxt->count = len;
+ return true;
+}
- /* Set up tail */
- rqstp->rq_arg.tail[0].iov_base = NULL;
- rqstp->rq_arg.tail[0].iov_len = 0;
+/**
+ * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
+ * @cq: Completion Queue context
+ * @wc: Work Completion object
+ *
+ * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
+ * the Receive completion handler could be running.
+ */
+static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct svcxprt_rdma *rdma = cq->cq_context;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ trace_svcrdma_wc_receive(wc);
+
+ /* WARNING: Only wc->wr_cqe and wc->status are reliable */
+ ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
+
+ if (wc->status != IB_WC_SUCCESS)
+ goto flushed;
+
+ if (svc_rdma_post_recv(rdma))
+ goto post_err;
+
+ /* All wc fields are now known to be valid */
+ ctxt->rc_byte_len = wc->byte_len;
+ ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
+ ctxt->rc_recv_sge.addr,
+ wc->byte_len, DMA_FROM_DEVICE);
+
+ spin_lock(&rdma->sc_rq_dto_lock);
+ list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
+ spin_unlock(&rdma->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+ if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
+ svc_xprt_enqueue(&rdma->sc_xprt);
+ goto out;
+
+flushed:
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
+ ib_wc_status_msg(wc->status),
+ wc->status, wc->vendor_err);
+post_err:
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_xprt_enqueue(&rdma->sc_xprt);
+out:
+ svc_xprt_put(&rdma->sc_xprt);
+}
+
+/**
+ * svc_rdma_flush_recv_queues - Drain pending Receive work
+ * @rdma: svcxprt_rdma being shut down
+ *
+ */
+void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
+ list_del(&ctxt->rc_list);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
+ while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
+ list_del(&ctxt->rc_list);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
+}
+
+static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *arg = &rqstp->rq_arg;
+
+ arg->head[0].iov_base = ctxt->rc_recv_buf;
+ arg->head[0].iov_len = ctxt->rc_byte_len;
+ arg->tail[0].iov_base = NULL;
+ arg->tail[0].iov_len = 0;
+ arg->page_len = 0;
+ arg->page_base = 0;
+ arg->buflen = ctxt->rc_byte_len;
+ arg->len = ctxt->rc_byte_len;
+
+ rqstp->rq_respages = &rqstp->rq_pages[0];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
}
/* This accommodates the largest possible Write chunk,
@@ -294,7 +503,6 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
__be32 *p, *end, *rdma_argp;
unsigned int hdr_len;
- char *proc;
/* Verify that there's enough bytes for header + something */
if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
@@ -306,10 +514,8 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
switch (*(rdma_argp + 3)) {
case rdma_msg:
- proc = "RDMA_MSG";
break;
case rdma_nomsg:
- proc = "RDMA_NOMSG";
break;
case rdma_done:
@@ -339,103 +545,94 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len;
- dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
- proc, be32_to_cpup(rdma_argp), hdr_len);
+ trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
return hdr_len;
out_short:
- dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+ trace_svcrdma_decode_short(rq_arg->len);
return -EINVAL;
out_version:
- dprintk("svcrdma: bad xprt version: %u\n",
- be32_to_cpup(rdma_argp + 1));
+ trace_svcrdma_decode_badvers(rdma_argp);
return -EPROTONOSUPPORT;
out_drop:
- dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+ trace_svcrdma_decode_drop(rdma_argp);
return 0;
out_proc:
- dprintk("svcrdma: bad rdma procedure (%u)\n",
- be32_to_cpup(rdma_argp + 3));
+ trace_svcrdma_decode_badproc(rdma_argp);
return -EINVAL;
out_inval:
- dprintk("svcrdma: failed to parse transport header\n");
+ trace_svcrdma_decode_parse(rdma_argp);
return -EINVAL;
}
static void rdma_read_complete(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head)
+ struct svc_rdma_recv_ctxt *head)
{
int page_no;
- /* Copy RPC pages */
- for (page_no = 0; page_no < head->count; page_no++) {
+ /* Move Read chunk pages to rqstp so that they will be released
+ * when svc_process is done with them.
+ */
+ for (page_no = 0; page_no < head->rc_page_count; page_no++) {
put_page(rqstp->rq_pages[page_no]);
- rqstp->rq_pages[page_no] = head->pages[page_no];
+ rqstp->rq_pages[page_no] = head->rc_pages[page_no];
}
+ head->rc_page_count = 0;
/* Point rq_arg.pages past header */
- rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
- rqstp->rq_arg.page_len = head->arg.page_len;
+ rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count];
+ rqstp->rq_arg.page_len = head->rc_arg.page_len;
/* rq_respages starts after the last arg page */
rqstp->rq_respages = &rqstp->rq_pages[page_no];
rqstp->rq_next_page = rqstp->rq_respages + 1;
/* Rebuild rq_arg head and tail. */
- rqstp->rq_arg.head[0] = head->arg.head[0];
- rqstp->rq_arg.tail[0] = head->arg.tail[0];
- rqstp->rq_arg.len = head->arg.len;
- rqstp->rq_arg.buflen = head->arg.buflen;
+ rqstp->rq_arg.head[0] = head->rc_arg.head[0];
+ rqstp->rq_arg.tail[0] = head->rc_arg.tail[0];
+ rqstp->rq_arg.len = head->rc_arg.len;
+ rqstp->rq_arg.buflen = head->rc_arg.buflen;
}
static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
__be32 *rdma_argp, int status)
{
- struct svc_rdma_op_ctxt *ctxt;
- __be32 *p, *err_msgp;
+ struct svc_rdma_send_ctxt *ctxt;
unsigned int length;
- struct page *page;
+ __be32 *p;
int ret;
- page = alloc_page(GFP_KERNEL);
- if (!page)
+ ctxt = svc_rdma_send_ctxt_get(xprt);
+ if (!ctxt)
return;
- err_msgp = page_address(page);
- p = err_msgp;
+ p = ctxt->sc_xprt_buf;
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = xprt->sc_fc_credits;
*p++ = rdma_error;
- if (status == -EPROTONOSUPPORT) {
+ switch (status) {
+ case -EPROTONOSUPPORT:
*p++ = err_vers;
*p++ = rpcrdma_version;
*p++ = rpcrdma_version;
- } else {
+ trace_svcrdma_err_vers(*rdma_argp);
+ break;
+ default:
*p++ = err_chunk;
+ trace_svcrdma_err_chunk(*rdma_argp);
}
- length = (unsigned long)p - (unsigned long)err_msgp;
-
- /* Map transport header; no RPC message payload */
- ctxt = svc_rdma_get_context(xprt);
- ret = svc_rdma_map_reply_hdr(xprt, ctxt, err_msgp, length);
- if (ret) {
- dprintk("svcrdma: Error %d mapping send for protocol error\n",
- ret);
- return;
- }
+ length = (unsigned long)p - (unsigned long)ctxt->sc_xprt_buf;
+ svc_rdma_sync_reply_hdr(xprt, ctxt, length);
- ret = svc_rdma_post_send_wr(xprt, ctxt, 1, 0);
- if (ret) {
- dprintk("svcrdma: Error %d posting send for protocol error\n",
- ret);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- }
+ ctxt->sc_send_wr.opcode = IB_WR_SEND;
+ ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
+ if (ret)
+ svc_rdma_send_ctxt_put(xprt, ctxt);
}
/* By convention, backchannel calls arrive via rdma_msg type
@@ -507,32 +704,28 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma_xprt =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- struct svc_rdma_op_ctxt *ctxt;
+ struct svc_rdma_recv_ctxt *ctxt;
__be32 *p;
int ret;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
- if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
- ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
- struct svc_rdma_op_ctxt, list);
- list_del(&ctxt->list);
+ ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
+ if (ctxt) {
+ list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
rdma_read_complete(rqstp, ctxt);
goto complete;
- } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
- ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
- struct svc_rdma_op_ctxt, list);
- list_del(&ctxt->list);
- } else {
+ }
+ ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
+ if (!ctxt) {
/* No new incoming requests, terminate the loop */
clear_bit(XPT_DATA, &xprt->xpt_flags);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
return 0;
}
+ list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
- dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
- ctxt, rdma_xprt, rqstp);
atomic_inc(&rdma_stat_recv);
svc_rdma_build_arg_xdr(rqstp, ctxt);
@@ -548,7 +741,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
if (svc_rdma_is_backchannel_reply(xprt, p)) {
ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
&rqstp->rq_arg);
- svc_rdma_put_context(ctxt, 0);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
}
@@ -557,9 +750,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_readchunk;
complete:
- svc_rdma_put_context(ctxt, 0);
- dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n",
- rdma_xprt, rqstp, rqstp->rq_arg.len);
+ rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
return rqstp->rq_arg.len;
@@ -572,16 +763,16 @@ out_readchunk:
out_err:
svc_rdma_send_error(rdma_xprt, p, ret);
- svc_rdma_put_context(ctxt, 0);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
out_postfail:
if (ret == -EINVAL)
svc_rdma_send_error(rdma_xprt, p, ret);
- svc_rdma_put_context(ctxt, 1);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
out_drop:
- svc_rdma_put_context(ctxt, 1);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 12b9a7e0b6d2..ce3ea8419704 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,15 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2016 Oracle. All rights reserved.
+ * Copyright (c) 2016-2018 Oracle. All rights reserved.
*
* Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
*/
+#include <rdma/rw.h>
+
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/debug.h>
-#include <rdma/rw.h>
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
@@ -205,6 +208,8 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
+ trace_svcrdma_wc_write(wc);
+
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -222,7 +227,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
/* State for pulling a Read chunk.
*/
struct svc_rdma_read_info {
- struct svc_rdma_op_ctxt *ri_readctxt;
+ struct svc_rdma_recv_ctxt *ri_readctxt;
unsigned int ri_position;
unsigned int ri_pageno;
unsigned int ri_pageoff;
@@ -266,6 +271,8 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_read_info *info =
container_of(cc, struct svc_rdma_read_info, ri_cc);
+ trace_svcrdma_wc_read(wc);
+
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -275,10 +282,10 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
wc->status, wc->vendor_err);
- svc_rdma_put_context(info->ri_readctxt, 1);
+ svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt);
} else {
spin_lock(&rdma->sc_rq_dto_lock);
- list_add_tail(&info->ri_readctxt->list,
+ list_add_tail(&info->ri_readctxt->rc_list,
&rdma->sc_read_complete_q);
spin_unlock(&rdma->sc_rq_dto_lock);
@@ -323,18 +330,20 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ trace_svcrdma_post_rw(&cc->cc_cqe,
+ cc->cc_sqecount, ret);
if (ret)
break;
return 0;
}
- atomic_inc(&rdma_stat_sq_starve);
+ trace_svcrdma_sq_full(rdma);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
+ trace_svcrdma_sq_retry(rdma);
} while (1);
- pr_err("svcrdma: ib_post_send failed (%d)\n", ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
/* If even one was posted, there will be a completion. */
@@ -437,6 +446,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
if (ret < 0)
goto out_initerr;
+ trace_svcrdma_encode_wseg(seg_handle, write_len, seg_offset);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
if (write_len == seg_length - info->wi_seg_off) {
@@ -462,7 +472,7 @@ out_noctx:
out_initerr:
svc_rdma_put_rw_ctxt(rdma, ctxt);
- pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+ trace_svcrdma_dma_map_rwctx(rdma, ret);
return -EIO;
}
@@ -526,6 +536,8 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
if (ret < 0)
goto out_err;
+
+ trace_svcrdma_encode_write(xdr->page_len);
return xdr->page_len;
out_err:
@@ -582,6 +594,8 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
if (ret < 0)
goto out_err;
+
+ trace_svcrdma_encode_reply(consumed);
return consumed;
out_err:
@@ -593,7 +607,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
struct svc_rqst *rqstp,
u32 rkey, u32 len, u64 offset)
{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
struct svc_rdma_rw_ctxt *ctxt;
unsigned int sge_no, seg_len;
@@ -606,18 +620,15 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
goto out_noctx;
ctxt->rw_nents = sge_no;
- dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
- len, offset, rkey, sge_no);
-
sg = ctxt->rw_sg_table.sgl;
for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
seg_len = min_t(unsigned int, len,
PAGE_SIZE - info->ri_pageoff);
- head->arg.pages[info->ri_pageno] =
+ head->rc_arg.pages[info->ri_pageno] =
rqstp->rq_pages[info->ri_pageno];
if (!info->ri_pageoff)
- head->count++;
+ head->rc_page_count++;
sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
seg_len, info->ri_pageoff);
@@ -656,8 +667,8 @@ out_overrun:
return -EINVAL;
out_initerr:
+ trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret);
svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
- pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
return -EIO;
}
@@ -686,6 +697,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
if (ret < 0)
break;
+ trace_svcrdma_encode_rseg(rs_handle, rs_length, rs_offset);
info->ri_chunklen += rs_length;
}
@@ -693,9 +705,9 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
}
/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
- * data lands in the page list of head->arg.pages.
+ * data lands in the page list of head->rc_arg.pages.
*
- * Currently NFSD does not look at the head->arg.tail[0] iovec.
+ * Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
* Therefore, XDR round-up of the Read chunk and trailing
* inline content must both be added at the end of the pagelist.
*/
@@ -703,29 +715,27 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
int ret;
- dprintk("svcrdma: Reading Read chunk at position %u\n",
- info->ri_position);
-
- info->ri_pageno = head->hdr_count;
- info->ri_pageoff = 0;
-
ret = svc_rdma_build_read_chunk(rqstp, info, p);
if (ret < 0)
goto out;
+ trace_svcrdma_encode_read(info->ri_chunklen, info->ri_position);
+
+ head->rc_hdr_count = 0;
+
/* Split the Receive buffer between the head and tail
* buffers at Read chunk's position. XDR roundup of the
* chunk is not included in either the pagelist or in
* the tail.
*/
- head->arg.tail[0].iov_base =
- head->arg.head[0].iov_base + info->ri_position;
- head->arg.tail[0].iov_len =
- head->arg.head[0].iov_len - info->ri_position;
- head->arg.head[0].iov_len = info->ri_position;
+ head->rc_arg.tail[0].iov_base =
+ head->rc_arg.head[0].iov_base + info->ri_position;
+ head->rc_arg.tail[0].iov_len =
+ head->rc_arg.head[0].iov_len - info->ri_position;
+ head->rc_arg.head[0].iov_len = info->ri_position;
/* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
*
@@ -738,9 +748,9 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
*/
info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2;
- head->arg.page_len = info->ri_chunklen;
- head->arg.len += info->ri_chunklen;
- head->arg.buflen += info->ri_chunklen;
+ head->rc_arg.page_len = info->ri_chunklen;
+ head->rc_arg.len += info->ri_chunklen;
+ head->rc_arg.buflen += info->ri_chunklen;
out:
return ret;
@@ -749,7 +759,7 @@ out:
/* Construct RDMA Reads to pull over a Position Zero Read chunk.
* The start of the data lands in the first page just after
* the Transport header, and the rest lands in the page list of
- * head->arg.pages.
+ * head->rc_arg.pages.
*
* Assumptions:
* - A PZRC has an XDR-aligned length (no implicit round-up).
@@ -761,35 +771,25 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
int ret;
- dprintk("svcrdma: Reading Position Zero Read chunk\n");
-
- info->ri_pageno = head->hdr_count - 1;
- info->ri_pageoff = offset_in_page(head->byte_len);
-
ret = svc_rdma_build_read_chunk(rqstp, info, p);
if (ret < 0)
goto out;
- head->arg.len += info->ri_chunklen;
- head->arg.buflen += info->ri_chunklen;
+ trace_svcrdma_encode_pzr(info->ri_chunklen);
- if (head->arg.buflen <= head->sge[0].length) {
- /* Transport header and RPC message fit entirely
- * in page where head iovec resides.
- */
- head->arg.head[0].iov_len = info->ri_chunklen;
- } else {
- /* Transport header and part of RPC message reside
- * in the head iovec's page.
- */
- head->arg.head[0].iov_len =
- head->sge[0].length - head->byte_len;
- head->arg.page_len =
- info->ri_chunklen - head->arg.head[0].iov_len;
- }
+ head->rc_arg.len += info->ri_chunklen;
+ head->rc_arg.buflen += info->ri_chunklen;
+
+ head->rc_hdr_count = 1;
+ head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]);
+ head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE,
+ info->ri_chunklen);
+
+ head->rc_arg.page_len = info->ri_chunklen -
+ head->rc_arg.head[0].iov_len;
out:
return ret;
@@ -813,29 +813,30 @@ out:
* - All Read segments in @p have the same Position value.
*/
int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head, __be32 *p)
+ struct svc_rdma_recv_ctxt *head, __be32 *p)
{
struct svc_rdma_read_info *info;
struct page **page;
int ret;
/* The request (with page list) is constructed in
- * head->arg. Pages involved with RDMA Read I/O are
+ * head->rc_arg. Pages involved with RDMA Read I/O are
* transferred there.
*/
- head->hdr_count = head->count;
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = head->pages;
- head->arg.page_base = 0;
- head->arg.page_len = 0;
- head->arg.len = rqstp->rq_arg.len;
- head->arg.buflen = rqstp->rq_arg.buflen;
+ head->rc_arg.head[0] = rqstp->rq_arg.head[0];
+ head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
+ head->rc_arg.pages = head->rc_pages;
+ head->rc_arg.page_base = 0;
+ head->rc_arg.page_len = 0;
+ head->rc_arg.len = rqstp->rq_arg.len;
+ head->rc_arg.buflen = rqstp->rq_arg.buflen;
info = svc_rdma_read_info_alloc(rdma);
if (!info)
return -ENOMEM;
info->ri_readctxt = head;
+ info->ri_pageno = 0;
+ info->ri_pageoff = 0;
info->ri_position = be32_to_cpup(p + 1);
if (info->ri_position)
@@ -856,7 +857,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
out:
/* Read sink pages have been moved from rqstp->rq_pages to
- * head->arg.pages. Force svc_recv to refill those slots
+ * head->rc_arg.pages. Force svc_recv to refill those slots
* in rq_pages.
*/
for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 649441d5087d..4a3efaea277c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (c) 2016 Oracle. All rights reserved.
+ * Copyright (c) 2016-2018 Oracle. All rights reserved.
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
@@ -74,11 +75,11 @@
* DMA-unmap the pages under I/O for that Write segment. The Write
* completion handler does not release any pages.
*
- * When the Send WR is constructed, it also gets its own svc_rdma_op_ctxt.
+ * When the Send WR is constructed, it also gets its own svc_rdma_send_ctxt.
* The ownership of all of the Reply's pages is transferred into that
* ctxt, the Send WR is posted, and sendto returns.
*
- * The svc_rdma_op_ctxt is presented when the Send WR completes. The
+ * The svc_rdma_send_ctxt is presented when the Send WR completes. The
* Send completion handler finally releases the Reply's pages.
*
* This mechanism also assumes that completions on the transport's Send
@@ -98,16 +99,230 @@
* where two different Write segments send portions of the same page.
*/
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
+
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
+
+static inline struct svc_rdma_send_ctxt *
+svc_rdma_next_send_ctxt(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
+ sc_list);
+}
+
+static struct svc_rdma_send_ctxt *
+svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_send_ctxt *ctxt;
+ dma_addr_t addr;
+ void *buffer;
+ size_t size;
+ int i;
+
+ size = sizeof(*ctxt);
+ size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
+ ctxt = kmalloc(size, GFP_KERNEL);
+ if (!ctxt)
+ goto fail0;
+ buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+ if (!buffer)
+ goto fail1;
+ addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
+ rdma->sc_max_req_size, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
+ goto fail2;
+
+ ctxt->sc_send_wr.next = NULL;
+ ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
+ ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
+ ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
+ ctxt->sc_cqe.done = svc_rdma_wc_send;
+ ctxt->sc_xprt_buf = buffer;
+ ctxt->sc_sges[0].addr = addr;
+
+ for (i = 0; i < rdma->sc_max_send_sges; i++)
+ ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey;
+ return ctxt;
+
+fail2:
+ kfree(buffer);
+fail1:
+ kfree(ctxt);
+fail0:
+ return NULL;
+}
+
+/**
+ * svc_rdma_send_ctxts_destroy - Release all send_ctxt's for an xprt
+ * @rdma: svcxprt_rdma being torn down
+ *
+ */
+void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_send_ctxt *ctxt;
+
+ while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
+ list_del(&ctxt->sc_list);
+ ib_dma_unmap_single(rdma->sc_pd->device,
+ ctxt->sc_sges[0].addr,
+ rdma->sc_max_req_size,
+ DMA_TO_DEVICE);
+ kfree(ctxt->sc_xprt_buf);
+ kfree(ctxt);
+ }
+}
+
+/**
+ * svc_rdma_send_ctxt_get - Get a free send_ctxt
+ * @rdma: controlling svcxprt_rdma
+ *
+ * Returns a ready-to-use send_ctxt, or NULL if none are
+ * available and a fresh one cannot be allocated.
+ */
+struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
+{
+ struct svc_rdma_send_ctxt *ctxt;
+
+ spin_lock(&rdma->sc_send_lock);
+ ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
+ if (!ctxt)
+ goto out_empty;
+ list_del(&ctxt->sc_list);
+ spin_unlock(&rdma->sc_send_lock);
+
+out:
+ ctxt->sc_send_wr.num_sge = 0;
+ ctxt->sc_cur_sge_no = 0;
+ ctxt->sc_page_count = 0;
+ return ctxt;
+
+out_empty:
+ spin_unlock(&rdma->sc_send_lock);
+ ctxt = svc_rdma_send_ctxt_alloc(rdma);
+ if (!ctxt)
+ return NULL;
+ goto out;
+}
+
+/**
+ * svc_rdma_send_ctxt_put - Return send_ctxt to free list
+ * @rdma: controlling svcxprt_rdma
+ * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
+ */
+void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct ib_device *device = rdma->sc_cm_id->device;
+ unsigned int i;
+
+ /* The first SGE contains the transport header, which
+ * remains mapped until @ctxt is destroyed.
+ */
+ for (i = 1; i < ctxt->sc_send_wr.num_sge; i++)
+ ib_dma_unmap_page(device,
+ ctxt->sc_sges[i].addr,
+ ctxt->sc_sges[i].length,
+ DMA_TO_DEVICE);
+
+ for (i = 0; i < ctxt->sc_page_count; ++i)
+ put_page(ctxt->sc_pages[i]);
+
+ spin_lock(&rdma->sc_send_lock);
+ list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
+ spin_unlock(&rdma->sc_send_lock);
+}
+
+/**
+ * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
+ * @cq: Completion Queue context
+ * @wc: Work Completion object
+ *
+ * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
+ * the Send completion handler could be running.
+ */
+static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct svcxprt_rdma *rdma = cq->cq_context;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_send_ctxt *ctxt;
+
+ trace_svcrdma_wc_send(wc);
+
+ atomic_inc(&rdma->sc_sq_avail);
+ wake_up(&rdma->sc_send_wait);
+
+ ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
+ svc_rdma_send_ctxt_put(rdma, ctxt);
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_xprt_enqueue(&rdma->sc_xprt);
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ pr_err("svcrdma: Send: %s (%u/0x%x)\n",
+ ib_wc_status_msg(wc->status),
+ wc->status, wc->vendor_err);
+ }
+
+ svc_xprt_put(&rdma->sc_xprt);
+}
+
+/**
+ * svc_rdma_send - Post a single Send WR
+ * @rdma: transport on which to post the WR
+ * @wr: prepared Send WR to post
+ *
+ * Returns zero if the Send WR was posted successfully. Otherwise, a
+ * negative errno is returned.
+ */
+int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
+{
+ struct ib_send_wr *bad_wr;
+ int ret;
+
+ might_sleep();
+
+ /* If the SQ is full, wait until an SQ entry is available */
+ while (1) {
+ if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
+ atomic_inc(&rdma_stat_sq_starve);
+ trace_svcrdma_sq_full(rdma);
+ atomic_inc(&rdma->sc_sq_avail);
+ wait_event(rdma->sc_send_wait,
+ atomic_read(&rdma->sc_sq_avail) > 1);
+ if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+ return -ENOTCONN;
+ trace_svcrdma_sq_retry(rdma);
+ continue;
+ }
+
+ svc_xprt_get(&rdma->sc_xprt);
+ ret = ib_post_send(rdma->sc_qp, wr, &bad_wr);
+ trace_svcrdma_post_send(wr, ret);
+ if (ret) {
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_xprt_put(&rdma->sc_xprt);
+ wake_up(&rdma->sc_send_wait);
+ }
+ break;
+ }
+ return ret;
+}
+
static u32 xdr_padsize(u32 len)
{
return (len & 3) ? (4 - (len & 3)) : 0;
@@ -296,41 +511,10 @@ static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
return be32_to_cpup(p);
}
-/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
- * is used during completion to DMA-unmap this memory, and
- * it uses ib_dma_unmap_page() exclusively.
- */
-static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
- struct svc_rdma_op_ctxt *ctxt,
- unsigned int sge_no,
- unsigned char *base,
- unsigned int len)
-{
- unsigned long offset = (unsigned long)base & ~PAGE_MASK;
- struct ib_device *dev = rdma->sc_cm_id->device;
- dma_addr_t dma_addr;
-
- dma_addr = ib_dma_map_page(dev, virt_to_page(base),
- offset, len, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(dev, dma_addr))
- goto out_maperr;
-
- ctxt->sge[sge_no].addr = dma_addr;
- ctxt->sge[sge_no].length = len;
- ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
- svc_rdma_count_mappings(rdma, ctxt);
- return 0;
-
-out_maperr:
- pr_err("svcrdma: failed to map buffer\n");
- return -EIO;
-}
-
static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
- struct svc_rdma_op_ctxt *ctxt,
- unsigned int sge_no,
+ struct svc_rdma_send_ctxt *ctxt,
struct page *page,
- unsigned int offset,
+ unsigned long offset,
unsigned int len)
{
struct ib_device *dev = rdma->sc_cm_id->device;
@@ -340,58 +524,71 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
if (ib_dma_mapping_error(dev, dma_addr))
goto out_maperr;
- ctxt->sge[sge_no].addr = dma_addr;
- ctxt->sge[sge_no].length = len;
- ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
- svc_rdma_count_mappings(rdma, ctxt);
+ ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
+ ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
+ ctxt->sc_send_wr.num_sge++;
return 0;
out_maperr:
- pr_err("svcrdma: failed to map page\n");
+ trace_svcrdma_dma_map_page(rdma, page);
return -EIO;
}
+/* ib_dma_map_page() is used here because svc_rdma_send_ctxt_put()
+ * handles DMA-unmap and it uses ib_dma_unmap_page() exclusively.
+ */
+static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ unsigned char *base,
+ unsigned int len)
+{
+ return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base),
+ offset_in_page(base), len);
+}
+
/**
- * svc_rdma_map_reply_hdr - DMA map the transport header buffer
+ * svc_rdma_sync_reply_hdr - DMA sync the transport header buffer
* @rdma: controlling transport
- * @ctxt: op_ctxt for the Send WR
- * @rdma_resp: buffer containing transport header
+ * @ctxt: send_ctxt for the Send WR
* @len: length of transport header
*
- * Returns:
- * %0 if the header is DMA mapped,
- * %-EIO if DMA mapping failed.
*/
-int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma,
- struct svc_rdma_op_ctxt *ctxt,
- __be32 *rdma_resp,
- unsigned int len)
+void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ unsigned int len)
{
- ctxt->direction = DMA_TO_DEVICE;
- ctxt->pages[0] = virt_to_page(rdma_resp);
- ctxt->count = 1;
- return svc_rdma_dma_map_page(rdma, ctxt, 0, ctxt->pages[0], 0, len);
+ ctxt->sc_sges[0].length = len;
+ ctxt->sc_send_wr.num_sge++;
+ ib_dma_sync_single_for_device(rdma->sc_pd->device,
+ ctxt->sc_sges[0].addr, len,
+ DMA_TO_DEVICE);
}
-/* Load the xdr_buf into the ctxt's sge array, and DMA map each
+/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
+ * @rdma: controlling transport
+ * @ctxt: send_ctxt for the Send WR
+ * @xdr: prepared xdr_buf containing RPC message
+ * @wr_lst: pointer to Call header's Write list, or NULL
+ *
+ * Load the xdr_buf into the ctxt's sge array, and DMA map each
* element as it is added.
*
- * Returns the number of sge elements loaded on success, or
- * a negative errno on failure.
+ * Returns zero on success, or a negative errno on failure.
*/
-static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
- struct svc_rdma_op_ctxt *ctxt,
- struct xdr_buf *xdr, __be32 *wr_lst)
+int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ struct xdr_buf *xdr, __be32 *wr_lst)
{
- unsigned int len, sge_no, remaining, page_off;
+ unsigned int len, remaining;
+ unsigned long page_off;
struct page **ppages;
unsigned char *base;
u32 xdr_pad;
int ret;
- sge_no = 1;
-
- ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++,
+ if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+ return -EIO;
+ ret = svc_rdma_dma_map_buf(rdma, ctxt,
xdr->head[0].iov_base,
xdr->head[0].iov_len);
if (ret < 0)
@@ -421,8 +618,10 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) {
len = min_t(u32, PAGE_SIZE - page_off, remaining);
- ret = svc_rdma_dma_map_page(rdma, ctxt, sge_no++,
- *ppages++, page_off, len);
+ if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+ return -EIO;
+ ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
+ page_off, len);
if (ret < 0)
return ret;
@@ -434,12 +633,14 @@ static int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
len = xdr->tail[0].iov_len;
tail:
if (len) {
- ret = svc_rdma_dma_map_buf(rdma, ctxt, sge_no++, base, len);
+ if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
+ return -EIO;
+ ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
if (ret < 0)
return ret;
}
- return sge_no - 1;
+ return 0;
}
/* The svc_rqst and all resources it owns are released as soon as
@@ -447,62 +648,25 @@ tail:
* so they are released by the Send completion handler.
*/
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *ctxt)
+ struct svc_rdma_send_ctxt *ctxt)
{
int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
- ctxt->count += pages;
+ ctxt->sc_page_count += pages;
for (i = 0; i < pages; i++) {
- ctxt->pages[i + 1] = rqstp->rq_respages[i];
+ ctxt->sc_pages[i] = rqstp->rq_respages[i];
rqstp->rq_respages[i] = NULL;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
}
-/**
- * svc_rdma_post_send_wr - Set up and post one Send Work Request
- * @rdma: controlling transport
- * @ctxt: op_ctxt for transmitting the Send WR
- * @num_sge: number of SGEs to send
- * @inv_rkey: R_key argument to Send With Invalidate, or zero
- *
- * Returns:
- * %0 if the Send* was posted successfully,
- * %-ENOTCONN if the connection was lost or dropped,
- * %-EINVAL if there was a problem with the Send we built,
- * %-ENOMEM if ib_post_send failed.
- */
-int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
- struct svc_rdma_op_ctxt *ctxt, int num_sge,
- u32 inv_rkey)
-{
- struct ib_send_wr *send_wr = &ctxt->send_wr;
-
- dprintk("svcrdma: posting Send WR with %u sge(s)\n", num_sge);
-
- send_wr->next = NULL;
- ctxt->cqe.done = svc_rdma_wc_send;
- send_wr->wr_cqe = &ctxt->cqe;
- send_wr->sg_list = ctxt->sge;
- send_wr->num_sge = num_sge;
- send_wr->send_flags = IB_SEND_SIGNALED;
- if (inv_rkey) {
- send_wr->opcode = IB_WR_SEND_WITH_INV;
- send_wr->ex.invalidate_rkey = inv_rkey;
- } else {
- send_wr->opcode = IB_WR_SEND;
- }
-
- return svc_rdma_send(rdma, send_wr);
-}
-
/* Prepare the portion of the RPC Reply that will be transmitted
* via RDMA Send. The RPC-over-RDMA transport header is prepared
- * in sge[0], and the RPC xdr_buf is prepared in following sges.
+ * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
*
* Depending on whether a Write list or Reply chunk is present,
* the server may send all, a portion of, or none of the xdr_buf.
- * In the latter case, only the transport header (sge[0]) is
+ * In the latter case, only the transport header (sc_sges[0]) is
* transmitted.
*
* RDMA Send is the last step of transmitting an RPC reply. Pages
@@ -515,49 +679,32 @@ int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma,
* - The Reply's transport header will never be larger than a page.
*/
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
- __be32 *rdma_argp, __be32 *rdma_resp,
+ struct svc_rdma_send_ctxt *ctxt,
+ __be32 *rdma_argp,
struct svc_rqst *rqstp,
__be32 *wr_lst, __be32 *rp_ch)
{
- struct svc_rdma_op_ctxt *ctxt;
- u32 inv_rkey;
int ret;
- dprintk("svcrdma: sending %s reply: head=%zu, pagelen=%u, tail=%zu\n",
- (rp_ch ? "RDMA_NOMSG" : "RDMA_MSG"),
- rqstp->rq_res.head[0].iov_len,
- rqstp->rq_res.page_len,
- rqstp->rq_res.tail[0].iov_len);
-
- ctxt = svc_rdma_get_context(rdma);
-
- ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp,
- svc_rdma_reply_hdr_len(rdma_resp));
- if (ret < 0)
- goto err;
-
if (!rp_ch) {
ret = svc_rdma_map_reply_msg(rdma, ctxt,
&rqstp->rq_res, wr_lst);
if (ret < 0)
- goto err;
+ return ret;
}
svc_rdma_save_io_pages(rqstp, ctxt);
- inv_rkey = 0;
- if (rdma->sc_snd_w_inv)
- inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
- ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, inv_rkey);
- if (ret)
- goto err;
-
- return 0;
-
-err:
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- return ret;
+ ctxt->sc_send_wr.opcode = IB_WR_SEND;
+ if (rdma->sc_snd_w_inv) {
+ ctxt->sc_send_wr.ex.invalidate_rkey =
+ svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
+ if (ctxt->sc_send_wr.ex.invalidate_rkey)
+ ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+ }
+ dprintk("svcrdma: posting Send WR with %u sge(s)\n",
+ ctxt->sc_send_wr.num_sge);
+ return svc_rdma_send(rdma, &ctxt->sc_send_wr);
}
/* Given the client-provided Write and Reply chunks, the server was not
@@ -568,38 +715,29 @@ err:
* Remote Invalidation is skipped for simplicity.
*/
static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
- __be32 *rdma_resp, struct svc_rqst *rqstp)
+ struct svc_rdma_send_ctxt *ctxt,
+ struct svc_rqst *rqstp)
{
- struct svc_rdma_op_ctxt *ctxt;
__be32 *p;
int ret;
- ctxt = svc_rdma_get_context(rdma);
-
- /* Replace the original transport header with an
- * RDMA_ERROR response. XID etc are preserved.
- */
- p = rdma_resp + 3;
+ p = ctxt->sc_xprt_buf;
+ trace_svcrdma_err_chunk(*p);
+ p += 3;
*p++ = rdma_error;
*p = err_chunk;
-
- ret = svc_rdma_map_reply_hdr(rdma, ctxt, rdma_resp, 20);
- if (ret < 0)
- goto err;
+ svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_ERR);
svc_rdma_save_io_pages(rqstp, ctxt);
- ret = svc_rdma_post_send_wr(rdma, ctxt, 1 + ret, 0);
- if (ret)
- goto err;
+ ctxt->sc_send_wr.opcode = IB_WR_SEND;
+ ret = svc_rdma_send(rdma, &ctxt->sc_send_wr);
+ if (ret) {
+ svc_rdma_send_ctxt_put(rdma, ctxt);
+ return ret;
+ }
return 0;
-
-err:
- pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- return ret;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
@@ -623,20 +761,15 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
struct svc_xprt *xprt = rqstp->rq_xprt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
+ struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *p, *rdma_argp, *rdma_resp, *wr_lst, *rp_ch;
struct xdr_buf *xdr = &rqstp->rq_res;
- struct page *res_page;
+ struct svc_rdma_send_ctxt *sctxt;
int ret;
- /* Find the call's chunk lists to decide how to send the reply.
- * Receive places the Call's xprt header at the start of page 0.
- */
- rdma_argp = page_address(rqstp->rq_pages[0]);
+ rdma_argp = rctxt->rc_recv_buf;
svc_rdma_get_write_arrays(rdma_argp, &wr_lst, &rp_ch);
- dprintk("svcrdma: preparing response for XID 0x%08x\n",
- be32_to_cpup(rdma_argp));
-
/* Create the RDMA response header. xprt->xpt_mutex,
* acquired in svc_send(), serializes RPC replies. The
* code path below that inserts the credit grant value
@@ -644,10 +777,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
* critical section.
*/
ret = -ENOMEM;
- res_page = alloc_page(GFP_KERNEL);
- if (!res_page)
+ sctxt = svc_rdma_send_ctxt_get(rdma);
+ if (!sctxt)
goto err0;
- rdma_resp = page_address(res_page);
+ rdma_resp = sctxt->sc_xprt_buf;
p = rdma_resp;
*p++ = *rdma_argp;
@@ -674,26 +807,33 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
svc_rdma_xdr_encode_reply_chunk(rdma_resp, rp_ch, ret);
}
- ret = svc_rdma_send_reply_msg(rdma, rdma_argp, rdma_resp, rqstp,
+ svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
+ ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
wr_lst, rp_ch);
if (ret < 0)
- goto err0;
- return 0;
+ goto err1;
+ ret = 0;
+
+out:
+ rqstp->rq_xprt_ctxt = NULL;
+ svc_rdma_recv_ctxt_put(rdma, rctxt);
+ return ret;
err2:
if (ret != -E2BIG && ret != -EINVAL)
goto err1;
- ret = svc_rdma_send_error_msg(rdma, rdma_resp, rqstp);
+ ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
if (ret < 0)
- goto err0;
- return 0;
+ goto err1;
+ ret = 0;
+ goto out;
err1:
- put_page(res_page);
+ svc_rdma_send_ctxt_put(rdma, sctxt);
err0:
- pr_err("svcrdma: Could not send reply, err=%d. Closing transport.\n",
- ret);
+ trace_svcrdma_send_failed(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
- return -ENOTCONN;
+ ret = -ENOTCONN;
+ goto out;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 96cc8f6597d3..e9535a66bab0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
+ * Copyright (c) 2015-2018 Oracle. All rights reserved.
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
*
@@ -40,26 +42,30 @@
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
-#include <linux/sunrpc/svc_xprt.h>
-#include <linux/sunrpc/addr.h>
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include <linux/export.h>
+
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
+
+#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svc_rdma.h>
-#include <linux/export.h>
+
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-static int svc_rdma_post_recv(struct svcxprt_rdma *xprt);
-static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
+static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
+ struct net *net);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -123,7 +129,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
struct svcxprt_rdma *cma_xprt;
struct svc_xprt *xprt;
- cma_xprt = rdma_create_xprt(serv, 0);
+ cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
xprt = &cma_xprt->sc_xprt;
@@ -152,133 +158,20 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
- gfp_t flags)
-{
- struct svc_rdma_op_ctxt *ctxt;
-
- ctxt = kmalloc(sizeof(*ctxt), flags);
- if (ctxt) {
- ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->list);
- }
- return ctxt;
-}
-
-static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
-{
- unsigned int i;
-
- /* Each RPC/RDMA credit can consume one Receive and
- * one Send WQE at the same time.
- */
- i = xprt->sc_sq_depth + xprt->sc_rq_depth;
-
- while (i--) {
- struct svc_rdma_op_ctxt *ctxt;
-
- ctxt = alloc_ctxt(xprt, GFP_KERNEL);
- if (!ctxt) {
- dprintk("svcrdma: No memory for RDMA ctxt\n");
- return false;
- }
- list_add(&ctxt->list, &xprt->sc_ctxts);
- }
- return true;
-}
-
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
-{
- struct svc_rdma_op_ctxt *ctxt = NULL;
-
- spin_lock(&xprt->sc_ctxt_lock);
- xprt->sc_ctxt_used++;
- if (list_empty(&xprt->sc_ctxts))
- goto out_empty;
-
- ctxt = list_first_entry(&xprt->sc_ctxts,
- struct svc_rdma_op_ctxt, list);
- list_del(&ctxt->list);
- spin_unlock(&xprt->sc_ctxt_lock);
-
-out:
- ctxt->count = 0;
- ctxt->mapped_sges = 0;
- return ctxt;
-
-out_empty:
- /* Either pre-allocation missed the mark, or send
- * queue accounting is broken.
- */
- spin_unlock(&xprt->sc_ctxt_lock);
-
- ctxt = alloc_ctxt(xprt, GFP_NOIO);
- if (ctxt)
- goto out;
-
- spin_lock(&xprt->sc_ctxt_lock);
- xprt->sc_ctxt_used--;
- spin_unlock(&xprt->sc_ctxt_lock);
- WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
- return NULL;
-}
-
-void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
-{
- struct svcxprt_rdma *xprt = ctxt->xprt;
- struct ib_device *device = xprt->sc_cm_id->device;
- unsigned int i;
-
- for (i = 0; i < ctxt->mapped_sges; i++)
- ib_dma_unmap_page(device,
- ctxt->sge[i].addr,
- ctxt->sge[i].length,
- ctxt->direction);
- ctxt->mapped_sges = 0;
-}
-
-void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
-{
- struct svcxprt_rdma *xprt = ctxt->xprt;
- int i;
-
- if (free_pages)
- for (i = 0; i < ctxt->count; i++)
- put_page(ctxt->pages[i]);
-
- spin_lock(&xprt->sc_ctxt_lock);
- xprt->sc_ctxt_used--;
- list_add(&ctxt->list, &xprt->sc_ctxts);
- spin_unlock(&xprt->sc_ctxt_lock);
-}
-
-static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
-{
- while (!list_empty(&xprt->sc_ctxts)) {
- struct svc_rdma_op_ctxt *ctxt;
-
- ctxt = list_first_entry(&xprt->sc_ctxts,
- struct svc_rdma_op_ctxt, list);
- list_del(&ctxt->list);
- kfree(ctxt);
- }
-}
-
/* QP event handler */
static void qp_event_handler(struct ib_event *event, void *context)
{
struct svc_xprt *xprt = context;
+ trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote);
switch (event->event) {
/* These are considered benign events */
case IB_EVENT_PATH_MIG:
case IB_EVENT_COMM_EST:
case IB_EVENT_SQ_DRAINED:
case IB_EVENT_QP_LAST_WQE_REACHED:
- dprintk("svcrdma: QP event %s (%d) received for QP=%p\n",
- ib_event_msg(event->event), event->event,
- event->element.qp);
break;
+
/* These are considered fatal events */
case IB_EVENT_PATH_MIG_ERR:
case IB_EVENT_QP_FATAL:
@@ -286,111 +179,34 @@ static void qp_event_handler(struct ib_event *event, void *context)
case IB_EVENT_QP_ACCESS_ERR:
case IB_EVENT_DEVICE_FATAL:
default:
- dprintk("svcrdma: QP ERROR event %s (%d) received for QP=%p, "
- "closing transport\n",
- ib_event_msg(event->event), event->event,
- event->element.qp);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
break;
}
}
-/**
- * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
- * @cq: completion queue
- * @wc: completed WR
- *
- */
-static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct svcxprt_rdma *xprt = cq->cq_context;
- struct ib_cqe *cqe = wc->wr_cqe;
- struct svc_rdma_op_ctxt *ctxt;
-
- /* WARNING: Only wc->wr_cqe and wc->status are reliable */
- ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
- svc_rdma_unmap_dma(ctxt);
-
- if (wc->status != IB_WC_SUCCESS)
- goto flushed;
-
- /* All wc fields are now known to be valid */
- ctxt->byte_len = wc->byte_len;
- spin_lock(&xprt->sc_rq_dto_lock);
- list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
- spin_unlock(&xprt->sc_rq_dto_lock);
-
- svc_rdma_post_recv(xprt);
-
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
- goto out;
- goto out_enqueue;
-
-flushed:
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- svc_rdma_put_context(ctxt, 1);
-
-out_enqueue:
- svc_xprt_enqueue(&xprt->sc_xprt);
-out:
- svc_xprt_put(&xprt->sc_xprt);
-}
-
-/**
- * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
- * @cq: completion queue
- * @wc: completed WR
- *
- */
-void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct svcxprt_rdma *xprt = cq->cq_context;
- struct ib_cqe *cqe = wc->wr_cqe;
- struct svc_rdma_op_ctxt *ctxt;
-
- atomic_inc(&xprt->sc_sq_avail);
- wake_up(&xprt->sc_send_wait);
-
- ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
-
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- svc_xprt_enqueue(&xprt->sc_xprt);
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_err("svcrdma: Send: %s (%u/0x%x)\n",
- ib_wc_status_msg(wc->status),
- wc->status, wc->vendor_err);
- }
-
- svc_xprt_put(&xprt->sc_xprt);
-}
-
-static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
- int listener)
+static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
+ struct net *net)
{
struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
- if (!cma_xprt)
+ if (!cma_xprt) {
+ dprintk("svcrdma: failed to create new transport\n");
return NULL;
- svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
+ }
+ svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
- INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+ INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
+ INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
- spin_lock_init(&cma_xprt->sc_ctxt_lock);
+ spin_lock_init(&cma_xprt->sc_send_lock);
+ spin_lock_init(&cma_xprt->sc_recv_lock);
spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
/*
@@ -401,70 +217,9 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
*/
set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
- if (listener) {
- strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
- }
-
return cma_xprt;
}
-static int
-svc_rdma_post_recv(struct svcxprt_rdma *xprt)
-{
- struct ib_recv_wr recv_wr, *bad_recv_wr;
- struct svc_rdma_op_ctxt *ctxt;
- struct page *page;
- dma_addr_t pa;
- int sge_no;
- int buflen;
- int ret;
-
- ctxt = svc_rdma_get_context(xprt);
- buflen = 0;
- ctxt->direction = DMA_FROM_DEVICE;
- ctxt->cqe.done = svc_rdma_wc_receive;
- for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
- if (sge_no >= xprt->sc_max_sge) {
- pr_err("svcrdma: Too many sges (%d)\n", sge_no);
- goto err_put_ctxt;
- }
- page = alloc_page(GFP_KERNEL);
- if (!page)
- goto err_put_ctxt;
- ctxt->pages[sge_no] = page;
- pa = ib_dma_map_page(xprt->sc_cm_id->device,
- page, 0, PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
- goto err_put_ctxt;
- svc_rdma_count_mappings(xprt, ctxt);
- ctxt->sge[sge_no].addr = pa;
- ctxt->sge[sge_no].length = PAGE_SIZE;
- ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
- ctxt->count = sge_no + 1;
- buflen += PAGE_SIZE;
- }
- recv_wr.next = NULL;
- recv_wr.sg_list = &ctxt->sge[0];
- recv_wr.num_sge = ctxt->count;
- recv_wr.wr_cqe = &ctxt->cqe;
-
- svc_xprt_get(&xprt->sc_xprt);
- ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
- if (ret) {
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- svc_xprt_put(&xprt->sc_xprt);
- }
- return ret;
-
- err_put_ctxt:
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 1);
- return -ENOMEM;
-}
-
static void
svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
struct rdma_conn_param *param)
@@ -504,15 +259,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
struct sockaddr *sa;
/* Create a new transport */
- newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
- if (!newxprt) {
- dprintk("svcrdma: failed to create new transport\n");
+ newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
+ listen_xprt->sc_xprt.xpt_net);
+ if (!newxprt)
return;
- }
newxprt->sc_cm_id = new_cma_id;
new_cma_id->context = newxprt;
- dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
- newxprt, newxprt->sc_cm_id, listen_xprt);
svc_rdma_parse_connect_private(newxprt, param);
/* Save client advertised inbound read limit for use later in accept. */
@@ -543,9 +295,11 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
static int rdma_listen_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event)
{
- struct svcxprt_rdma *xprt = cma_id->context;
+ struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
int ret = 0;
+ trace_svcrdma_cm_event(event, sap);
+
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
@@ -553,23 +307,8 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
rdma_event_msg(event->event), event->event);
handle_connect_req(cma_id, &event->param.conn);
break;
-
- case RDMA_CM_EVENT_ESTABLISHED:
- /* Accept complete */
- dprintk("svcrdma: Connection completed on LISTEN xprt=%p, "
- "cm_id=%p\n", xprt, cma_id);
- break;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
- xprt, cma_id);
- if (xprt) {
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
- break;
-
default:
+ /* NB: No device removal upcall for INADDR_ANY listeners */
dprintk("svcrdma: Unexpected event on listening endpoint %p, "
"event = %s (%d)\n", cma_id,
rdma_event_msg(event->event), event->event);
@@ -582,9 +321,12 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
static int rdma_cma_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event)
{
- struct svc_xprt *xprt = cma_id->context;
- struct svcxprt_rdma *rdma =
- container_of(xprt, struct svcxprt_rdma, sc_xprt);
+ struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr;
+ struct svcxprt_rdma *rdma = cma_id->context;
+ struct svc_xprt *xprt = &rdma->sc_xprt;
+
+ trace_svcrdma_cm_event(event, sap);
+
switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED:
/* Accept complete */
@@ -597,21 +339,17 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_DISCONNECTED:
dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
xprt, cma_id);
- if (xprt) {
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
- }
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
"event = %s (%d)\n", cma_id, xprt,
rdma_event_msg(event->event), event->event);
- if (xprt) {
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
- }
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
break;
default:
dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
@@ -634,16 +372,18 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct svcxprt_rdma *cma_xprt;
int ret;
- dprintk("svcrdma: Creating RDMA socket\n");
+ dprintk("svcrdma: Creating RDMA listener\n");
if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
return ERR_PTR(-EAFNOSUPPORT);
}
- cma_xprt = rdma_create_xprt(serv, 1);
+ cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
+ set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
+ strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
+ listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
@@ -708,9 +448,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
+ unsigned int ctxts, rq_depth;
struct ib_device *dev;
struct sockaddr *sap;
- unsigned int i, ctxts;
int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -736,24 +476,28 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
- (size_t)RPCSVC_MAXPAGES);
+ newxprt->sc_max_send_sges = dev->attrs.max_sge;
+ /* transport hdr, head iovec, one page list entry, tail iovec */
+ if (newxprt->sc_max_send_sges < 4) {
+ pr_err("svcrdma: too few Send SGEs available (%d)\n",
+ newxprt->sc_max_send_sges);
+ goto errout;
+ }
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
- newxprt->sc_rq_depth = newxprt->sc_max_requests +
- newxprt->sc_max_bc_requests;
- if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
+ rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests;
+ if (rq_depth > dev->attrs.max_qp_wr) {
pr_warn("svcrdma: reducing receive depth to %d\n",
dev->attrs.max_qp_wr);
- newxprt->sc_rq_depth = dev->attrs.max_qp_wr;
- newxprt->sc_max_requests = newxprt->sc_rq_depth - 2;
+ rq_depth = dev->attrs.max_qp_wr;
+ newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
ctxts *= newxprt->sc_max_requests;
- newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts;
+ newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
pr_warn("svcrdma: reducing send depth to %d\n",
dev->attrs.max_qp_wr);
@@ -761,9 +505,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
- if (!svc_rdma_prealloc_ctxts(newxprt))
- goto errout;
-
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
@@ -775,7 +516,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
- newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
+ newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_rq_cq)) {
dprintk("svcrdma: error creating RQ CQ for connect request\n");
@@ -788,9 +529,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.port_num = newxprt->sc_port_num;
qp_attr.cap.max_rdma_ctxs = ctxts;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
- qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
- qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
- qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
+ qp_attr.cap.max_recv_wr = rq_depth;
+ qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges;
+ qp_attr.cap.max_recv_sge = 1;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = newxprt->sc_sq_cq;
@@ -815,14 +556,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
!rdma_ib_or_roce(dev, newxprt->sc_port_num))
goto errout;
- /* Post receive buffers */
- for (i = 0; i < newxprt->sc_max_requests; i++) {
- ret = svc_rdma_post_recv(newxprt);
- if (ret) {
- dprintk("svcrdma: failure posting receive buffers\n");
- goto errout;
- }
- }
+ if (!svc_rdma_post_recvs(newxprt))
+ goto errout;
/* Swap out the handler */
newxprt->sc_cm_id->event_handler = rdma_cma_handler;
@@ -856,16 +591,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
- dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
+ dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges);
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", conn_param.initiator_depth);
+ trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;
errout:
dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
+ trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -896,7 +633,6 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- dprintk("svc: svc_rdma_detach(%p)\n", xprt);
/* Disconnect and flush posted WQE */
rdma_disconnect(rdma->sc_cm_id);
@@ -908,7 +644,7 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
- dprintk("svcrdma: %s(%p)\n", __func__, rdma);
+ trace_svcrdma_xprt_free(xprt);
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
@@ -918,25 +654,7 @@ static void __svc_rdma_free(struct work_struct *work)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
kref_read(&xprt->xpt_ref));
- while (!list_empty(&rdma->sc_read_complete_q)) {
- struct svc_rdma_op_ctxt *ctxt;
- ctxt = list_first_entry(&rdma->sc_read_complete_q,
- struct svc_rdma_op_ctxt, list);
- list_del(&ctxt->list);
- svc_rdma_put_context(ctxt, 1);
- }
- while (!list_empty(&rdma->sc_rq_dto_q)) {
- struct svc_rdma_op_ctxt *ctxt;
- ctxt = list_first_entry(&rdma->sc_rq_dto_q,
- struct svc_rdma_op_ctxt, list);
- list_del(&ctxt->list);
- svc_rdma_put_context(ctxt, 1);
- }
-
- /* Warn if we leaked a resource or under-referenced */
- if (rdma->sc_ctxt_used != 0)
- pr_err("svcrdma: ctxt still in use? (%d)\n",
- rdma->sc_ctxt_used);
+ svc_rdma_flush_recv_queues(rdma);
/* Final put of backchannel client transport */
if (xprt->xpt_bc_xprt) {
@@ -945,7 +663,8 @@ static void __svc_rdma_free(struct work_struct *work)
}
svc_rdma_destroy_rw_ctxts(rdma);
- svc_rdma_destroy_ctxts(rdma);
+ svc_rdma_send_ctxts_destroy(rdma);
+ svc_rdma_recv_ctxts_destroy(rdma);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -998,51 +717,3 @@ static void svc_rdma_secure_port(struct svc_rqst *rqstp)
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
{
}
-
-int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
-{
- struct ib_send_wr *bad_wr, *n_wr;
- int wr_count;
- int i;
- int ret;
-
- if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
- return -ENOTCONN;
-
- wr_count = 1;
- for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
- wr_count++;
-
- /* If the SQ is full, wait until an SQ entry is available */
- while (1) {
- if ((atomic_sub_return(wr_count, &xprt->sc_sq_avail) < 0)) {
- atomic_inc(&rdma_stat_sq_starve);
-
- /* Wait until SQ WR available if SQ still full */
- atomic_add(wr_count, &xprt->sc_sq_avail);
- wait_event(xprt->sc_send_wait,
- atomic_read(&xprt->sc_sq_avail) > wr_count);
- if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
- return -ENOTCONN;
- continue;
- }
- /* Take a transport ref for each WR posted */
- for (i = 0; i < wr_count; i++)
- svc_xprt_get(&xprt->sc_xprt);
-
- /* Bump used SQ WR count and post */
- ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
- if (ret) {
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- for (i = 0; i < wr_count; i ++)
- svc_xprt_put(&xprt->sc_xprt);
- dprintk("svcrdma: failed to post SQ WR rc=%d\n", ret);
- dprintk(" sc_sq_avail=%d, sc_sq_depth=%d\n",
- atomic_read(&xprt->sc_sq_avail),
- xprt->sc_sq_depth);
- wake_up(&xprt->sc_send_wait);
- }
- break;
- }
- return ret;
-}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index cc1aad325496..143ce2579ba9 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -51,9 +52,13 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
+#include <linux/smp.h>
+
#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -330,9 +335,7 @@ xprt_setup_rdma(struct xprt_create *args)
return ERR_PTR(-EBADF);
}
- xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
- xprt_rdma_slot_table_entries,
- xprt_rdma_slot_table_entries);
+ xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0);
if (xprt == NULL) {
dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
__func__);
@@ -364,7 +367,7 @@ xprt_setup_rdma(struct xprt_create *args)
xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap);
- cdata.max_requests = xprt->max_reqs;
+ cdata.max_requests = xprt_rdma_slot_table_entries;
cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
@@ -537,6 +540,47 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
+/**
+ * xprt_rdma_alloc_slot - allocate an rpc_rqst
+ * @xprt: controlling RPC transport
+ * @task: RPC task requesting a fresh rpc_rqst
+ *
+ * tk_status values:
+ * %0 if task->tk_rqstp points to a fresh rpc_rqst
+ * %-EAGAIN if no rpc_rqst is available; queued on backlog
+ */
+static void
+xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_req *req;
+
+ req = rpcrdma_buffer_get(&r_xprt->rx_buf);
+ if (!req)
+ goto out_sleep;
+ /* The rpc_rqst handed to the task is the rl_slot member embedded
+ * in the rpcrdma_req just taken from the buffer pool.
+ */
+ task->tk_rqstp = &req->rl_slot;
+ task->tk_status = 0;
+ return;
+
+out_sleep:
+ /* rpcrdma_buffer_get() returned NULL: park the task on the backlog
+ * queue. xprt_rdma_free_slot() calls rpc_wake_up_next() on it.
+ */
+ rpc_sleep_on(&xprt->backlog, task, NULL);
+ task->tk_status = -EAGAIN;
+}
+
+/**
+ * xprt_rdma_free_slot - release an rpc_rqst
+ * @xprt: controlling RPC transport
+ * @rqst: rpc_rqst to release
+ *
+ */
+static void
+xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
+{
+ /* @rqst is the rl_slot member embedded in an rpcrdma_req. Zero it
+ * before rpcrdma_buffer_put() places the req back on rb_send_bufs,
+ * where rpcrdma_buffer_get() can hand it out again, then wake the
+ * next task waiting on the backlog queue.
+ */
+ memset(rqst, 0, sizeof(*rqst));
+ rpcrdma_buffer_put(rpcr_to_rdmar(rqst));
+ rpc_wake_up_next(&xprt->backlog);
+}
+
static bool
rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
size_t size, gfp_t flags)
@@ -607,13 +651,9 @@ xprt_rdma_allocate(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- struct rpcrdma_req *req;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
gfp_t flags;
- req = rpcrdma_buffer_get(&r_xprt->rx_buf);
- if (req == NULL)
- goto out_get;
-
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -623,15 +663,12 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
trace_xprtrdma_allocate(task, req);
return 0;
out_fail:
- rpcrdma_buffer_put(req);
-out_get:
trace_xprtrdma_allocate(task, NULL);
return -ENOMEM;
}
@@ -652,7 +689,6 @@ xprt_rdma_free(struct rpc_task *task)
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_rpc_done(task, req);
- rpcrdma_buffer_put(req);
}
/**
@@ -690,9 +726,6 @@ xprt_rdma_send_request(struct rpc_task *task)
if (rc < 0)
goto failed_marshal;
- if (req->rl_reply == NULL) /* e.g. reconnection */
- rpcrdma_recv_buffer_get(req);
-
/* Must suppress retransmit to maintain credits */
if (rqst->rq_connect_cookie == xprt->connect_cookie)
goto drop_connection;
@@ -779,7 +812,8 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt)
static const struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
- .alloc_slot = xprt_alloc_slot,
+ .alloc_slot = xprt_rdma_alloc_slot,
+ .free_slot = xprt_rdma_free_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.timer = xprt_rdma_timer,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index c345d365af88..16161a36dc73 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -59,6 +60,7 @@
#include <rdma/ib_cm.h>
#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
/*
* Globals/Macros
@@ -71,8 +73,10 @@
/*
* internal functions
*/
+static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
+static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
@@ -159,7 +163,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rr_cqe);
/* WARNING: Only wr_id and status are reliable at this point */
- trace_xprtrdma_wc_receive(rep, wc);
+ trace_xprtrdma_wc_receive(wc);
if (wc->status != IB_WC_SUCCESS)
goto out_fail;
@@ -231,7 +235,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
- ia->ri_async_rc = -EHOSTUNREACH;
+ ia->ri_async_rc = -EPROTO;
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -262,7 +266,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENOTCONN;
goto connected;
case RDMA_CM_EVENT_UNREACHABLE:
- connstate = -ENETDOWN;
+ connstate = -ENETUNREACH;
goto connected;
case RDMA_CM_EVENT_REJECTED:
dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
@@ -305,8 +309,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
- id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
- IB_QPT_RC);
+ id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+ xprt, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
dprintk("RPC: %s: rdma_create_id() failed %i\n",
@@ -500,8 +504,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
- unsigned int max_qp_wr, max_sge;
struct ib_cq *sendcq, *recvcq;
+ unsigned int max_sge;
int rc;
max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
@@ -512,29 +516,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
}
ia->ri_max_send_sges = max_sge;
- if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
- dprintk("RPC: %s: insufficient wqe's available\n",
- __func__);
- return -ENOMEM;
- }
- max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;
-
- /* check provider's send/recv wr limits */
- if (cdata->max_requests > max_qp_wr)
- cdata->max_requests = max_qp_wr;
+ rc = ia->ri_ops->ro_open(ia, ep, cdata);
+ if (rc)
+ return rc;
ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
ep->rep_attr.qp_context = ep;
ep->rep_attr.srq = NULL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests;
- ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
- ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */
- rc = ia->ri_ops->ro_open(ia, ep, cdata);
- if (rc)
- return rc;
- ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
- ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
- ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
ep->rep_attr.cap.max_send_sge = max_sge;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
@@ -741,7 +729,6 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia);
- unsigned int extras;
int rc;
retry:
@@ -785,9 +772,8 @@ retry:
}
dprintk("RPC: %s: connected\n", __func__);
- extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
- if (extras)
- rpcrdma_ep_post_extra_recv(r_xprt, extras);
+
+ rpcrdma_post_recvs(r_xprt, true);
out:
if (rc)
@@ -893,6 +879,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc;
}
+ buf->rb_flags = 0;
return 0;
@@ -950,7 +937,7 @@ out_emptyq:
* completions recently. This is a sign the Send Queue is
* backing up. Cause the caller to pause and try again.
*/
- dprintk("RPC: %s: empty sendctx queue\n", __func__);
+ set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
r_xprt->rx_stats.empty_sendctx_q++;
return NULL;
@@ -965,7 +952,8 @@ out_emptyq:
*
* The caller serializes calls to this function (per rpcrdma_buffer).
*/
-void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
+static void
+rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
{
struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
unsigned long next_tail;
@@ -984,6 +972,11 @@ void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
/* Paired with READ_ONCE */
smp_store_release(&buf->rb_sc_tail, next_tail);
+
+ if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
+ smp_mb__after_atomic();
+ xprt_write_space(&sc->sc_xprt->rx_xprt);
+ }
}
static void
@@ -1097,14 +1090,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req;
}
-/**
- * rpcrdma_create_rep - Allocate an rpcrdma_rep object
- * @r_xprt: controlling transport
- *
- * Returns 0 on success or a negative errno on failure.
- */
-int
-rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
+static int
+rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
{
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1132,6 +1119,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
+ rep->rr_temp = temp;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_list, &buf->rb_recv_bufs);
@@ -1183,12 +1171,8 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
list_add(&req->rl_list, &buf->rb_send_bufs);
}
+ buf->rb_posted_receives = 0;
INIT_LIST_HEAD(&buf->rb_recv_bufs);
- for (i = 0; i <= buf->rb_max_requests; i++) {
- rc = rpcrdma_create_rep(r_xprt);
- if (rc)
- goto out;
- }
rc = rpcrdma_sendctxs_create(r_xprt);
if (rc)
@@ -1200,28 +1184,6 @@ out:
return rc;
}
-static struct rpcrdma_req *
-rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
-{
- struct rpcrdma_req *req;
-
- req = list_first_entry(&buf->rb_send_bufs,
- struct rpcrdma_req, rl_list);
- list_del_init(&req->rl_list);
- return req;
-}
-
-static struct rpcrdma_rep *
-rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
-{
- struct rpcrdma_rep *rep;
-
- rep = list_first_entry(&buf->rb_recv_bufs,
- struct rpcrdma_rep, rr_list);
- list_del(&rep->rr_list);
- return rep;
-}
-
static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
@@ -1280,10 +1242,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
- rep = rpcrdma_buffer_get_rep_locked(buf);
+ rep = list_first_entry(&buf->rb_recv_bufs,
+ struct rpcrdma_rep, rr_list);
+ list_del(&rep->rr_list);
rpcrdma_destroy_rep(rep);
}
- buf->rb_send_count = 0;
spin_lock(&buf->rb_reqslock);
while (!list_empty(&buf->rb_allreqs)) {
@@ -1298,7 +1261,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
spin_lock(&buf->rb_reqslock);
}
spin_unlock(&buf->rb_reqslock);
- buf->rb_recv_count = 0;
rpcrdma_mrs_destroy(buf);
}
@@ -1371,27 +1333,11 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
}
-static struct rpcrdma_rep *
-rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
-{
- /* If an RPC previously completed without a reply (say, a
- * credential problem or a soft timeout occurs) then hold off
- * on supplying more Receive buffers until the number of new
- * pending RPCs catches up to the number of posted Receives.
- */
- if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
- return NULL;
-
- if (unlikely(list_empty(&buffers->rb_recv_bufs)))
- return NULL;
- buffers->rb_recv_count++;
- return rpcrdma_buffer_get_rep_locked(buffers);
-}
-
-/*
- * Get a set of request/reply buffers.
+/**
+ * rpcrdma_buffer_get - Get a request buffer
+ * @buffers: Buffer pool from which to obtain a buffer
*
- * Reply buffer (if available) is attached to send buffer upon return.
+ * Returns a fresh rpcrdma_req, or NULL if none are available.
*/
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
@@ -1399,23 +1345,18 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
struct rpcrdma_req *req;
spin_lock(&buffers->rb_lock);
- if (list_empty(&buffers->rb_send_bufs))
- goto out_reqbuf;
- buffers->rb_send_count++;
- req = rpcrdma_buffer_get_req_locked(buffers);
- req->rl_reply = rpcrdma_buffer_get_rep(buffers);
+ req = list_first_entry_or_null(&buffers->rb_send_bufs,
+ struct rpcrdma_req, rl_list);
+ if (req)
+ list_del_init(&req->rl_list);
spin_unlock(&buffers->rb_lock);
-
return req;
-
-out_reqbuf:
- spin_unlock(&buffers->rb_lock);
- return NULL;
}
-/*
- * Put request/reply buffers back into pool.
- * Pre-decrement counter/array index.
+/**
+ * rpcrdma_buffer_put - Put request/reply buffers back into pool
+ * @req: object to return
+ *
*/
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
@@ -1426,27 +1367,16 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
req->rl_reply = NULL;
spin_lock(&buffers->rb_lock);
- buffers->rb_send_count--;
- list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
+ list_add(&req->rl_list, &buffers->rb_send_bufs);
if (rep) {
- buffers->rb_recv_count--;
- list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+ if (!rep->rr_temp) {
+ list_add(&rep->rr_list, &buffers->rb_recv_bufs);
+ rep = NULL;
+ }
}
spin_unlock(&buffers->rb_lock);
-}
-
-/*
- * Recover reply buffers from pool.
- * This happens when recovering from disconnect.
- */
-void
-rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
-{
- struct rpcrdma_buffer *buffers = req->rl_buffer;
-
- spin_lock(&buffers->rb_lock);
- req->rl_reply = rpcrdma_buffer_get_rep(buffers);
- spin_unlock(&buffers->rb_lock);
+ if (rep)
+ rpcrdma_destroy_rep(rep);
}
/*
@@ -1458,10 +1388,13 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
- spin_lock(&buffers->rb_lock);
- buffers->rb_recv_count--;
- list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
- spin_unlock(&buffers->rb_lock);
+ if (!rep->rr_temp) {
+ spin_lock(&buffers->rb_lock);
+ list_add(&rep->rr_list, &buffers->rb_recv_bufs);
+ spin_unlock(&buffers->rb_lock);
+ } else {
+ rpcrdma_destroy_rep(rep);
+ }
}
/**
@@ -1557,13 +1490,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
int rc;
- if (req->rl_reply) {
- rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
- if (rc)
- return rc;
- req->rl_reply = NULL;
- }
-
if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -1580,61 +1506,69 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
return 0;
}
-int
-rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
- struct rpcrdma_rep *rep)
-{
- struct ib_recv_wr *recv_wr_fail;
- int rc;
-
- if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
- goto out_map;
- rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
- trace_xprtrdma_post_recv(rep, rc);
- if (rc)
- return -ENOTCONN;
- return 0;
-
-out_map:
- pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
- return -EIO;
-}
-
/**
- * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
- * @r_xprt: transport associated with these backchannel resources
- * @count: minimum number of incoming requests expected
+ * rpcrdma_post_recvs - Maybe post some Receive buffers
+ * @r_xprt: controlling transport
+ * @temp: when true, allocate temp rpcrdma_rep objects
*
- * Returns zero if all requested buffers were posted, or a negative errno.
+ * Tops up the posted Receives to rb_credits plus twice
+ * rb_bc_srv_max_requests. Nothing is posted when rb_posted_receives
+ * already accounts for at least that many. rpcrdma_rep objects are
+ * taken from rb_recv_bufs; when that list is empty a new one is
+ * created and marked temporary if @temp is set. The Receive WRs are
+ * chained and handed to a single ib_post_recv() call. WRs that were
+ * not posted are released and are not added to rb_posted_receives.
*/
-int
-rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
+void
+rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
- struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_rep *rep;
- int rc;
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct ib_recv_wr *wr, *bad_wr;
+ int needed, count, rc;
- while (count--) {
- spin_lock(&buffers->rb_lock);
- if (list_empty(&buffers->rb_recv_bufs))
- goto out_reqbuf;
- rep = rpcrdma_buffer_get_rep_locked(buffers);
- spin_unlock(&buffers->rb_lock);
+ needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
+ if (buf->rb_posted_receives > needed)
+ return;
+ needed -= buf->rb_posted_receives;
- rc = rpcrdma_ep_post_recv(ia, rep);
- if (rc)
- goto out_rc;
- }
+ count = 0;
+ wr = NULL;
+ while (needed) {
+ struct rpcrdma_regbuf *rb;
+ struct rpcrdma_rep *rep;
- return 0;
+ spin_lock(&buf->rb_lock);
+ rep = list_first_entry_or_null(&buf->rb_recv_bufs,
+ struct rpcrdma_rep, rr_list);
+ if (likely(rep))
+ list_del(&rep->rr_list);
+ spin_unlock(&buf->rb_lock);
+ if (!rep) {
+ /* List is empty: create a rep, then retry the list */
+ if (rpcrdma_create_rep(r_xprt, temp))
+ break;
+ continue;
+ }
-out_reqbuf:
- spin_unlock(&buffers->rb_lock);
- trace_xprtrdma_noreps(r_xprt);
- return -ENOMEM;
+ rb = rep->rr_rdmabuf;
+ if (!rpcrdma_regbuf_is_mapped(rb)) {
+ if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
+ rpcrdma_recv_buffer_put(rep);
+ break;
+ }
+ }
-out_rc:
- rpcrdma_recv_buffer_put(rep);
- return rc;
+ trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+ /* Push this WR onto the head of the chain to be posted */
+ rep->rr_recv_wr.next = wr;
+ wr = &rep->rr_recv_wr;
+ ++count;
+ --needed;
+ }
+ if (!count)
+ return;
+
+ rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr);
+ if (rc) {
+ /* rpcrdma_recv_buffer_put() destroys a temp rep, and the WR
+ * is embedded in the rep, so fetch wr->next before the put.
+ */
+ for (wr = bad_wr; wr;) {
+ struct rpcrdma_rep *rep;
+
+ rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
+ wr = wr->next;
+ rpcrdma_recv_buffer_put(rep);
+ --count;
+ }
+ }
+ buf->rb_posted_receives += count;
+ trace_xprtrdma_post_recvs(r_xprt, count, rc);
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cb41b12a3bf8..2ca14f7c2d51 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright (c) 2014-2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -196,6 +197,7 @@ struct rpcrdma_rep {
__be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
+ bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
@@ -334,6 +336,7 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
+ struct rpc_rqst rl_slot;
struct rpcrdma_buffer *rl_buffer;
struct rpcrdma_rep *rl_reply;
struct xdr_stream rl_stream;
@@ -356,16 +359,10 @@ enum {
RPCRDMA_REQ_F_TX_RESOURCES,
};
-static inline void
-rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
-{
- rqst->rq_xprtdata = req;
-}
-
static inline struct rpcrdma_req *
rpcr_to_rdmar(const struct rpc_rqst *rqst)
{
- return rqst->rq_xprtdata;
+ return container_of(rqst, struct rpcrdma_req, rl_slot);
}
static inline void
@@ -401,11 +398,12 @@ struct rpcrdma_buffer {
struct rpcrdma_sendctx **rb_sc_ctxs;
spinlock_t rb_lock; /* protect buf lists */
- int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
+ unsigned long rb_flags;
u32 rb_max_requests;
u32 rb_credits; /* most recent credit grant */
+ int rb_posted_receives;
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
@@ -420,6 +418,11 @@ struct rpcrdma_buffer {
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
+/* rb_flags */
+enum {
+ RPCRDMA_BUF_F_EMPTY_SCQ = 0,
+};
+
/*
* Internal structure for transport instance creation. This
* exists primarily for modularity.
@@ -561,18 +564,16 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
-int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *);
-int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
-void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@@ -581,7 +582,6 @@ void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
-void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
@@ -603,8 +603,6 @@ rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
return __rpcrdma_dma_map_regbuf(ia, rb);
}
-int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
-
int rpcrdma_alloc_wq(void);
void rpcrdma_destroy_wq(void);
@@ -675,5 +673,3 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
-
-#include <trace/events/rpcrdma.h>
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c8902f11efdd..9e1c5024aba9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2763,6 +2763,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_alloc_slot,
+ .free_slot = xprt_free_slot,
.rpcbind = xs_local_rpcbind,
.set_port = xs_local_set_port,
.connect = xs_local_connect,
@@ -2782,6 +2783,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
.alloc_slot = xprt_alloc_slot,
+ .free_slot = xprt_free_slot,
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
@@ -2803,6 +2805,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
.alloc_slot = xprt_lock_and_alloc_slot,
+ .free_slot = xprt_free_slot,
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
@@ -2834,6 +2837,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xprt_release_xprt,
.alloc_slot = xprt_alloc_slot,
+ .free_slot = xprt_free_slot,
.buf_alloc = bc_malloc,
.buf_free = bc_free,
.send_request = bc_send_request,
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 4492cda45566..a2f76743c73a 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -285,8 +285,9 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
if (!trans_buf)
return -ENOMEM;
- attrbuf = kmalloc((tipc_genl_family.maxattr + 1) *
- sizeof(struct nlattr *), GFP_KERNEL);
+ attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *),
+ GFP_KERNEL);
if (!attrbuf) {
err = -ENOMEM;
goto trans_out;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 07514ca011b2..c7bbe5f0aae8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10833,7 +10833,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
struct nlattr **tb;
int err;
- tb = kzalloc(NUM_NL80211_ATTR * sizeof(*tb), GFP_KERNEL);
+ tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL);
if (!tb)
return -ENOMEM;
@@ -11793,7 +11793,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
func->srf_num_macs = n_entries;
func->srf_macs =
- kzalloc(sizeof(*func->srf_macs) * n_entries,
+ kcalloc(n_entries, sizeof(*func->srf_macs),
GFP_KERNEL);
if (!func->srf_macs) {
err = -ENOMEM;
OpenPOWER on IntegriCloud