summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTrond Myklebust <trond.myklebust@primarydata.com>2014-08-03 17:04:51 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-08-03 17:04:51 -0400
commit9806755c56a9a8b3ff8bc9ce3092dcad4d562989 (patch)
tree19c1a92a65cfdee4bd2717f032a1f072cbe0c7af /net
parent3a505845cd58a7ff3bc75f96572045d8de34e34e (diff)
parent8079fb785e34de6dff34bd846b8b79c212861edf (diff)
downloadblackbird-op-linux-9806755c56a9a8b3ff8bc9ce3092dcad4d562989.tar.gz
blackbird-op-linux-9806755c56a9a8b3ff8bc9ce3092dcad4d562989.zip
Merge branch 'nfs-rdma' of git://git.linux-nfs.org/projects/anna/nfs-rdma into linux-next
* 'nfs-rdma' of git://git.linux-nfs.org/projects/anna/nfs-rdma: (916 commits) xprtrdma: Handle additional connection events xprtrdma: Remove RPCRDMA_PERSISTENT_REGISTRATION macro xprtrdma: Make rpcrdma_ep_disconnect() return void xprtrdma: Schedule reply tasklet once per upcall xprtrdma: Allocate each struct rpcrdma_mw separately xprtrdma: Rename frmr_wr xprtrdma: Disable completions for LOCAL_INV Work Requests xprtrdma: Disable completions for FAST_REG_MR Work Requests xprtrdma: Don't post a LOCAL_INV in rpcrdma_register_frmr_external() xprtrdma: Reset FRMRs after a flushed LOCAL_INV Work Request xprtrdma: Reset FRMRs when FAST_REG_MR is flushed by a disconnect xprtrdma: Properly handle exhaustion of the rb_mws list xprtrdma: Chain together all MWs in same buffer pool xprtrdma: Back off rkey when FAST_REG_MR fails xprtrdma: Unclutter struct rpcrdma_mr_seg xprtrdma: Don't invalidate FRMRs if registration fails xprtrdma: On disconnect, don't ignore pending CQEs xprtrdma: Update rkeys after transport reconnect xprtrdma: Limit data payload size for ALLPHYSICAL xprtrdma: Protect ia->ri_id when unmapping/invalidating MRs ...
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c5
-rw-r--r--net/8021q/vlan_dev.c13
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/bluetooth/hci_conn.c19
-rw-r--r--net/bluetooth/hci_event.c17
-rw-r--r--net/bluetooth/l2cap_core.c8
-rw-r--r--net/bluetooth/l2cap_sock.c5
-rw-r--r--net/bluetooth/mgmt.c104
-rw-r--r--net/bluetooth/smp.c69
-rw-r--r--net/core/dev.c30
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/filter.c10
-rw-r--r--net/core/iovec.c55
-rw-r--r--net/core/neighbour.c9
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/ipv4/gre_demux.c1
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c10
-rw-r--r--net/ipv4/ip_tunnel.c26
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/udp.c5
-rw-r--r--net/ipv6/mcast.c13
-rw-r--r--net/ipv6/udp.c6
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/mac80211/util.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c20
-rw-r--r--net/netfilter/nf_nat_core.c35
-rw-r--r--net/netfilter/nf_tables_api.c11
-rw-r--r--net/netfilter/nft_compat.c18
-rw-r--r--net/netfilter/nft_nat.c14
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/openvswitch/actions.c2
-rw-r--r--net/openvswitch/datapath.c27
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/flow.h5
-rw-r--r--net/openvswitch/flow_table.c16
-rw-r--r--net/openvswitch/flow_table.h3
-rw-r--r--net/openvswitch/vport-gre.c17
-rw-r--r--net/sctp/sysctl.c46
-rw-r--r--net/sctp/ulpevent.c122
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c83
-rw-r--r--net/sunrpc/xprtrdma/transport.c17
-rw-r--r--net/sunrpc/xprtrdma/verbs.c739
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h61
-rw-r--r--net/tipc/bcast.c1
-rw-r--r--net/tipc/msg.c11
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/nl80211.c11
-rw-r--r--net/wireless/reg.c22
54 files changed, 1065 insertions, 701 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 9012b1c922b6..75d427763992 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -114,8 +114,11 @@ EXPORT_SYMBOL(vlan_dev_vlan_proto);
static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
{
- if (skb_cow(skb, skb_headroom(skb)) < 0)
+ if (skb_cow(skb, skb_headroom(skb)) < 0) {
+ kfree_skb(skb);
return NULL;
+ }
+
memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ad2ac3c00398..dd11f612e03e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -627,8 +627,6 @@ static void vlan_dev_uninit(struct net_device *dev)
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
int i;
- free_percpu(vlan->vlan_pcpu_stats);
- vlan->vlan_pcpu_stats = NULL;
for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
while ((pm = vlan->egress_priority_map[i]) != NULL) {
vlan->egress_priority_map[i] = pm->next;
@@ -785,6 +783,15 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
};
+static void vlan_dev_free(struct net_device *dev)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+
+ free_percpu(vlan->vlan_pcpu_stats);
+ vlan->vlan_pcpu_stats = NULL;
+ free_netdev(dev);
+}
+
void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -794,7 +801,7 @@ void vlan_setup(struct net_device *dev)
dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
memset(dev->broadcast, 0, ETH_ALEN);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 01a1082e02b3..bfcf6be1d665 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;
/* Queue packet (standard) */
- skb->sk = sock;
-
if (sock_queue_rcv_skb(sock, skb) < 0)
goto drop;
@@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
if (!skb)
goto out;
- skb->sk = sk;
skb_reserve(skb, ddp_dl->header_length);
skb_reserve(skb, dev->hard_header_len);
skb->dev = dev;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 8671bc79a35b..a7a27bc2c0b1 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -289,10 +289,20 @@ static void hci_conn_timeout(struct work_struct *work)
{
struct hci_conn *conn = container_of(work, struct hci_conn,
disc_work.work);
+ int refcnt = atomic_read(&conn->refcnt);
BT_DBG("hcon %p state %s", conn, state_to_string(conn->state));
- if (atomic_read(&conn->refcnt))
+ WARN_ON(refcnt < 0);
+
+ /* FIXME: It was observed that in pairing failed scenario, refcnt
+ * drops below 0. Probably this is because l2cap_conn_del calls
+ * l2cap_chan_del for each channel, and inside l2cap_chan_del conn is
+ * dropped. After that loop hci_chan_del is called which also drops
+ * conn. For now make sure that ACL is alive if refcnt is higher then 0,
+ * otherwise drop it.
+ */
+ if (refcnt > 0)
return;
switch (conn->state) {
@@ -610,11 +620,6 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
if (hci_update_random_address(req, false, &own_addr_type))
return;
- /* Save the address type used for this connnection attempt so we able
- * to retrieve this information if we need it.
- */
- conn->src_type = own_addr_type;
-
cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
cp.scan_window = cpu_to_le16(hdev->le_scan_window);
bacpy(&cp.peer_addr, &conn->dst);
@@ -894,7 +899,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
/* If we're already encrypted set the REAUTH_PEND flag,
* otherwise set the ENCRYPT_PEND.
*/
- if (conn->key_type != 0xff)
+ if (conn->link_mode & HCI_LM_ENCRYPT)
set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
else
set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 21e5913d12e0..640c54ec1bd2 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -48,6 +48,10 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
wake_up_bit(&hdev->flags, HCI_INQUIRY);
+ hci_dev_lock(hdev);
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ hci_dev_unlock(hdev);
+
hci_conn_check_pending(hdev);
}
@@ -3537,7 +3541,11 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.authentication = conn->auth_type;
/* Request MITM protection if our IO caps allow it
- * except for the no-bonding case
+ * except for the no-bonding case.
+ * conn->auth_type is not updated here since
+ * that might cause the user confirmation to be
+ * rejected in case the remote doesn't have the
+ * IO capabilities for MITM.
*/
if (conn->io_capability != HCI_IO_NO_INPUT_OUTPUT &&
cp.authentication != HCI_AT_NO_BONDING)
@@ -3628,8 +3636,11 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
/* If we're not the initiators request authorization to
* proceed from user space (mgmt_user_confirm with
- * confirm_hint set to 1). */
- if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
+ * confirm_hint set to 1). The exception is if neither
+ * side had MITM in which case we do auto-accept.
+ */
+ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) &&
+ (loc_mitm || rem_mitm)) {
BT_DBG("Confirming auto-accept as acceptor");
confirm_hint = 1;
goto confirm;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 6eabbe05fe54..323f23cd2c37 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1663,7 +1663,13 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
kfree_skb(conn->rx_skb);
skb_queue_purge(&conn->pending_rx);
- flush_work(&conn->pending_rx_work);
+
+ /* We can not call flush_work(&conn->pending_rx_work) here since we
+ * might block if we are running on a worker from the same workqueue
+ * pending_rx_work is waiting on.
+ */
+ if (work_pending(&conn->pending_rx_work))
+ cancel_work_sync(&conn->pending_rx_work);
l2cap_unregister_all_users(conn);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index ade3fb4c23bc..e1378693cc90 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -787,11 +787,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
/*change security for LE channels */
if (chan->scid == L2CAP_CID_ATT) {
- if (!conn->hcon->out) {
- err = -EINVAL;
- break;
- }
-
if (smp_conn_security(conn->hcon, sec.level))
break;
sk->sk_state = BT_CONFIG;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 0fce54412ffd..af8e0a6243b7 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1047,6 +1047,43 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status)
}
}
+static void hci_stop_discovery(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_remote_name_req_cancel cp;
+ struct inquiry_entry *e;
+
+ switch (hdev->discovery.state) {
+ case DISCOVERY_FINDING:
+ if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+ hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
+ } else {
+ cancel_delayed_work(&hdev->le_scan_disable);
+ hci_req_add_le_scan_disable(req);
+ }
+
+ break;
+
+ case DISCOVERY_RESOLVING:
+ e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
+ NAME_PENDING);
+ if (!e)
+ return;
+
+ bacpy(&cp.bdaddr, &e->data.bdaddr);
+ hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
+ &cp);
+
+ break;
+
+ default:
+ /* Passive scanning */
+ if (test_bit(HCI_LE_SCAN, &hdev->dev_flags))
+ hci_req_add_le_scan_disable(req);
+ break;
+ }
+}
+
static int clean_up_hci_state(struct hci_dev *hdev)
{
struct hci_request req;
@@ -1063,9 +1100,7 @@ static int clean_up_hci_state(struct hci_dev *hdev)
if (test_bit(HCI_ADVERTISING, &hdev->dev_flags))
disable_advertising(&req);
- if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) {
- hci_req_add_le_scan_disable(&req);
- }
+ hci_stop_discovery(&req);
list_for_each_entry(conn, &hdev->conn_hash.list, list) {
struct hci_cp_disconnect dc;
@@ -2996,8 +3031,13 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
}
if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
- /* Continue with pairing via SMP */
+ /* Continue with pairing via SMP. The hdev lock must be
+ * released as SMP may try to recquire it for crypto
+ * purposes.
+ */
+ hci_dev_unlock(hdev);
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
+ hci_dev_lock(hdev);
if (!err)
err = cmd_complete(sk, hdev->id, mgmt_op,
@@ -3574,8 +3614,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_stop_discovery *mgmt_cp = data;
struct pending_cmd *cmd;
- struct hci_cp_remote_name_req_cancel cp;
- struct inquiry_entry *e;
struct hci_request req;
int err;
@@ -3605,52 +3643,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
hci_req_init(&req, hdev);
- switch (hdev->discovery.state) {
- case DISCOVERY_FINDING:
- if (test_bit(HCI_INQUIRY, &hdev->flags)) {
- hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL);
- } else {
- cancel_delayed_work(&hdev->le_scan_disable);
-
- hci_req_add_le_scan_disable(&req);
- }
-
- break;
+ hci_stop_discovery(&req);
- case DISCOVERY_RESOLVING:
- e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY,
- NAME_PENDING);
- if (!e) {
- mgmt_pending_remove(cmd);
- err = cmd_complete(sk, hdev->id,
- MGMT_OP_STOP_DISCOVERY, 0,
- &mgmt_cp->type,
- sizeof(mgmt_cp->type));
- hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- goto unlock;
- }
-
- bacpy(&cp.bdaddr, &e->data.bdaddr);
- hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp),
- &cp);
-
- break;
-
- default:
- BT_DBG("unknown discovery state %u", hdev->discovery.state);
-
- mgmt_pending_remove(cmd);
- err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY,
- MGMT_STATUS_FAILED, &mgmt_cp->type,
- sizeof(mgmt_cp->type));
+ err = hci_req_run(&req, stop_discovery_complete);
+ if (!err) {
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
goto unlock;
}
- err = hci_req_run(&req, stop_discovery_complete);
- if (err < 0)
- mgmt_pending_remove(cmd);
- else
- hci_discovery_set_state(hdev, DISCOVERY_STOPPING);
+ mgmt_pending_remove(cmd);
+
+ /* If no HCI commands were sent we're done */
+ if (err == -ENODATA) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0,
+ &mgmt_cp->type, sizeof(mgmt_cp->type));
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ }
unlock:
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 3d1cc164557d..e33a982161c1 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -385,6 +385,16 @@ static const u8 gen_method[5][5] = {
{ CFM_PASSKEY, CFM_PASSKEY, REQ_PASSKEY, JUST_WORKS, OVERLAP },
};
+static u8 get_auth_method(struct smp_chan *smp, u8 local_io, u8 remote_io)
+{
+ /* If either side has unknown io_caps, use JUST WORKS */
+ if (local_io > SMP_IO_KEYBOARD_DISPLAY ||
+ remote_io > SMP_IO_KEYBOARD_DISPLAY)
+ return JUST_WORKS;
+
+ return gen_method[remote_io][local_io];
+}
+
static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
u8 local_io, u8 remote_io)
{
@@ -401,14 +411,11 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io);
/* If neither side wants MITM, use JUST WORKS */
- /* If either side has unknown io_caps, use JUST WORKS */
/* Otherwise, look up method from the table */
- if (!(auth & SMP_AUTH_MITM) ||
- local_io > SMP_IO_KEYBOARD_DISPLAY ||
- remote_io > SMP_IO_KEYBOARD_DISPLAY)
+ if (!(auth & SMP_AUTH_MITM))
method = JUST_WORKS;
else
- method = gen_method[remote_io][local_io];
+ method = get_auth_method(smp, local_io, remote_io);
/* If not bonding, don't ask user to confirm a Zero TK */
if (!(auth & SMP_AUTH_BONDING) && method == JUST_CFM)
@@ -544,7 +551,7 @@ static u8 smp_random(struct smp_chan *smp)
hci_le_start_enc(hcon, ediv, rand, stk);
hcon->enc_key_size = smp->enc_key_size;
} else {
- u8 stk[16];
+ u8 stk[16], auth;
__le64 rand = 0;
__le16 ediv = 0;
@@ -556,8 +563,13 @@ static u8 smp_random(struct smp_chan *smp)
memset(stk + smp->enc_key_size, 0,
SMP_MAX_ENC_KEY_SIZE - smp->enc_key_size);
+ if (hcon->pending_sec_level == BT_SECURITY_HIGH)
+ auth = 1;
+ else
+ auth = 0;
+
hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
- HCI_SMP_STK_SLAVE, 0, stk, smp->enc_key_size,
+ HCI_SMP_STK_SLAVE, auth, stk, smp->enc_key_size,
ediv, rand);
}
@@ -664,7 +676,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_pairing rsp, *req = (void *) skb->data;
struct smp_chan *smp;
- u8 key_size, auth;
+ u8 key_size, auth, sec_level;
int ret;
BT_DBG("conn %p", conn);
@@ -690,7 +702,19 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
/* We didn't start the pairing, so match remote */
auth = req->auth_req;
- conn->hcon->pending_sec_level = authreq_to_seclevel(auth);
+ sec_level = authreq_to_seclevel(auth);
+ if (sec_level > conn->hcon->pending_sec_level)
+ conn->hcon->pending_sec_level = sec_level;
+
+ /* If we need MITM check that it can be acheived */
+ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
+ u8 method;
+
+ method = get_auth_method(smp, conn->hcon->io_capability,
+ req->io_capability);
+ if (method == JUST_WORKS || method == JUST_CFM)
+ return SMP_AUTH_REQUIREMENTS;
+ }
build_pairing_cmd(conn, req, &rsp, auth);
@@ -738,6 +762,16 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (check_enc_key_size(conn, key_size))
return SMP_ENC_KEY_SIZE;
+ /* If we need MITM check that it can be acheived */
+ if (conn->hcon->pending_sec_level >= BT_SECURITY_HIGH) {
+ u8 method;
+
+ method = get_auth_method(smp, req->io_capability,
+ rsp->io_capability);
+ if (method == JUST_WORKS || method == JUST_CFM)
+ return SMP_AUTH_REQUIREMENTS;
+ }
+
get_random_bytes(smp->prnd, sizeof(smp->prnd));
smp->prsp[0] = SMP_CMD_PAIRING_RSP;
@@ -833,6 +867,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_cmd_pairing cp;
struct hci_conn *hcon = conn->hcon;
struct smp_chan *smp;
+ u8 sec_level;
BT_DBG("conn %p", conn);
@@ -842,7 +877,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (!(conn->hcon->link_mode & HCI_LM_MASTER))
return SMP_CMD_NOTSUPP;
- hcon->pending_sec_level = authreq_to_seclevel(rp->auth_req);
+ sec_level = authreq_to_seclevel(rp->auth_req);
+ if (sec_level > hcon->pending_sec_level)
+ hcon->pending_sec_level = sec_level;
if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
return 0;
@@ -896,9 +933,12 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
if (smp_sufficient_security(hcon, sec_level))
return 1;
+ if (sec_level > hcon->pending_sec_level)
+ hcon->pending_sec_level = sec_level;
+
if (hcon->link_mode & HCI_LM_MASTER)
- if (smp_ltk_encrypt(conn, sec_level))
- goto done;
+ if (smp_ltk_encrypt(conn, hcon->pending_sec_level))
+ return 0;
if (test_and_set_bit(HCI_CONN_LE_SMP_PEND, &hcon->flags))
return 0;
@@ -913,7 +953,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
* requires it.
*/
if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT ||
- sec_level > BT_SECURITY_MEDIUM)
+ hcon->pending_sec_level > BT_SECURITY_MEDIUM)
authreq |= SMP_AUTH_MITM;
if (hcon->link_mode & HCI_LM_MASTER) {
@@ -932,9 +972,6 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
set_bit(SMP_FLAG_INITIATOR, &smp->flags);
-done:
- hcon->pending_sec_level = sec_level;
-
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 30eedf677913..7990984ca364 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -148,6 +148,9 @@ struct list_head ptype_all __read_mostly; /* Taps */
static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
+static int call_netdevice_notifiers_info(unsigned long val,
+ struct net_device *dev,
+ struct netdev_notifier_info *info);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -1207,7 +1210,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- call_netdevice_notifiers(NETDEV_CHANGE, dev);
+ struct netdev_notifier_change_info change_info;
+
+ change_info.flags_changed = 0;
+ call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ &change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
}
@@ -4227,9 +4234,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
#endif
napi->weight = weight_p;
local_irq_disable();
- while (work < quota) {
+ while (1) {
struct sk_buff *skb;
- unsigned int qlen;
while ((skb = __skb_dequeue(&sd->process_queue))) {
local_irq_enable();
@@ -4243,24 +4249,24 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
rps_lock(sd);
- qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen)
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
- &sd->process_queue);
-
- if (qlen < quota - work) {
+ if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
* Inline a custom version of __napi_complete().
* only current cpu owns and manipulates this napi,
- * and NAPI_STATE_SCHED is the only possible flag set on backlog.
- * we can use a plain write instead of clear_bit(),
+ * and NAPI_STATE_SCHED is the only possible flag set
+ * on backlog.
+ * We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
list_del(&napi->poll_list);
napi->state = 0;
+ rps_unlock(sd);
- quota = work + qlen;
+ break;
}
+
+ skb_queue_splice_tail_init(&sd->input_pkt_queue,
+ &sd->process_queue);
rps_unlock(sd);
}
local_irq_enable();
diff --git a/net/core/dst.c b/net/core/dst.c
index 80d6286c8b62..a028409ee438 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -269,6 +269,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/core/filter.c b/net/core/filter.c
index 735fad897496..1dbf6462f766 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -840,11 +840,11 @@ int sk_convert_filter(struct sock_filter *prog, int len,
BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- if (len <= 0 || len >= BPF_MAXINSNS)
+ if (len <= 0 || len > BPF_MAXINSNS)
return -EINVAL;
if (new_prog) {
- addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
+ addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
}
@@ -1101,7 +1101,7 @@ static int check_load_and_stores(struct sock_filter *filter, int flen)
BUILD_BUG_ON(BPF_MEMWORDS > 16);
- masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+ masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
if (!masks)
return -ENOMEM;
@@ -1382,7 +1382,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
if (fp_new) {
*fp_new = *fp;
- /* As we're kepping orig_prog in fp_new along,
+ /* As we're keeping orig_prog in fp_new along,
* we need to make sure we're not evicting it
* from the old fp.
*/
@@ -1524,8 +1524,8 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
/**
* sk_unattached_filter_create - create an unattached filter
- * @fprog: the filter program
* @pfp: the unattached filter that is created
+ * @fprog: the filter program
*
* Create a filter independent of any socket. We first run some
* sanity checks on it to make sure it does not explode on us later.
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b61869429f4c..827dd6beb49c 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -75,61 +75,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
}
/*
- * Copy kernel to iovec. Returns -EFAULT on error.
- */
-
-int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
- int offset, int len)
-{
- int copy;
- for (; len > 0; ++iov) {
- /* Skip over the finished iovecs */
- if (unlikely(offset >= iov->iov_len)) {
- offset -= iov->iov_len;
- continue;
- }
- copy = min_t(unsigned int, iov->iov_len - offset, len);
- if (copy_to_user(iov->iov_base + offset, kdata, copy))
- return -EFAULT;
- offset = 0;
- kdata += copy;
- len -= copy;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovecend);
-
-/*
- * Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
- int offset, int len)
-{
- /* Skip over the finished iovecs */
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- }
-
- while (len > 0) {
- u8 __user *base = iov->iov_base + offset;
- int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
- offset = 0;
- if (copy_from_user(kdata, base, copy))
- return -EFAULT;
- len -= copy;
- kdata += copy;
- iov++;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
-
-/*
* And now for the all-in-one: copy and checksum from a user iovec
* directly to a datagram
* Calls to csum_partial but the last must be in 32 bit chunks
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 32d872eec7f5..559890b0f0a2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3059,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
} else {
+ struct neigh_table *tbl = p->tbl;
dev_name_source = "default";
- t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
- t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
- t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
- t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
+ t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
+ t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
}
if (handler) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9cd5344fad73..c1a33033cbe2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2993,7 +2993,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
skb_put(nskb, len),
len, 0);
SKB_GSO_CB(nskb)->csum_start =
- skb_headroom(nskb) + offset;
+ skb_headroom(nskb) + doffset;
continue;
}
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4e9619bca732..0485bf7f8f03 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -68,6 +68,7 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
skb_push(skb, hdr_len);
+ skb_reset_transport_header(skb);
greh = (struct gre_base_hdr *)skb->data;
greh->flags = tnl_flags_to_gre_flags(tpi->flags);
greh->protocol = tpi->proto;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 79c3d947a481..42b7bcf8045b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -739,8 +739,6 @@ static void icmp_unreach(struct sk_buff *skb)
/* fall through */
case 0:
info = ntohs(icmph->un.frag.mtu);
- if (!info)
- goto out;
}
break;
case ICMP_SR_FAILED:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6748d420f714..db710b059bab 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1944,6 +1944,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_lock();
in_dev = ip_mc_find_dev(net, imr);
+ if (!in_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list;
(iml = rtnl_dereference(*imlp)) != NULL;
@@ -1961,16 +1965,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- if (in_dev)
- ip_mc_dec_group(in_dev, group);
+ ip_mc_dec_group(in_dev, group);
rtnl_unlock();
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
return 0;
}
- if (!in_dev)
- ret = -ENODEV;
+out:
rtnl_unlock();
return ret;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 097b3e7c1e8f..6f9de61dce5f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
{
struct dst_entry *old_dst;
- if (dst) {
- if (dst->flags & DST_NOCACHE)
- dst = NULL;
- else
- dst_clone(dst);
- }
+ dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
}
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
if (dst) {
if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- rcu_read_unlock();
tunnel_dst_reset(t);
- return NULL;
+ dst_release(dst);
+ dst = NULL;
}
- dst_hold(dst);
}
rcu_read_unlock();
return (struct rtable *)dst;
@@ -173,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (remote != t->parms.iph.daddr ||
+ t->parms.iph.saddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -189,10 +186,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
head = &itn->tunnels[hash];
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
+ if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+ (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+ continue;
+
+ if (!(t->dev->flags & IFF_UP))
continue;
if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -209,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (t->parms.i_key != key ||
+ t->parms.iph.saddr != 0 ||
+ t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 082239ffe34a..3162ea923ded 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1010,7 +1010,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- struct dst_entry *dst;
+ struct dst_entry *odst = NULL;
bool new = false;
bh_lock_sock(sk);
@@ -1018,16 +1018,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
if (!ip_sk_accept_pmtu(sk))
goto out;
- rt = (struct rtable *) __sk_dst_get(sk);
+ odst = sk_dst_get(sk);
- if (sock_owned_by_user(sk) || !rt) {
+ if (sock_owned_by_user(sk) || !odst) {
__ipv4_sk_update_pmtu(skb, sk, mtu);
goto out;
}
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- if (!__sk_dst_check(sk, 0)) {
+ rt = (struct rtable *)odst;
+ if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
goto out;
@@ -1037,8 +1038,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
- dst = dst_check(&rt->dst, 0);
- if (!dst) {
+ if (!dst_check(&rt->dst, 0)) {
if (new)
dst_release(&rt->dst);
@@ -1050,10 +1050,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
if (new)
- __sk_dst_set(sk, &rt->dst);
+ sk_dst_set(sk, &rt->dst);
out:
bh_unlock_sock(sk);
+ dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb1dde37e678..9d2118e5fbc7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
- goto out;
+ goto out_nopush;
}
err = -EINVAL;
@@ -1282,6 +1282,7 @@ wait_for_memory:
out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+out_nopush:
release_sock(sk);
return copied + copied_syn;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 62e48cf84e60..9771563ab564 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -131,7 +131,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
struct dst_entry *dst,
struct request_sock *req)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
struct sock *child;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 40661fc1e233..40639c288dc2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1106,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack && tp->undo_marker && tp->undo_retrans &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1162,7 +1162,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
unsigned int new_len = (pkt_len / mss) * mss;
if (!in_sack && new_len < pkt_len) {
new_len += mss;
- if (new_len > skb->len)
+ if (new_len >= skb->len)
return 0;
}
pkt_len = new_len;
@@ -1187,7 +1187,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (tp->undo_marker && tp->undo_retrans &&
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
@@ -1893,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
- tp->undo_retrans = 0;
+ tp->undo_retrans = -1;
}
void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2665,7 +2665,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out;
+ tp->undo_retrans = tp->retrans_out ? : -1;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d92bce0ea24e..179b51e6bda3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2525,8 +2525,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans += tcp_skb_pcount(skb);
-
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
@@ -2534,6 +2532,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
} else if (err != -EBUSY) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
+
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
return err;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d92f94b7e402..7d5a8661df76 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1588,8 +1588,11 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+ is_udplite);
goto drop;
+ }
rc = 0;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 08b367c6b9cf..617f0958e164 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb)
len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
len -= skb_network_header_len(skb);
- /* Drop queries with not link local source */
- if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ /* RFC3810 6.2
+ * Upon reception of an MLD message that contains a Query, the node
+ * checks if the source address of the message is a valid link-local
+ * address, if the Hop Limit is set to 1, and if the Router Alert
+ * option is present in the Hop-By-Hop Options header of the IPv6
+ * packet. If any of these checks fails, the packet is dropped.
+ */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) ||
+ ipv6_hdr(skb)->hop_limit != 1 ||
+ !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
+ IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
return -EINVAL;
idev = __in6_dev_get(skb->dev);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 95c834799288..7092ff78fd84 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -674,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto csum_error;
}
- if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
+ if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
+ UDP6_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
+ }
skb_dst_drop(skb);
@@ -690,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
bh_unlock_sock(sk);
return rc;
+
csum_error:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
drop:
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 950909f04ee6..13752d96275e 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1365,7 +1365,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
int err;
if (level != SOL_PPPOL2TP)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1491,7 +1491,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
struct pppol2tp_session *ps;
if (level != SOL_PPPOL2TP)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return -EINVAL;
if (get_user(len, optlen))
return -EFAULT;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6886601afe1c..a6cda52ed920 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1096,11 +1096,12 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
int err;
/* 24 + 6 = header + auth_algo + auth_transaction + status_code */
- skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 6 + extra_len);
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN +
+ 24 + 6 + extra_len + IEEE80211_WEP_ICV_LEN);
if (!skb)
return;
- skb_reserve(skb, local->hw.extra_tx_headroom);
+ skb_reserve(skb, local->hw.extra_tx_headroom + IEEE80211_WEP_IV_LEN);
mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
memset(mgmt, 0, 24 + 6);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c42e83d2751c..581a6584ed0c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ ip_vs_stop_estimator(net, &ipvs->tot_stats);
}
#else
@@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 58579634427d..300ed1eec729 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -597,6 +597,9 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
;
@@ -1150,7 +1153,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
static int
ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
{
- struct nf_conn *ct, *last = NULL;
+ struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
@@ -1163,8 +1166,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
if (cb->args[2])
return 0;
- if (cb->args[0] == nr_cpu_ids)
- return 0;
+ last = (struct nf_conn *)cb->args[1];
for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
struct ct_pcpu *pcpu;
@@ -1174,7 +1176,6 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
spin_lock_bh(&pcpu->lock);
- last = (struct nf_conn *)cb->args[1];
list = dying ? &pcpu->dying : &pcpu->unconfirmed;
restart:
hlist_nulls_for_each_entry(h, n, list, hnnode) {
@@ -1193,7 +1194,9 @@ restart:
ct);
rcu_read_unlock();
if (res < 0) {
- nf_conntrack_get(&ct->ct_general);
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ continue;
+ cb->args[0] = cpu;
cb->args[1] = (unsigned long)ct;
spin_unlock_bh(&pcpu->lock);
goto out;
@@ -1202,10 +1205,10 @@ restart:
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
- } else
- cb->args[2] = 1;
+ }
spin_unlock_bh(&pcpu->lock);
}
+ cb->args[2] = 1;
out:
if (last)
nf_ct_put(last);
@@ -2040,6 +2043,9 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_MARK
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+#endif
+ ctnetlink_proto_size(ct)
;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 09096a670c45..a49907b1dabc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -525,6 +525,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
return i->status & IPS_NAT_MASK ? 1 : 0;
}
+static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
+{
+ struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (nf_nat_proto_remove(ct, data))
+ return 1;
+
+ if (!nat || !nat->ct)
+ return 0;
+
+ /* This netns is being destroyed, and conntrack has nat null binding.
+ * Remove it from bysource hash, as the table will be freed soon.
+ *
+ * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
+ * will delete entry from already-freed table.
+ */
+ if (!del_timer(&ct->timeout))
+ return 1;
+
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&nat->bysource);
+ ct->status &= ~IPS_NAT_DONE_MASK;
+ nat->ct = NULL;
+ spin_unlock_bh(&nf_nat_lock);
+
+ add_timer(&ct->timeout);
+
+ /* don't delete conntrack. Although that would make things a lot
+ * simpler, we'd end up flushing all conntracks on nat rmmod.
+ */
+ return 0;
+}
+
static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
struct nf_nat_proto_clean clean = {
@@ -795,7 +828,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 624e083125b9..ab4566cfcbe4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1730,6 +1730,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
return -EINVAL;
handle = nf_tables_alloc_handle(table);
+
+ if (chain->use == UINT_MAX)
+ return -EOVERFLOW;
}
if (nla[NFTA_RULE_POSITION]) {
@@ -1789,14 +1792,15 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
if (nft_rule_is_active_next(net, old_rule)) {
- trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE,
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
old_rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
nft_rule_disactivate_next(net, old_rule);
- list_add_tail(&rule->list, &old_rule->list);
+ chain->use--;
+ list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
err = -ENOENT;
goto err2;
@@ -1826,6 +1830,7 @@ err3:
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_clear(net, nft_trans_rule(trans));
nft_trans_destroy(trans);
+ chain->use++;
}
err2:
nf_tables_rule_destroy(&ctx, rule);
@@ -2845,7 +2850,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = NFPROTO_UNSPEC;
+ nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8a779be832fb..1840989092ed 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -195,6 +195,15 @@ static void
nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_tgdtor_param par;
+
+ par.net = ctx->net;
+ par.target = target;
+ par.targinfo = info;
+ par.family = ctx->afi->family;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
module_put(target->me);
}
@@ -382,6 +391,15 @@ static void
nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
+ void *info = nft_expr_priv(expr);
+ struct xt_mtdtor_param par;
+
+ par.net = ctx->net;
+ par.match = match;
+ par.matchinfo = info;
+ par.family = ctx->afi->family;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
module_put(match->me);
}
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index a0195d28bcfc..79ff58cd36dc 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -175,12 +175,14 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_be32(skb,
NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min)))
- goto nla_put_failure;
- if (nla_put_be32(skb,
- NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max)))
- goto nla_put_failure;
+ if (priv->sreg_proto_min) {
+ if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
+ htonl(priv->sreg_proto_min)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
+ htonl(priv->sreg_proto_max)))
+ goto nla_put_failure;
+ }
return 0;
nla_put_failure:
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 15c731f03fa6..e6fac7e3db52 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -636,7 +636,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
while (nlk->cb_running && netlink_dump_space(nlk)) {
err = netlink_dump(sk);
if (err < 0) {
- sk->sk_err = err;
+ sk->sk_err = -err;
sk->sk_error_report(sk);
break;
}
@@ -2483,7 +2483,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = ret;
+ sk->sk_err = -ret;
sk->sk_error_report(sk);
}
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c36856a457ca..e70d8b18e962 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -551,6 +551,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, a);
+ if (unlikely(err)) /* skb already freed. */
+ return err;
break;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 0d407bca81e3..9db4bf6740d1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -276,7 +276,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
OVS_CB(skb)->flow = flow;
OVS_CB(skb)->pkt_key = &key;
- ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
+ ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
ovs_execute_actions(dp, skb);
stats_counter = &stats->n_hit;
@@ -889,8 +889,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
/* The unmasked key has to be the same for flow updates. */
if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
- error = -EEXIST;
- goto err_unlock_ovs;
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (!flow) {
+ error = -ENOENT;
+ goto err_unlock_ovs;
+ }
}
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
@@ -981,16 +984,12 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
}
/* Check that the flow exists. */
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
if (unlikely(!flow)) {
error = -ENOENT;
goto err_unlock_ovs;
}
- /* The unmasked key has to be the same for flow updates. */
- if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
- error = -EEXIST;
- goto err_unlock_ovs;
- }
+
/* Update actions, if present. */
if (likely(acts)) {
old_acts = ovsl_dereference(flow->sf_acts);
@@ -1063,8 +1062,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (!flow) {
err = -ENOENT;
goto unlock;
}
@@ -1113,8 +1112,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- flow = ovs_flow_tbl_lookup(&dp->table, &key);
- if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) {
+ flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+ if (unlikely(!flow)) {
err = -ENOENT;
goto unlock;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 334751cb1528..d07ab538fc9d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -61,10 +61,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
-void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
+void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+ struct sk_buff *skb)
{
struct flow_stats *stats;
- __be16 tcp_flags = flow->key.tp.flags;
int node = numa_node_id();
stats = rcu_dereference(flow->stats[node]);
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index ac395d2cd821..5e5aaed3a85b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -180,7 +180,8 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;
-void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *);
+void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+ struct sk_buff *);
void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
unsigned long *used, __be16 *tcp_flags);
void ovs_flow_stats_clear(struct sw_flow *);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 574c3abc9b30..cf2d853646f0 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -456,6 +456,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
}
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ /* Always called under ovs-mutex. */
+ list_for_each_entry(mask, &tbl->mask_list, list) {
+ flow = masked_flow_lookup(ti, match->key, mask);
+ if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */
+ return flow;
+ }
+ return NULL;
+}
+
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
struct sw_flow_mask *mask;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index ca8a5820f615..5918bff7f3f6 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -76,7 +76,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
-
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match);
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
struct sw_flow_match *match);
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 35ec4fed09e2..f49148a07da2 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -110,6 +110,22 @@ static int gre_rcv(struct sk_buff *skb,
return PACKET_RCVD;
}
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
+{
+ struct ovs_net *ovs_net;
+ struct vport *vport;
+
+ ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+ vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+
+ if (unlikely(!vport))
+ return PACKET_REJECT;
+ else
+ return PACKET_RCVD;
+}
+
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
@@ -186,6 +202,7 @@ error:
static struct gre_cisco_protocol gre_protocol = {
.handler = gre_rcv,
+ .err_handler = gre_err,
.priority = 1,
};
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index dcb19592761e..12c7e01c2677 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -321,41 +321,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- char tmp[8];
struct ctl_table tbl;
- int ret;
- int changed = 0;
+ bool changed = false;
char *none = "none";
+ char tmp[8];
+ int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
if (write) {
tbl.data = tmp;
- tbl.maxlen = 8;
+ tbl.maxlen = sizeof(tmp);
} else {
tbl.data = net->sctp.sctp_hmac_alg ? : none;
tbl.maxlen = strlen(tbl.data);
}
- ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
- if (write) {
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
#ifdef CONFIG_CRYPTO_MD5
if (!strncmp(tmp, "md5", 3)) {
net->sctp.sctp_hmac_alg = "md5";
- changed = 1;
+ changed = true;
}
#endif
#ifdef CONFIG_CRYPTO_SHA1
if (!strncmp(tmp, "sha1", 4)) {
net->sctp.sctp_hmac_alg = "sha1";
- changed = 1;
+ changed = true;
}
#endif
if (!strncmp(tmp, "none", 4)) {
net->sctp.sctp_hmac_alg = NULL;
- changed = 1;
+ changed = true;
}
-
if (!changed)
ret = -EINVAL;
}
@@ -368,11 +367,10 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- int new_value;
- struct ctl_table tbl;
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
- int ret;
+ struct ctl_table tbl;
+ int ret, new_value;
memset(&tbl, 0, sizeof(struct ctl_table));
tbl.maxlen = sizeof(unsigned int);
@@ -381,12 +379,15 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
tbl.data = &new_value;
else
tbl.data = &net->sctp.rto_min;
+
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
- if (write) {
- if (ret || new_value > max || new_value < min)
+ if (write && ret == 0) {
+ if (new_value > max || new_value < min)
return -EINVAL;
+
net->sctp.rto_min = new_value;
}
+
return ret;
}
@@ -395,11 +396,10 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
loff_t *ppos)
{
struct net *net = current->nsproxy->net_ns;
- int new_value;
- struct ctl_table tbl;
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
- int ret;
+ struct ctl_table tbl;
+ int ret, new_value;
memset(&tbl, 0, sizeof(struct ctl_table));
tbl.maxlen = sizeof(unsigned int);
@@ -408,12 +408,15 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
tbl.data = &new_value;
else
tbl.data = &net->sctp.rto_max;
+
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
- if (write) {
- if (ret || new_value > max || new_value < min)
+ if (write && ret == 0) {
+ if (new_value > max || new_value < min)
return -EINVAL;
+
net->sctp.rto_max = new_value;
}
+
return ret;
}
@@ -444,8 +447,7 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
tbl.data = &net->sctp.auth_enable;
ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
-
- if (write) {
+ if (write && ret == 0) {
struct sock *sk = net->sctp.ctl_sock;
net->sctp.auth_enable = new_value;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 85c64658bd0b..b6842fdb53d4 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -366,9 +366,10 @@ fail:
* specification [SCTP] and any extensions for a list of possible
* error formats.
*/
-struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
- const struct sctp_association *asoc, struct sctp_chunk *chunk,
- __u16 flags, gfp_t gfp)
+struct sctp_ulpevent *
+sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, __u16 flags,
+ gfp_t gfp)
{
struct sctp_ulpevent *event;
struct sctp_remote_error *sre;
@@ -387,8 +388,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
/* Copy the skb to a new skb with room for us to prepend
* notification with.
*/
- skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error),
- 0, gfp);
+ skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp);
/* Pull off the rest of the cause TLV from the chunk. */
skb_pull(chunk->skb, elen);
@@ -399,62 +399,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
event = sctp_skb2event(skb);
sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
- sre = (struct sctp_remote_error *)
- skb_push(skb, sizeof(struct sctp_remote_error));
+ sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre));
/* Trim the buffer to the right length. */
- skb_trim(skb, sizeof(struct sctp_remote_error) + elen);
+ skb_trim(skb, sizeof(*sre) + elen);
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_type:
- * It should be SCTP_REMOTE_ERROR.
- */
+ /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */
+ memset(sre, 0, sizeof(*sre));
sre->sre_type = SCTP_REMOTE_ERROR;
-
- /*
- * Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_flags: 16 bits (unsigned integer)
- * Currently unused.
- */
sre->sre_flags = 0;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_length: sizeof (__u32)
- *
- * This field is the total length of the notification data,
- * including the notification header.
- */
sre->sre_length = skb->len;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_error: 16 bits (unsigned integer)
- * This value represents one of the Operational Error causes defined in
- * the SCTP specification, in network byte order.
- */
sre->sre_error = cause;
-
- /* Socket Extensions for SCTP
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- * sre_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association id field, holds the identifier for the association.
- * All notifications for a given association have the same association
- * identifier. For TCP style socket, this field is ignored.
- */
sctp_ulpevent_set_owner(event, asoc);
sre->sre_assoc_id = sctp_assoc2id(asoc);
return event;
-
fail:
return NULL;
}
@@ -899,7 +858,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event)
return notification->sn_header.sn_type;
}
-/* Copy out the sndrcvinfo into a msghdr. */
+/* RFC6458, Section 5.3.2. SCTP Header Information Structure
+ * (SCTP_SNDRCV, DEPRECATED)
+ */
void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
struct msghdr *msghdr)
{
@@ -908,74 +869,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
if (sctp_ulpevent_is_notification(event))
return;
- /* Sockets API Extensions for SCTP
- * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
- *
- * sinfo_stream: 16 bits (unsigned integer)
- *
- * For recvmsg() the SCTP stack places the message's stream number in
- * this value.
- */
+ memset(&sinfo, 0, sizeof(sinfo));
sinfo.sinfo_stream = event->stream;
- /* sinfo_ssn: 16 bits (unsigned integer)
- *
- * For recvmsg() this value contains the stream sequence number that
- * the remote endpoint placed in the DATA chunk. For fragmented
- * messages this is the same number for all deliveries of the message
- * (if more than one recvmsg() is needed to read the message).
- */
sinfo.sinfo_ssn = event->ssn;
- /* sinfo_ppid: 32 bits (unsigned integer)
- *
- * In recvmsg() this value is
- * the same information that was passed by the upper layer in the peer
- * application. Please note that byte order issues are NOT accounted
- * for and this information is passed opaquely by the SCTP stack from
- * one end to the other.
- */
sinfo.sinfo_ppid = event->ppid;
- /* sinfo_flags: 16 bits (unsigned integer)
- *
- * This field may contain any of the following flags and is composed of
- * a bitwise OR of these values.
- *
- * recvmsg() flags:
- *
- * SCTP_UNORDERED - This flag is present when the message was sent
- * non-ordered.
- */
sinfo.sinfo_flags = event->flags;
- /* sinfo_tsn: 32 bit (unsigned integer)
- *
- * For the receiving side, this field holds a TSN that was
- * assigned to one of the SCTP Data Chunks.
- */
sinfo.sinfo_tsn = event->tsn;
- /* sinfo_cumtsn: 32 bit (unsigned integer)
- *
- * This field will hold the current cumulative TSN as
- * known by the underlying SCTP layer. Note this field is
- * ignored when sending and only valid for a receive
- * operation when sinfo_flags are set to SCTP_UNORDERED.
- */
sinfo.sinfo_cumtsn = event->cumtsn;
- /* sinfo_assoc_id: sizeof (sctp_assoc_t)
- *
- * The association handle field, sinfo_assoc_id, holds the identifier
- * for the association announced in the COMMUNICATION_UP notification.
- * All notifications for a given association have the same identifier.
- * Ignored for one-to-one style sockets.
- */
sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc);
-
- /* context value that is set via SCTP_CONTEXT socket option. */
+ /* Context value that is set via SCTP_CONTEXT socket option. */
sinfo.sinfo_context = event->asoc->default_rcv_context;
-
/* These fields are not used while receiving. */
sinfo.sinfo_timetolive = 0;
put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV,
- sizeof(struct sctp_sndrcvinfo), (void *)&sinfo);
+ sizeof(sinfo), &sinfo);
}
/* Do accounting for bytes received and hold a reference to the association
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 693966d3f33b..6166c985fe24 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,14 +53,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-enum rpcrdma_chunktype {
- rpcrdma_noch = 0,
- rpcrdma_readch,
- rpcrdma_areadch,
- rpcrdma_writech,
- rpcrdma_replych
-};
-
#ifdef RPC_DEBUG
static const char transfertypes[][12] = {
"pure inline", /* no chunks */
@@ -279,13 +271,37 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
return (unsigned char *)iptr - (unsigned char *)headerp;
out:
- for (pos = 0; nchunks--;)
- pos += rpcrdma_deregister_external(
- &req->rl_segments[pos], r_xprt);
+ if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
+ for (pos = 0; nchunks--;)
+ pos += rpcrdma_deregister_external(
+ &req->rl_segments[pos], r_xprt);
+ }
return n;
}
/*
+ * Marshal chunks. This routine returns the header length
+ * consumed by marshaling.
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
+ */
+
+ssize_t
+rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
+{
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;
+
+ if (req->rl_rtype != rpcrdma_noch)
+ result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
+ headerp, req->rl_rtype);
+ else if (req->rl_wtype != rpcrdma_noch)
+ result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
+ headerp, req->rl_wtype);
+ return result;
+}
+
+/*
* Copy write data inline.
* This function is used for "small" requests. Data which is passed
* to RPC via iovecs (or page list) is copied directly into the
@@ -377,7 +393,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
char *base;
size_t rpclen, padlen;
ssize_t hdrlen;
- enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
/*
@@ -415,13 +430,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* into pages; otherwise use reply chunks.
*/
if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
- wtype = rpcrdma_noch;
+ req->rl_wtype = rpcrdma_noch;
else if (rqst->rq_rcv_buf.page_len == 0)
- wtype = rpcrdma_replych;
+ req->rl_wtype = rpcrdma_replych;
else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
- wtype = rpcrdma_writech;
+ req->rl_wtype = rpcrdma_writech;
else
- wtype = rpcrdma_replych;
+ req->rl_wtype = rpcrdma_replych;
/*
* Chunks needed for arguments?
@@ -438,16 +453,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* TBD check NFSv4 setacl
*/
if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
- rtype = rpcrdma_noch;
+ req->rl_rtype = rpcrdma_noch;
else if (rqst->rq_snd_buf.page_len == 0)
- rtype = rpcrdma_areadch;
+ req->rl_rtype = rpcrdma_areadch;
else
- rtype = rpcrdma_readch;
+ req->rl_rtype = rpcrdma_readch;
/* The following simplification is not true forever */
- if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
- wtype = rpcrdma_noch;
- if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+ if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
+ req->rl_wtype = rpcrdma_noch;
+ if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
__func__);
return -EIO;
@@ -461,7 +476,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* When padding is in use and applies to the transfer, insert
* it and change the message type.
*/
- if (rtype == rpcrdma_noch) {
+ if (req->rl_rtype == rpcrdma_noch) {
padlen = rpcrdma_inline_pullup(rqst,
RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -476,7 +491,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
- if (wtype != rpcrdma_noch) {
+ if (req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: invalid chunk list\n",
__func__);
return -EIO;
@@ -497,30 +512,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
- if (wtype == rpcrdma_noch)
- wtype = rpcrdma_replych;
+ if (req->rl_wtype == rpcrdma_noch)
+ req->rl_wtype = rpcrdma_replych;
}
}
- /*
- * Marshal chunks. This routine will return the header length
- * consumed by marshaling.
- */
- if (rtype != rpcrdma_noch) {
- hdrlen = rpcrdma_create_chunks(rqst,
- &rqst->rq_snd_buf, headerp, rtype);
- wtype = rtype; /* simplify dprintk */
-
- } else if (wtype != rpcrdma_noch) {
- hdrlen = rpcrdma_create_chunks(rqst,
- &rqst->rq_rcv_buf, headerp, wtype);
- }
+ hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
if (hdrlen < 0)
return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
- __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
+ __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
/*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 66f91f0d071a..2faac4940563 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -296,7 +296,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
- xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
xprt->ops = &xprt_rdma_procs;
/*
@@ -382,6 +381,9 @@ xprt_setup_rdma(struct xprt_create *args)
new_ep->rep_xprt = xprt;
xprt_rdma_format_addresses(xprt);
+ xprt->max_payload = rpcrdma_max_payload(new_xprt);
+ dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
+ __func__, xprt->max_payload);
if (!try_module_get(THIS_MODULE))
goto out4;
@@ -412,7 +414,7 @@ xprt_rdma_close(struct rpc_xprt *xprt)
if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
- (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
static void
@@ -595,13 +597,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
+ int rc = 0;
- if (req->rl_niovs == 0) {
+ if (req->rl_niovs == 0)
rc = rpcrdma_marshal_req(rqst);
- if (rc < 0)
- goto failed_marshal;
- }
+ else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+ rc = rpcrdma_marshal_chunks(rqst, 0);
+ if (rc < 0)
+ goto failed_marshal;
if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 13dbd1c389ff..61c41298b4ea 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -61,6 +61,8 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
+static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+
/*
* internal functions
*/
@@ -103,17 +105,6 @@ rpcrdma_run_tasklet(unsigned long data)
static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
-static inline void
-rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
- list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
- spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
- tasklet_schedule(&rpcrdma_tasklet_g);
-}
-
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
@@ -153,12 +144,7 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc)
if (wc->wr_id == 0ULL)
return;
if (wc->status != IB_WC_SUCCESS)
- return;
-
- if (wc->opcode == IB_WC_FAST_REG_MR)
- frmr->r.frmr.state = FRMR_IS_VALID;
- else if (wc->opcode == IB_WC_LOCAL_INV)
- frmr->r.frmr.state = FRMR_IS_INVALID;
+ frmr->r.frmr.fr_state = FRMR_IS_STALE;
}
static int
@@ -217,7 +203,7 @@ rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
}
static void
-rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
struct rpcrdma_rep *rep =
(struct rpcrdma_rep *)(unsigned long)wc->wr_id;
@@ -248,28 +234,38 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc)
}
out_schedule:
- rpcrdma_schedule_tasklet(rep);
+ list_add_tail(&rep->rr_list, sched_list);
}
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
+ struct list_head sched_list;
struct ib_wc *wcs;
int budget, count, rc;
+ unsigned long flags;
+ INIT_LIST_HEAD(&sched_list);
budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
do {
wcs = ep->rep_recv_wcs;
rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
if (rc <= 0)
- return rc;
+ goto out_schedule;
count = rc;
while (count-- > 0)
- rpcrdma_recvcq_process_wc(wcs++);
+ rpcrdma_recvcq_process_wc(wcs++, &sched_list);
} while (rc == RPCRDMA_POLLSIZE && --budget);
- return 0;
+ rc = 0;
+
+out_schedule:
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+ tasklet_schedule(&rpcrdma_tasklet_g);
+ return rc;
}
/*
@@ -310,6 +306,13 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
rpcrdma_recvcq_poll(cq, ep);
}
+static void
+rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
+{
+ rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
+ rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+}
+
#ifdef RPC_DEBUG
static const char * const conn[] = {
"address resolved",
@@ -323,8 +326,16 @@ static const char * const conn[] = {
"rejected",
"established",
"disconnected",
- "device removal"
+ "device removal",
+ "multicast join",
+ "multicast error",
+ "address change",
+ "timewait exit",
};
+
+#define CONNECTION_MSG(status) \
+ ((status) < ARRAY_SIZE(conn) ? \
+ conn[(status)] : "unrecognized connection error")
#endif
static int
@@ -382,23 +393,18 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
connstate = -ENODEV;
connected:
- dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
- __func__,
- (event->event <= 11) ? conn[event->event] :
- "unknown connection error",
- &addr->sin_addr.s_addr,
- ntohs(addr->sin_port),
- ep, event->event);
atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
dprintk("RPC: %s: %sconnected\n",
__func__, connstate > 0 ? "" : "dis");
ep->rep_connected = connstate;
ep->rep_func(ep);
wake_up_all(&ep->rep_connect_wait);
- break;
+ /*FALLTHROUGH*/
default:
- dprintk("RPC: %s: unexpected CM event %d\n",
- __func__, event->event);
+ dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
+ __func__, &addr->sin_addr.s_addr,
+ ntohs(addr->sin_port), ep,
+ CONNECTION_MSG(event->event));
break;
}
@@ -558,12 +564,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
if (!ia->ri_id->device->alloc_fmr) {
dprintk("RPC: %s: MTHCAFMR registration "
"not supported by HCA\n", __func__);
-#if RPCRDMA_PERSISTENT_REGISTRATION
memreg = RPCRDMA_ALLPHYSICAL;
-#else
- rc = -ENOMEM;
- goto out2;
-#endif
}
}
@@ -578,20 +579,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
switch (memreg) {
case RPCRDMA_FRMR:
break;
-#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
mem_priv = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
goto register_setup;
-#endif
case RPCRDMA_MTHCAFMR:
if (ia->ri_have_dma_lkey)
break;
mem_priv = IB_ACCESS_LOCAL_WRITE;
-#if RPCRDMA_PERSISTENT_REGISTRATION
register_setup:
-#endif
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -613,6 +610,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
/* Else will do memory reg/dereg for each chunk */
ia->ri_memreg_strategy = memreg;
+ rwlock_init(&ia->ri_qplock);
return 0;
out2:
rdma_destroy_id(ia->ri_id);
@@ -826,10 +824,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp) {
- rc = rpcrdma_ep_disconnect(ep, ia);
- if (rc)
- dprintk("RPC: %s: rpcrdma_ep_disconnect"
- " returned %i\n", __func__, rc);
+ rpcrdma_ep_disconnect(ep, ia);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
@@ -859,7 +854,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- struct rdma_cm_id *id;
+ struct rdma_cm_id *id, *old;
int rc = 0;
int retry_count = 0;
@@ -867,13 +862,12 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
struct rpcrdma_xprt *xprt;
retry:
dprintk("RPC: %s: reconnecting...\n", __func__);
- rc = rpcrdma_ep_disconnect(ep, ia);
- if (rc && rc != -ENOTCONN)
- dprintk("RPC: %s: rpcrdma_ep_disconnect"
- " status %i\n", __func__, rc);
- rpcrdma_clean_cq(ep->rep_attr.recv_cq);
- rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rpcrdma_ep_disconnect(ep, ia);
+ rpcrdma_flush_cqs(ep);
+
+ if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+ rpcrdma_reset_frmrs(ia);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
id = rpcrdma_create_id(xprt, ia,
@@ -905,9 +899,14 @@ retry:
rc = -ENETUNREACH;
goto out;
}
- rdma_destroy_qp(ia->ri_id);
- rdma_destroy_id(ia->ri_id);
+
+ write_lock(&ia->ri_qplock);
+ old = ia->ri_id;
ia->ri_id = id;
+ write_unlock(&ia->ri_qplock);
+
+ rdma_destroy_qp(old);
+ rdma_destroy_id(old);
} else {
dprintk("RPC: %s: connecting...\n", __func__);
rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
@@ -974,13 +973,12 @@ out:
* This call is not reentrant, and must not be made in parallel
* on the same endpoint.
*/
-int
+void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
- rpcrdma_clean_cq(ep->rep_attr.recv_cq);
- rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rpcrdma_flush_cqs(ep);
rc = rdma_disconnect(ia->ri_id);
if (!rc) {
/* returns without wait if not connected */
@@ -992,12 +990,93 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
ep->rep_connected = rc;
}
+}
+
+static int
+rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
+{
+ int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
+ struct ib_fmr_attr fmr_attr = {
+ .max_pages = RPCRDMA_MAX_DATA_SEGS,
+ .max_maps = 1,
+ .page_shift = PAGE_SHIFT
+ };
+ struct rpcrdma_mw *r;
+ int i, rc;
+
+ i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+ dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
+
+ while (i--) {
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (r == NULL)
+ return -ENOMEM;
+
+ r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
+ if (IS_ERR(r->r.fmr)) {
+ rc = PTR_ERR(r->r.fmr);
+ dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
+ __func__, rc);
+ goto out_free;
+ }
+
+ list_add(&r->mw_list, &buf->rb_mws);
+ list_add(&r->mw_all, &buf->rb_all);
+ }
+ return 0;
+
+out_free:
+ kfree(r);
+ return rc;
+}
+
+static int
+rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_frmr *f;
+ struct rpcrdma_mw *r;
+ int i, rc;
+
+ i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+ dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
+
+ while (i--) {
+ r = kzalloc(sizeof(*r), GFP_KERNEL);
+ if (r == NULL)
+ return -ENOMEM;
+ f = &r->r.frmr;
+
+ f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(f->fr_mr)) {
+ rc = PTR_ERR(f->fr_mr);
+ dprintk("RPC: %s: ib_alloc_fast_reg_mr "
+ "failed %i\n", __func__, rc);
+ goto out_free;
+ }
+
+ f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(f->fr_pgl)) {
+ rc = PTR_ERR(f->fr_pgl);
+ dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
+ "failed %i\n", __func__, rc);
+
+ ib_dereg_mr(f->fr_mr);
+ goto out_free;
+ }
+
+ list_add(&r->mw_list, &buf->rb_mws);
+ list_add(&r->mw_all, &buf->rb_all);
+ }
+
+ return 0;
+
+out_free:
+ kfree(r);
return rc;
}
-/*
- * Initialize buffer memory
- */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
@@ -1005,7 +1084,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
char *p;
size_t len, rlen, wlen;
int i, rc;
- struct rpcrdma_mw *r;
buf->rb_max_requests = cdata->max_requests;
spin_lock_init(&buf->rb_lock);
@@ -1016,28 +1094,12 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* 2. arrays of struct rpcrdma_req to fill in pointers
* 3. array of struct rpcrdma_rep for replies
* 4. padding, if any
- * 5. mw's, fmr's or frmr's, if any
* Send/recv buffers in req/rep need to be registered
*/
-
len = buf->rb_max_requests *
(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
len += cdata->padding;
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
- case RPCRDMA_MTHCAFMR:
- /* TBD we are perhaps overallocating here */
- len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
- default:
- break;
- }
- /* allocate 1, 4 and 5 in one shot */
p = kzalloc(len, GFP_KERNEL);
if (p == NULL) {
dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
@@ -1064,51 +1126,17 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
p += cdata->padding;
INIT_LIST_HEAD(&buf->rb_mws);
- r = (struct rpcrdma_mw *)p;
+ INIT_LIST_HEAD(&buf->rb_all);
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
- for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
- r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- ia->ri_max_frmr_depth);
- if (IS_ERR(r->r.frmr.fr_mr)) {
- rc = PTR_ERR(r->r.frmr.fr_mr);
- dprintk("RPC: %s: ib_alloc_fast_reg_mr"
- " failed %i\n", __func__, rc);
- goto out;
- }
- r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
- ia->ri_id->device,
- ia->ri_max_frmr_depth);
- if (IS_ERR(r->r.frmr.fr_pgl)) {
- rc = PTR_ERR(r->r.frmr.fr_pgl);
- dprintk("RPC: %s: "
- "ib_alloc_fast_reg_page_list "
- "failed %i\n", __func__, rc);
-
- ib_dereg_mr(r->r.frmr.fr_mr);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
+ rc = rpcrdma_init_frmrs(ia, buf);
+ if (rc)
+ goto out;
break;
case RPCRDMA_MTHCAFMR:
- /* TBD we are perhaps overallocating here */
- for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
- static struct ib_fmr_attr fa =
- { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
- r->r.fmr = ib_alloc_fmr(ia->ri_pd,
- IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
- &fa);
- if (IS_ERR(r->r.fmr)) {
- rc = PTR_ERR(r->r.fmr);
- dprintk("RPC: %s: ib_alloc_fmr"
- " failed %i\n", __func__, rc);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
+ rc = rpcrdma_init_fmrs(ia, buf);
+ if (rc)
+ goto out;
break;
default:
break;
@@ -1176,24 +1204,57 @@ out:
return rc;
}
-/*
- * Unregister and destroy buffer memory. Need to deal with
- * partial initialization, so it's callable from failed create.
- * Must be called before destroying endpoint, as registrations
- * reference it.
- */
+static void
+rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mw *r;
+ int rc;
+
+ while (!list_empty(&buf->rb_all)) {
+ r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+ list_del(&r->mw_all);
+ list_del(&r->mw_list);
+
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
+ __func__, rc);
+
+ kfree(r);
+ }
+}
+
+static void
+rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mw *r;
+ int rc;
+
+ while (!list_empty(&buf->rb_all)) {
+ r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+ list_del(&r->mw_all);
+ list_del(&r->mw_list);
+
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+
+ kfree(r);
+ }
+}
+
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- int rc, i;
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
- struct rpcrdma_mw *r;
+ int i;
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
* 2. send mr memory (mr free, then kfree)
- * 3. padding (if any) [moved to rpcrdma_ep_destroy]
- * 4. arrays
+ * 3. MWs
*/
dprintk("RPC: %s: entering\n", __func__);
@@ -1212,34 +1273,217 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
}
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rpcrdma_destroy_frmrs(buf);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ rpcrdma_destroy_fmrs(buf);
+ break;
+ default:
+ break;
+ }
+
+ kfree(buf->rb_pool);
+}
+
+/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
+ * an unusable state. Find FRMRs in this state and dereg / reg
+ * each. FRMRs that are VALID and attached to an rpcrdma_req are
+ * also torn down.
+ *
+ * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_frmr_external().
+ */
+static void
+rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ia, struct rpcrdma_xprt, rx_ia);
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct list_head *pos;
+ struct rpcrdma_mw *r;
+ int rc;
+
+ list_for_each(pos, &buf->rb_all) {
+ r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+ if (r->r.frmr.fr_state == FRMR_IS_INVALID)
+ continue;
+
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+
+ r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(r->r.frmr.fr_mr)) {
+ rc = PTR_ERR(r->r.frmr.fr_mr);
+ dprintk("RPC: %s: ib_alloc_fast_reg_mr"
+ " failed %i\n", __func__, rc);
+ continue;
+ }
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
+ if (IS_ERR(r->r.frmr.fr_pgl)) {
+ rc = PTR_ERR(r->r.frmr.fr_pgl);
+ dprintk("RPC: %s: "
+ "ib_alloc_fast_reg_page_list "
+ "failed %i\n", __func__, rc);
+
+ ib_dereg_mr(r->r.frmr.fr_mr);
+ continue;
+ }
+ r->r.frmr.fr_state = FRMR_IS_INVALID;
+ }
+}
+
+/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
+ * some req segments uninitialized.
+ */
+static void
+rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
+{
+ if (*mw) {
+ list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
+ *mw = NULL;
+ }
+}
+
+/* Cycle mw's back in reverse order, and "spin" them.
+ * This delays and scrambles reuse as much as possible.
+ */
+static void
+rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mr_seg *seg = req->rl_segments;
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int i;
+
+ for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
+ rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
+ rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
+}
+
+static void
+rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+{
+ buf->rb_send_bufs[--buf->rb_send_index] = req;
+ req->rl_niovs = 0;
+ if (req->rl_reply) {
+ buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
+ req->rl_reply->rr_func = NULL;
+ req->rl_reply = NULL;
+ }
+}
+
+/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
+ * Redo only the ib_post_send().
+ */
+static void
+rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ia, struct rpcrdma_xprt, rx_ia);
+ struct ib_send_wr invalidate_wr, *bad_wr;
+ int rc;
+
+ dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
+
+ /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
+ r->r.frmr.fr_state = FRMR_IS_INVALID;
+
+ memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+ invalidate_wr.wr_id = (unsigned long)(void *)r;
+ invalidate_wr.opcode = IB_WR_LOCAL_INV;
+ invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+
+ dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
+ __func__, r, r->r.frmr.fr_mr->rkey);
+
+ read_lock(&ia->ri_qplock);
+ rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+ read_unlock(&ia->ri_qplock);
+ if (rc) {
+ /* Force rpcrdma_buffer_get() to retry */
+ r->r.frmr.fr_state = FRMR_IS_STALE;
+ dprintk("RPC: %s: ib_post_send failed, %i\n",
+ __func__, rc);
+ }
+}
+
+static void
+rpcrdma_retry_flushed_linv(struct list_head *stale,
+ struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+ struct list_head *pos;
+ struct rpcrdma_mw *r;
+ unsigned long flags;
+
+ list_for_each(pos, stale) {
+ r = list_entry(pos, struct rpcrdma_mw, mw_list);
+ rpcrdma_retry_local_inv(r, ia);
+ }
+
+ spin_lock_irqsave(&buf->rb_lock, flags);
+ list_splice_tail(stale, &buf->rb_mws);
+ spin_unlock_irqrestore(&buf->rb_lock, flags);
+}
+
+static struct rpcrdma_req *
+rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
+ struct list_head *stale)
+{
+ struct rpcrdma_mw *r;
+ int i;
+
+ i = RPCRDMA_MAX_SEGS - 1;
while (!list_empty(&buf->rb_mws)) {
r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
+ struct rpcrdma_mw, mw_list);
list_del(&r->mw_list);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- rc = ib_dereg_mr(r->r.frmr.fr_mr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dereg_mr"
- " failed %i\n",
- __func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
- break;
- case RPCRDMA_MTHCAFMR:
- rc = ib_dealloc_fmr(r->r.fmr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_fmr"
- " failed %i\n",
- __func__, rc);
- break;
- default:
- break;
+ if (r->r.frmr.fr_state == FRMR_IS_STALE) {
+ list_add(&r->mw_list, stale);
+ continue;
}
+ req->rl_segments[i].mr_chunk.rl_mw = r;
+ if (unlikely(i-- == 0))
+ return req; /* Success */
}
- kfree(buf->rb_pool);
+ /* Not enough entries on rb_mws for this req */
+ rpcrdma_buffer_put_sendbuf(req, buf);
+ rpcrdma_buffer_put_mrs(req, buf);
+ return NULL;
+}
+
+static struct rpcrdma_req *
+rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
+{
+ struct rpcrdma_mw *r;
+ int i;
+
+ i = RPCRDMA_MAX_SEGS - 1;
+ while (!list_empty(&buf->rb_mws)) {
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ req->rl_segments[i].mr_chunk.rl_mw = r;
+ if (unlikely(i-- == 0))
+ return req; /* Success */
+ }
+
+ /* Not enough entries on rb_mws for this req */
+ rpcrdma_buffer_put_sendbuf(req, buf);
+ rpcrdma_buffer_put_mrs(req, buf);
+ return NULL;
}
/*
@@ -1254,10 +1498,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
+ struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
+ struct list_head stale;
struct rpcrdma_req *req;
unsigned long flags;
- int i;
- struct rpcrdma_mw *r;
spin_lock_irqsave(&buffers->rb_lock, flags);
if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1277,16 +1521,21 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
}
buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
- if (!list_empty(&buffers->rb_mws)) {
- i = RPCRDMA_MAX_SEGS - 1;
- do {
- r = list_entry(buffers->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- req->rl_segments[i].mr_chunk.rl_mw = r;
- } while (--i >= 0);
+
+ INIT_LIST_HEAD(&stale);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ req = rpcrdma_buffer_get_fmrs(req, buffers);
+ break;
+ default:
+ break;
}
spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ if (!list_empty(&stale))
+ rpcrdma_retry_flushed_linv(&stale, buffers);
return req;
}
@@ -1299,34 +1548,14 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
{
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
- int i;
unsigned long flags;
spin_lock_irqsave(&buffers->rb_lock, flags);
- buffers->rb_send_bufs[--buffers->rb_send_index] = req;
- req->rl_niovs = 0;
- if (req->rl_reply) {
- buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
- req->rl_reply->rr_func = NULL;
- req->rl_reply = NULL;
- }
+ rpcrdma_buffer_put_sendbuf(req, buffers);
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
- /*
- * Cycle mw's back in reverse order, and "spin" them.
- * This delays and scrambles reuse as much as possible.
- */
- i = 1;
- do {
- struct rpcrdma_mw **mw;
- mw = &req->rl_segments[i].mr_chunk.rl_mw;
- list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
- *mw = NULL;
- } while (++i < RPCRDMA_MAX_SEGS);
- list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
- &buffers->rb_mws);
- req->rl_segments[0].mr_chunk.rl_mw = NULL;
+ rpcrdma_buffer_put_mrs(req, buffers);
break;
default:
break;
@@ -1388,6 +1617,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
*/
iov->addr = ib_dma_map_single(ia->ri_id->device,
va, len, DMA_BIDIRECTIONAL);
+ if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
+ return -ENOMEM;
+
iov->length = len;
if (ia->ri_have_dma_lkey) {
@@ -1483,8 +1715,10 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
-
+ struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
+ struct rpcrdma_frmr *frmr = &mw->r.frmr;
+ struct ib_mr *mr = frmr->fr_mr;
+ struct ib_send_wr fastreg_wr, *bad_wr;
u8 key;
int len, pageoff;
int i, rc;
@@ -1502,8 +1736,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
- seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
- page_list[page_no++] = pa;
+ frmr->fr_pgl->page_list[page_no++] = pa;
pa += PAGE_SIZE;
}
len += seg->mr_len;
@@ -1515,65 +1748,51 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
break;
}
dprintk("RPC: %s: Using frmr %p to map %d segments\n",
- __func__, seg1->mr_chunk.rl_mw, i);
-
- if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
- dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
- __func__,
- seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
- /* Invalidate before using. */
- memset(&invalidate_wr, 0, sizeof invalidate_wr);
- invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
- invalidate_wr.next = &frmr_wr;
- invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.send_flags = IB_SEND_SIGNALED;
- invalidate_wr.ex.invalidate_rkey =
- seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- post_wr = &invalidate_wr;
- } else
- post_wr = &frmr_wr;
-
- /* Prepare FRMR WR */
- memset(&frmr_wr, 0, sizeof frmr_wr);
- frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
- frmr_wr.opcode = IB_WR_FAST_REG_MR;
- frmr_wr.send_flags = IB_SEND_SIGNALED;
- frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
- frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
- frmr_wr.wr.fast_reg.page_list_len = page_no;
- frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
- if (frmr_wr.wr.fast_reg.length < len) {
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- return -EIO;
+ __func__, mw, i);
+
+ frmr->fr_state = FRMR_IS_VALID;
+
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.wr_id = (unsigned long)(void *)mw;
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
+ fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
+ fastreg_wr.wr.fast_reg.page_list_len = page_no;
+ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
+ if (fastreg_wr.wr.fast_reg.length < len) {
+ rc = -EIO;
+ goto out_err;
}
/* Bump the key */
- key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+ key = (u8)(mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(mr, ++key);
- frmr_wr.wr.fast_reg.access_flags = (writing ?
+ fastreg_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ);
- frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ fastreg_wr.wr.fast_reg.rkey = mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
-
+ rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
if (rc) {
dprintk("RPC: %s: failed ib_post_send for register,"
" status %i\n", __func__, rc);
- while (i--)
- rpcrdma_unmap_one(ia, --seg);
+ ib_update_fast_reg_key(mr, --key);
+ goto out_err;
} else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ seg1->mr_rkey = mr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
}
*nsegs = i;
+ return 0;
+out_err:
+ frmr->fr_state = FRMR_IS_INVALID;
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
return rc;
}
@@ -1585,20 +1804,25 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
struct ib_send_wr invalidate_wr, *bad_wr;
int rc;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
+ seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
memset(&invalidate_wr, 0, sizeof invalidate_wr);
invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
- invalidate_wr.send_flags = IB_SEND_SIGNALED;
invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
+ read_lock(&ia->ri_qplock);
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
- if (rc)
+ read_unlock(&ia->ri_qplock);
+ if (rc) {
+ /* Force rpcrdma_buffer_get() to retry */
+ seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
dprintk("RPC: %s: failed ib_post_send for invalidate,"
" status %i\n", __func__, rc);
+ }
return rc;
}
@@ -1656,8 +1880,10 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
rc = ib_unmap_fmr(&l);
+ read_lock(&ia->ri_qplock);
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
+ read_unlock(&ia->ri_qplock);
if (rc)
dprintk("RPC: %s: failed ib_unmap_fmr,"
" status %i\n", __func__, rc);
@@ -1673,7 +1899,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
switch (ia->ri_memreg_strategy) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
rpcrdma_map_one(ia, seg, writing);
seg->mr_rkey = ia->ri_bind_mem->rkey;
@@ -1681,7 +1906,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
seg->mr_nsegs = 1;
nsegs = 1;
break;
-#endif
/* Registration using frmr registration */
case RPCRDMA_FRMR:
@@ -1711,11 +1935,11 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
switch (ia->ri_memreg_strategy) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
+ read_lock(&ia->ri_qplock);
rpcrdma_unmap_one(ia, seg);
+ read_unlock(&ia->ri_qplock);
break;
-#endif
case RPCRDMA_FRMR:
rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
@@ -1809,3 +2033,44 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
rc);
return rc;
}
+
+/* Physical mapping means one Read/Write list entry per-page.
+ * All list entries must fit within an inline buffer
+ *
+ * NB: The server must return a Write list for NFS READ,
+ * which has the same constraint. Factor in the inline
+ * rsize as well.
+ */
+static size_t
+rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ unsigned int inline_size, pages;
+
+ inline_size = min_t(unsigned int,
+ cdata->inline_wsize, cdata->inline_rsize);
+ inline_size -= RPCRDMA_HDRLEN_MIN;
+ pages = inline_size / sizeof(struct rpcrdma_segment);
+ return pages << PAGE_SHIFT;
+}
+
+static size_t
+rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
+{
+ return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
+}
+
+size_t
+rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
+{
+ size_t result;
+
+ switch (r_xprt->rx_ia.ri_memreg_strategy) {
+ case RPCRDMA_ALLPHYSICAL:
+ result = rpcrdma_physical_max_payload(r_xprt);
+ break;
+ default:
+ result = rpcrdma_mr_max_payload(r_xprt);
+ }
+ return result;
+}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 89e7cd479705..c419498b8f46 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -59,6 +59,7 @@
* Interface Adapter -- one per transport instance
*/
struct rpcrdma_ia {
+ rwlock_t ri_qplock;
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
@@ -98,6 +99,14 @@ struct rpcrdma_ep {
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+enum rpcrdma_chunktype {
+ rpcrdma_noch = 0,
+ rpcrdma_readch,
+ rpcrdma_areadch,
+ rpcrdma_writech,
+ rpcrdma_replych
+};
+
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asychronously. It needs several pieces of
@@ -137,6 +146,40 @@ struct rpcrdma_rep {
};
/*
+ * struct rpcrdma_mw - external memory region metadata
+ *
+ * An external memory region is any buffer or page that is registered
+ * on the fly (ie, not pre-registered).
+ *
+ * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
+ * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
+ * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
+ * track of registration metadata while each RPC is pending.
+ * rpcrdma_deregister_external() uses this metadata to unmap and
+ * release these resources when an RPC is complete.
+ */
+enum rpcrdma_frmr_state {
+ FRMR_IS_INVALID, /* ready to be used */
+ FRMR_IS_VALID, /* in use */
+ FRMR_IS_STALE, /* failed completion */
+};
+
+struct rpcrdma_frmr {
+ struct ib_fast_reg_page_list *fr_pgl;
+ struct ib_mr *fr_mr;
+ enum rpcrdma_frmr_state fr_state;
+};
+
+struct rpcrdma_mw {
+ union {
+ struct ib_fmr *fmr;
+ struct rpcrdma_frmr frmr;
+ } r;
+ struct list_head mw_list;
+ struct list_head mw_all;
+};
+
+/*
* struct rpcrdma_req -- structure central to the request/reply sequence.
*
* N of these are associated with a transport instance, and stored in
@@ -163,17 +206,7 @@ struct rpcrdma_rep {
struct rpcrdma_mr_seg { /* chunk descriptors */
union { /* chunk memory handles */
struct ib_mr *rl_mr; /* if registered directly */
- struct rpcrdma_mw { /* if registered from region */
- union {
- struct ib_fmr *fmr;
- struct {
- struct ib_fast_reg_page_list *fr_pgl;
- struct ib_mr *fr_mr;
- enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
- } frmr;
- } r;
- struct list_head mw_list;
- } *rl_mw;
+ struct rpcrdma_mw *rl_mw; /* if registered from region */
} mr_chunk;
u64 mr_base; /* registration result */
u32 mr_rkey; /* registration result */
@@ -191,6 +224,7 @@ struct rpcrdma_req {
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */
+ enum rpcrdma_chunktype rl_rtype, rl_wtype;
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@@ -214,6 +248,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
+ struct list_head rb_all;
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
@@ -306,7 +341,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
@@ -346,7 +381,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
+ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
int rpcrdma_marshal_req(struct rpc_rqst *);
+size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 26631679a1fa..55c6c9d3e1ce 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -559,6 +559,7 @@ receive:
buf = node->bclink.deferred_head;
node->bclink.deferred_head = buf->next;
+ buf->next = NULL;
node->bclink.deferred_size--;
goto receive;
}
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 8be6e94a1ca9..0a37a472c29f 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -101,9 +101,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect,
}
/* tipc_buf_append(): Append a buffer to the fragment list of another buffer
- * Let first buffer become head buffer
- * Returns 1 and sets *buf to headbuf if chain is complete, otherwise 0
- * Leaves headbuf pointer at NULL if failure
+ * @*headbuf: in: NULL for first frag, otherwise value returned from prev call
+ * out: set when successful non-complete reassembly, otherwise NULL
+ * @*buf: in: the buffer to append. Always defined
+ * out: head buf after sucessful complete reassembly, otherwise NULL
+ * Returns 1 when reassembly complete, otherwise 0
*/
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
{
@@ -122,6 +124,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
goto out_free;
head = *headbuf = frag;
skb_frag_list_init(head);
+ *buf = NULL;
return 0;
}
if (!head)
@@ -150,5 +153,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
out_free:
pr_warn_ratelimited("Unable to build fragment list\n");
kfree_skb(*buf);
+ kfree_skb(*headbuf);
+ *buf = *headbuf = NULL;
return 0;
}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index e9afbf10e756..7e3a3cef7df9 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -424,7 +424,7 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
if (end >= start)
return jiffies_to_msecs(end - start);
- return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1);
+ return jiffies_to_msecs(end + (ULONG_MAX - start) + 1);
}
void
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ba4f1723c83a..6668daf69326 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1497,18 +1497,17 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
}
CMD(start_p2p_device, START_P2P_DEVICE);
CMD(set_mcast_rate, SET_MCAST_RATE);
+#ifdef CONFIG_NL80211_TESTMODE
+ CMD(testmode_cmd, TESTMODE);
+#endif
if (state->split) {
CMD(crit_proto_start, CRIT_PROTOCOL_START);
CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
CMD(channel_switch, CHANNEL_SWITCH);
+ CMD(set_qos_map, SET_QOS_MAP);
}
- CMD(set_qos_map, SET_QOS_MAP);
-
-#ifdef CONFIG_NL80211_TESTMODE
- CMD(testmode_cmd, TESTMODE);
-#endif
-
+ /* add into the if now */
#undef CMD
if (rdev->ops->connect || rdev->ops->auth) {
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 558b0e3a02d8..1afdf45db38f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -935,7 +935,7 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
if (!band_rule_found)
band_rule_found = freq_in_rule_band(fr, center_freq);
- bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(5));
+ bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20));
if (band_rule_found && bw_fits)
return rr;
@@ -1019,10 +1019,10 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
}
#endif
-/* Find an ieee80211_reg_rule such that a 5MHz channel with frequency
- * chan->center_freq fits there.
- * If there is no such reg_rule, disable the channel, otherwise set the
- * flags corresponding to the bandwidths allowed in the particular reg_rule
+/*
+ * Note that right now we assume the desired channel bandwidth
+ * is always 20 MHz for each individual channel (HT40 uses 20 MHz
+ * per channel, the primary and the extension channel).
*/
static void handle_channel(struct wiphy *wiphy,
enum nl80211_reg_initiator initiator,
@@ -1083,12 +1083,8 @@ static void handle_channel(struct wiphy *wiphy,
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
- if (max_bandwidth_khz < MHZ_TO_KHZ(10))
- bw_flags = IEEE80211_CHAN_NO_10MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(20))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags |= IEEE80211_CHAN_NO_HT40;
+ bw_flags = IEEE80211_CHAN_NO_HT40;
if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -1522,12 +1518,8 @@ static void handle_channel_custom(struct wiphy *wiphy,
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
- if (max_bandwidth_khz < MHZ_TO_KHZ(10))
- bw_flags = IEEE80211_CHAN_NO_10MHZ;
- if (max_bandwidth_khz < MHZ_TO_KHZ(20))
- bw_flags |= IEEE80211_CHAN_NO_20MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags |= IEEE80211_CHAN_NO_HT40;
+ bw_flags = IEEE80211_CHAN_NO_HT40;
if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
OpenPOWER on IntegriCloud