summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/9p/trans_rdma.c2
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/hard-interface.c1
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/bluetooth/hci_request.c65
-rw-r--r--net/bluetooth/hci_request.h25
-rw-r--r--net/bluetooth/mgmt.c47
-rw-r--r--net/bridge/br_multicast.c23
-rw-r--r--net/bridge/br_sysfs_if.c1
-rw-r--r--net/ceph/Makefile1
-rw-r--r--net/ceph/auth.c13
-rw-r--r--net/ceph/auth_none.c2
-rw-r--r--net/ceph/ceph_common.c13
-rw-r--r--net/ceph/ceph_strings.c1
-rw-r--r--net/ceph/cls_lock_client.c325
-rw-r--r--net/ceph/crush/mapper.c17
-rw-r--r--net/ceph/mon_client.c82
-rw-r--r--net/ceph/osd_client.c169
-rw-r--r--net/ceph/pagevec.c2
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/flow_dissector.c12
-rw-r--r--net/core/net_namespace.c35
-rw-r--r--net/core/pktgen.c17
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c28
-rw-r--r--net/core/sock.c6
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/hsr/hsr_forward.c4
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/inet_hashtables.c8
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c11
-rw-r--r--net/ipv4/ping.c17
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c8
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_ipv4.c6
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/addrconf.c105
-rw-r--r--net/ipv6/inet6_hashtables.c13
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_tunnel.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c3
-rw-r--r--net/ipv6/mcast.c17
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c3
-rw-r--r--net/ipv6/route.c74
-rw-r--r--net/ipv6/tcp_ipv6.c20
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/kcm/kcmsock.c16
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/mac80211/aes_ccm.c46
-rw-r--r--net/mac80211/aes_ccm.h8
-rw-r--r--net/mac80211/aes_gcm.c43
-rw-r--r--net/mac80211/aes_gcm.h6
-rw-r--r--net/mac80211/aes_gmac.c26
-rw-r--r--net/mac80211/aes_gmac.h4
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/rx.c51
-rw-r--r--net/mac80211/wpa.c22
-rw-r--r--net/ncsi/internal.h2
-rw-r--r--net/ncsi/ncsi-aen.c18
-rw-r--r--net/ncsi/ncsi-manage.c126
-rw-r--r--net/netfilter/core.c121
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_queue.c48
-rw-r--r--net/netfilter/nf_tables_api.c2
-rw-r--r--net/netfilter/nft_dynset.c6
-rw-r--r--net/netfilter/nft_exthdr.c3
-rw-r--r--net/netfilter/nft_hash.c1
-rw-r--r--net/netfilter/nft_range.c26
-rw-r--r--net/netfilter/x_tables.c2
-rw-r--r--net/netfilter/xt_NFLOG.c1
-rw-r--r--net/netfilter/xt_hashlimit.c4
-rw-r--r--net/netfilter/xt_ipcomp.c2
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/openvswitch/vport.c3
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/rds/Makefile2
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/rds.h2
-rw-r--r--net/rxrpc/af_rxrpc.c4
-rw-r--r--net/rxrpc/ar-internal.h18
-rw-r--r--net/rxrpc/call_accept.c4
-rw-r--r--net/rxrpc/call_event.c77
-rw-r--r--net/rxrpc/call_object.c5
-rw-r--r--net/rxrpc/input.c44
-rw-r--r--net/rxrpc/misc.c6
-rw-r--r--net/rxrpc/output.c179
-rw-r--r--net/rxrpc/peer_object.c4
-rw-r--r--net/rxrpc/recvmsg.c14
-rw-r--r--net/rxrpc/rxkad.c6
-rw-r--r--net/rxrpc/sendmsg.c12
-rw-r--r--net/sched/act_api.c22
-rw-r--r--net/sched/act_mirred.c5
-rw-r--r--net/sched/cls_api.c21
-rw-r--r--net/sctp/output.c8
-rw-r--r--net/sctp/sm_statefuns.c12
-rw-r--r--net/sctp/socket.c5
-rw-r--r--net/socket.c61
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_generic.c13
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c20
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c82
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c23
-rw-r--r--net/sunrpc/auth_unix.c13
-rw-r--r--net/sunrpc/backchannel_rqst.c8
-rw-r--r--net/sunrpc/cache.c5
-rw-r--r--net/sunrpc/clnt.c132
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/sched.c35
-rw-r--r--net/sunrpc/svc.c17
-rw-r--r--net/sunrpc/svcauth_unix.c6
-rw-r--r--net/sunrpc/xdr.c11
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/sunrpc/xprtmultipath.c24
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c53
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c7
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c28
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c323
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c31
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c62
-rw-r--r--net/sunrpc/xprtrdma/transport.c202
-rw-r--r--net/sunrpc/xprtrdma/verbs.c239
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h108
-rw-r--r--net/sunrpc/xprtsock.c35
-rw-r--r--net/switchdev/switchdev.c9
-rw-r--r--net/tipc/bcast.c14
-rw-r--r--net/tipc/bcast.h3
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/msg.h17
-rw-r--r--net/tipc/name_distr.c1
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/unix/af_unix.c17
-rw-r--r--net/wireless/sysfs.c5
-rw-r--r--net/wireless/util.c34
153 files changed, 2704 insertions, 1359 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8de138d3306b..f2531ad66b68 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -664,7 +664,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, sizeof(*vhdr));
skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
- pp = ptype->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 1852e383afd6..553ed4ecb6a0 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -680,7 +680,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
goto error;
/* Create the Protection Domain */
- rdma->pd = ib_alloc_pd(rdma->cm_id->device);
+ rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
if (IS_ERR(rdma->pd))
goto error;
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index c68ff3dcb926..e49121ee55f6 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -20,8 +20,6 @@
#include "main.h"
-#include <linux/kconfig.h>
-
struct net_device;
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 08ce36147c4c..e034afbd1bb0 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -652,7 +652,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_softif_destroy_sysfs(hard_iface->soft_iface);
}
- hard_iface->soft_iface = NULL;
batadv_hardif_put(hard_iface);
out:
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index e0e1a88c3e58..d2905a855d1b 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -63,7 +63,7 @@ enum batadv_dbg_level {
BATADV_DBG_NC = BIT(5),
BATADV_DBG_MCAST = BIT(6),
BATADV_DBG_TP_METER = BIT(7),
- BATADV_DBG_ALL = 127,
+ BATADV_DBG_ALL = 255,
};
#ifdef CONFIG_BATMAN_ADV_DEBUG
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 5f3bfc41aeb1..7c8d16086f0f 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -544,7 +544,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
if (bat_priv->algo_ops->neigh.hardif_init)
bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
- hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
+ hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list);
out:
spin_unlock_bh(&hard_iface->neigh_list_lock);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index c8135680c43e..1015d9c8d97d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -21,8 +21,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <asm/unaligned.h>
-
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>
@@ -971,35 +969,57 @@ void __hci_req_enable_advertising(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
}
-static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
+u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
{
- size_t name_len;
- int max_len;
+ size_t short_len;
+ size_t complete_len;
- max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
- name_len = strlen(hdev->dev_name);
- if (name_len > 0 && max_len > 0) {
+ /* no space left for name (+ NULL + type + len) */
+ if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
+ return ad_len;
- if (name_len > max_len) {
- name_len = max_len;
- ptr[1] = EIR_NAME_SHORT;
- } else
- ptr[1] = EIR_NAME_COMPLETE;
+ /* use complete name if present and fits */
+ complete_len = strlen(hdev->dev_name);
+ if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH)
+ return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
+ hdev->dev_name, complete_len + 1);
- ptr[0] = name_len + 1;
+ /* use short name if present */
+ short_len = strlen(hdev->short_name);
+ if (short_len)
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
+ hdev->short_name, short_len + 1);
- memcpy(ptr + 2, hdev->dev_name, name_len);
+ /* use shortened full name if present, we already know that name
+ * is longer then HCI_MAX_SHORT_NAME_LENGTH
+ */
+ if (complete_len) {
+ u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
- ad_len += (name_len + 2);
- ptr += (name_len + 2);
+ memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH);
+ name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
+
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name,
+ sizeof(name));
}
return ad_len;
}
+static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
+{
+ return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
+}
+
static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
{
- return append_local_name(hdev, ptr, 0);
+ u8 scan_rsp_len = 0;
+
+ if (hdev->appearance) {
+ scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
+ }
+
+ return append_local_name(hdev, ptr, scan_rsp_len);
}
static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
@@ -1016,18 +1036,13 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
instance_flags = adv_instance->flags;
if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) {
- ptr[0] = 3;
- ptr[1] = EIR_APPEARANCE;
- put_unaligned_le16(hdev->appearance, ptr + 2);
- scan_rsp_len += 4;
- ptr += 4;
+ scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
}
- memcpy(ptr, adv_instance->scan_rsp_data,
+ memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data,
adv_instance->scan_rsp_len);
scan_rsp_len += adv_instance->scan_rsp_len;
- ptr += adv_instance->scan_rsp_len;
if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME)
scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len);
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index ac1e11006f38..dde77bd59f91 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -20,6 +20,8 @@
SOFTWARE IS DISCLAIMED.
*/
+#include <asm/unaligned.h>
+
#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
@@ -103,3 +105,26 @@ static inline void hci_update_background_scan(struct hci_dev *hdev)
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
+
+u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+
+static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
+ u8 *data, u8 data_len)
+{
+ eir[eir_len++] = sizeof(type) + data_len;
+ eir[eir_len++] = type;
+ memcpy(&eir[eir_len], data, data_len);
+ eir_len += data_len;
+
+ return eir_len;
+}
+
+static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
+{
+ eir[eir_len++] = sizeof(type) + sizeof(data);
+ eir[eir_len++] = type;
+ put_unaligned_le16(data, &eir[eir_len]);
+ eir_len += sizeof(data);
+
+ return eir_len;
+}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 19b8a5e9420d..1fba2a03f8ae 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -867,27 +867,6 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev,
sizeof(rp));
}
-static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
- u8 data_len)
-{
- eir[eir_len++] = sizeof(type) + data_len;
- eir[eir_len++] = type;
- memcpy(&eir[eir_len], data, data_len);
- eir_len += data_len;
-
- return eir_len;
-}
-
-static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
-{
- eir[eir_len++] = sizeof(type) + sizeof(data);
- eir[eir_len++] = type;
- put_unaligned_le16(data, &eir[eir_len]);
- eir_len += sizeof(data);
-
- return eir_len;
-}
-
static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir)
{
u16 eir_len = 0;
@@ -6038,7 +6017,15 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
return err;
}
-static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
+static u8 calculate_name_len(struct hci_dev *hdev)
+{
+ u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3];
+
+ return append_local_name(hdev, buf, 0);
+}
+
+static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags,
+ bool is_adv_data)
{
u8 max_len = HCI_MAX_AD_LENGTH;
@@ -6051,9 +6038,8 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
if (adv_flags & MGMT_ADV_FLAG_TX_POWER)
max_len -= 3;
} else {
- /* at least 1 byte of name should fit in */
if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME)
- max_len -= 3;
+ max_len -= calculate_name_len(hdev);
if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE))
max_len -= 4;
@@ -6084,12 +6070,13 @@ static bool appearance_managed(u32 adv_flags)
return adv_flags & MGMT_ADV_FLAG_APPEARANCE;
}
-static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data)
+static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
+ u8 len, bool is_adv_data)
{
int i, cur_len;
u8 max_len;
- max_len = tlv_data_max_len(adv_flags, is_adv_data);
+ max_len = tlv_data_max_len(hdev, adv_flags, is_adv_data);
if (len > max_len)
return false;
@@ -6236,8 +6223,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) ||
- !tlv_data_is_valid(flags, cp->data + cp->adv_data_len,
+ if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) ||
+ !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len,
cp->scan_rsp_len, false)) {
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_INVALID_PARAMS);
@@ -6450,8 +6437,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
rp.instance = cp->instance;
rp.flags = cp->flags;
- rp.max_adv_data_len = tlv_data_max_len(flags, true);
- rp.max_scan_rsp_len = tlv_data_max_len(flags, false);
+ rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true);
+ rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false);
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index c5fea9393946..2136e45f5277 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -972,13 +972,12 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
mod_timer(&query->timer, jiffies);
}
-void br_multicast_enable_port(struct net_bridge_port *port)
+static void __br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
- spin_lock(&br->multicast_lock);
if (br->multicast_disabled || !netif_running(br->dev))
- goto out;
+ return;
br_multicast_enable(&port->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
@@ -987,8 +986,14 @@ void br_multicast_enable_port(struct net_bridge_port *port)
if (port->multicast_router == MDB_RTR_TYPE_PERM &&
hlist_unhashed(&port->rlist))
br_multicast_add_router(br, port);
+}
-out:
+void br_multicast_enable_port(struct net_bridge_port *port)
+{
+ struct net_bridge *br = port->br;
+
+ spin_lock(&br->multicast_lock);
+ __br_multicast_enable_port(port);
spin_unlock(&br->multicast_lock);
}
@@ -1994,8 +1999,9 @@ static void br_multicast_start_querier(struct net_bridge *br,
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
{
- int err = 0;
struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_port *port;
+ int err = 0;
spin_lock_bh(&br->multicast_lock);
if (br->multicast_disabled == !val)
@@ -2023,10 +2029,9 @@ rollback:
goto rollback;
}
- br_multicast_start_querier(br, &br->ip4_own_query);
-#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_start_querier(br, &br->ip6_own_query);
-#endif
+ br_multicast_open(br);
+ list_for_each_entry(port, &br->port_list, list)
+ __br_multicast_enable_port(port);
unlock:
spin_unlock_bh(&br->multicast_lock);
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index e657258e1f2c..8bd569695e76 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -217,6 +217,7 @@ static const struct brport_attribute *brport_attrs[] = {
#endif
&brport_attr_proxyarp,
&brport_attr_proxyarp_wifi,
+ &brport_attr_multicast_flood,
NULL
};
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 84cbed630c4b..6a5180903e7b 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
+ cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
debugfs.o \
auth.o auth_none.o \
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 2bc5965fdd1e..c822b3ae1bd3 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -82,7 +82,10 @@ void ceph_auth_reset(struct ceph_auth_client *ac)
mutex_unlock(&ac->mutex);
}
-int ceph_entity_name_encode(const char *name, void **p, void *end)
+/*
+ * EntityName, not to be confused with entity_name_t
+ */
+int ceph_auth_entity_name_encode(const char *name, void **p, void *end)
{
int len = strlen(name);
@@ -111,7 +114,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
monhdr->session_mon = cpu_to_le16(-1);
monhdr->session_mon_tid = 0;
- ceph_encode_32(&p, 0); /* no protocol, yet */
+ ceph_encode_32(&p, CEPH_AUTH_UNKNOWN); /* no protocol, yet */
lenp = p;
p += sizeof(u32);
@@ -124,7 +127,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
for (i = 0; i < num; i++)
ceph_encode_32(&p, supported_protocols[i]);
- ret = ceph_entity_name_encode(ac->name, &p, end);
+ ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret < 0)
goto out;
ceph_decode_need(&p, end, sizeof(u64), bad);
@@ -259,9 +262,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
int ret = 0;
mutex_lock(&ac->mutex);
- if (!ac->protocol)
- ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
- else if (ac->ops->should_authenticate(ac))
+ if (ac->ops->should_authenticate(ac))
ret = ceph_build_auth_request(ac, msg_buf, msg_len);
mutex_unlock(&ac->mutex);
return ret;
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 5f836f02ae36..df45e467c81f 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -46,7 +46,7 @@ static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
int ret;
ceph_encode_8_safe(&p, end, 1, e_range);
- ret = ceph_entity_name_encode(ac->name, &p, end);
+ ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret < 0)
return ret;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index bddfcf6f09c2..464e88599b9d 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -566,11 +566,17 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_print_client_options);
-u64 ceph_client_id(struct ceph_client *client)
+struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client)
+{
+ return &client->msgr.inst.addr;
+}
+EXPORT_SYMBOL(ceph_client_addr);
+
+u64 ceph_client_gid(struct ceph_client *client)
{
return client->monc.auth->global_id;
}
-EXPORT_SYMBOL(ceph_client_id);
+EXPORT_SYMBOL(ceph_client_gid);
/*
* create a fresh client instance
@@ -685,7 +691,8 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
return client->auth_err;
}
- pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
+ pr_info("client%llu fsid %pU\n", ceph_client_gid(client),
+ &client->fsid);
ceph_debugfs_client_init(client);
return 0;
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 3773a4fa11e3..19b7d8aa915c 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -15,6 +15,7 @@ const char *ceph_entity_type_name(int type)
default: return "unknown";
}
}
+EXPORT_SYMBOL(ceph_entity_type_name);
const char *ceph_osd_op_name(int op)
{
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
new file mode 100644
index 000000000000..50f040fdb2a9
--- /dev/null
+++ b/net/ceph/cls_lock_client.c
@@ -0,0 +1,325 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/cls_lock_client.h>
+#include <linux/ceph/decode.h>
+
+/**
+ * ceph_cls_lock - grab rados lock for object
+ * @oid, @oloc: object to lock
+ * @lock_name: the name of the lock
+ * @type: lock type (CEPH_CLS_LOCK_EXCLUSIVE or CEPH_CLS_LOCK_SHARED)
+ * @cookie: user-defined identifier for this instance of the lock
+ * @tag: user-defined tag
+ * @desc: user-defined lock description
+ * @flags: lock flags
+ *
+ * All operations on the same lock should use the same tag.
+ */
+int ceph_cls_lock(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, u8 type, char *cookie,
+ char *tag, char *desc, u8 flags)
+{
+ int lock_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ int tag_len = strlen(tag);
+ int desc_len = strlen(desc);
+ void *p, *end;
+ struct page *lock_op_page;
+ struct timespec mtime;
+ int ret;
+
+ lock_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ tag_len + sizeof(__le32) +
+ desc_len + sizeof(__le32) +
+ sizeof(struct ceph_timespec) +
+ /* flag and type */
+ sizeof(u8) + sizeof(u8) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (lock_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ lock_op_page = alloc_page(GFP_NOIO);
+ if (!lock_op_page)
+ return -ENOMEM;
+
+ p = page_address(lock_op_page);
+ end = p + lock_op_buf_size;
+
+ /* encode cls_lock_lock_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ lock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_8(&p, type);
+ ceph_encode_string(&p, end, cookie, cookie_len);
+ ceph_encode_string(&p, end, tag, tag_len);
+ ceph_encode_string(&p, end, desc, desc_len);
+ /* only support infinite duration */
+ memset(&mtime, 0, sizeof(mtime));
+ ceph_encode_timespec(p, &mtime);
+ p += sizeof(struct ceph_timespec);
+ ceph_encode_8(&p, flags);
+
+ dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
+ __func__, lock_name, type, cookie, tag, desc, flags);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ lock_op_page, lock_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(lock_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_lock);
+
+/**
+ * ceph_cls_unlock - release rados lock for object
+ * @oid, @oloc: object to lock
+ * @lock_name: the name of the lock
+ * @cookie: user-defined identifier for this instance of the lock
+ */
+int ceph_cls_unlock(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, char *cookie)
+{
+ int unlock_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ void *p, *end;
+ struct page *unlock_op_page;
+ int ret;
+
+ unlock_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (unlock_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ unlock_op_page = alloc_page(GFP_NOIO);
+ if (!unlock_op_page)
+ return -ENOMEM;
+
+ p = page_address(unlock_op_page);
+ end = p + unlock_op_buf_size;
+
+ /* encode cls_lock_unlock_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ unlock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_string(&p, end, cookie, cookie_len);
+
+ dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ unlock_op_page, unlock_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(unlock_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_unlock);
+
+/**
+ * ceph_cls_break_lock - release rados lock for object for specified client
+ * @oid, @oloc: object to lock
+ * @lock_name: the name of the lock
+ * @cookie: user-defined identifier for this instance of the lock
+ * @locker: current lock owner
+ */
+int ceph_cls_break_lock(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, char *cookie,
+ struct ceph_entity_name *locker)
+{
+ int break_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ struct page *break_op_page;
+ void *p, *end;
+ int ret;
+
+ break_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ sizeof(u8) + sizeof(__le64) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (break_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ break_op_page = alloc_page(GFP_NOIO);
+ if (!break_op_page)
+ return -ENOMEM;
+
+ p = page_address(break_op_page);
+ end = p + break_op_buf_size;
+
+ /* encode cls_lock_break_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ break_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_copy(&p, locker, sizeof(*locker));
+ ceph_encode_string(&p, end, cookie, cookie_len);
+
+ dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
+ cookie, ENTITY_NAME(*locker));
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ break_op_page, break_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(break_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_break_lock);
+
+void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
+{
+ int i;
+
+ for (i = 0; i < num_lockers; i++)
+ kfree(lockers[i].id.cookie);
+ kfree(lockers);
+}
+EXPORT_SYMBOL(ceph_free_lockers);
+
+static int decode_locker(void **p, void *end, struct ceph_locker *locker)
+{
+ u8 struct_v;
+ u32 len;
+ char *s;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "locker_id_t", &struct_v, &len);
+ if (ret)
+ return ret;
+
+ ceph_decode_copy(p, &locker->id.name, sizeof(locker->id.name));
+ s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ locker->id.cookie = s;
+
+ ret = ceph_start_decoding(p, end, 1, "locker_info_t", &struct_v, &len);
+ if (ret)
+ return ret;
+
+ *p += sizeof(struct ceph_timespec); /* skip expiration */
+ ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
+ ceph_decode_addr(&locker->info.addr);
+ len = ceph_decode_32(p);
+ *p += len; /* skip description */
+
+ dout("%s %s%llu cookie %s addr %s\n", __func__,
+ ENTITY_NAME(locker->id.name), locker->id.cookie,
+ ceph_pr_addr(&locker->info.addr.in_addr));
+ return 0;
+}
+
+static int decode_lockers(void **p, void *end, u8 *type, char **tag,
+ struct ceph_locker **lockers, u32 *num_lockers)
+{
+ u8 struct_v;
+ u32 struct_len;
+ char *s;
+ int i;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "cls_lock_get_info_reply",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ *num_lockers = ceph_decode_32(p);
+ *lockers = kcalloc(*num_lockers, sizeof(**lockers), GFP_NOIO);
+ if (!*lockers)
+ return -ENOMEM;
+
+ for (i = 0; i < *num_lockers; i++) {
+ ret = decode_locker(p, end, *lockers + i);
+ if (ret)
+ goto err_free_lockers;
+ }
+
+ *type = ceph_decode_8(p);
+ s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
+ if (IS_ERR(s)) {
+ ret = PTR_ERR(s);
+ goto err_free_lockers;
+ }
+
+ *tag = s;
+ return 0;
+
+err_free_lockers:
+ ceph_free_lockers(*lockers, *num_lockers);
+ return ret;
+}
+
+/*
+ * On success, the caller is responsible for:
+ *
+ * kfree(tag);
+ * ceph_free_lockers(lockers, num_lockers);
+ */
+int ceph_cls_lock_info(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, u8 *type, char **tag,
+ struct ceph_locker **lockers, u32 *num_lockers)
+{
+ int get_info_op_buf_size;
+ int name_len = strlen(lock_name);
+ struct page *get_info_op_page, *reply_page;
+ size_t reply_len;
+ void *p, *end;
+ int ret;
+
+ get_info_op_buf_size = name_len + sizeof(__le32) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (get_info_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ get_info_op_page = alloc_page(GFP_NOIO);
+ if (!get_info_op_page)
+ return -ENOMEM;
+
+ reply_page = alloc_page(GFP_NOIO);
+ if (!reply_page) {
+ __free_page(get_info_op_page);
+ return -ENOMEM;
+ }
+
+ p = page_address(get_info_op_page);
+ end = p + get_info_op_buf_size;
+
+ /* encode cls_lock_get_info_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ get_info_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+
+ dout("%s lock_name %s\n", __func__, lock_name);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
+ CEPH_OSD_FLAG_READ, get_info_op_page,
+ get_info_op_buf_size, reply_page, &reply_len);
+
+ dout("%s: status %d\n", __func__, ret);
+ if (ret >= 0) {
+ p = page_address(reply_page);
+ end = p + reply_len;
+
+ ret = decode_lockers(&p, end, type, tag, lockers, num_lockers);
+ }
+
+ __free_page(get_info_op_page);
+ __free_page(reply_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_lock_info);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 5fcfb98f309e..a421e905331a 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -245,7 +245,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
/* compute 2^44*log2(input+1) */
static __u64 crush_ln(unsigned int xin)
{
- unsigned int x = xin, x1;
+ unsigned int x = xin;
int iexpon, index1, index2;
__u64 RH, LH, LL, xl64, result;
@@ -253,9 +253,15 @@ static __u64 crush_ln(unsigned int xin)
/* normalize input */
iexpon = 15;
- while (!(x & 0x18000)) {
- x <<= 1;
- iexpon--;
+
+ /*
+ * figure out number of bits we need to shift and
+ * do it in one step instead of iteratively
+ */
+ if (!(x & 0x18000)) {
+ int bits = __builtin_clz(x & 0x1FFFF) - 16;
+ x <<= bits;
+ iexpon = 15 - bits;
}
index1 = (x >> 8) << 1;
@@ -267,12 +273,11 @@ static __u64 crush_ln(unsigned int xin)
/* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
xl64 = (__s64)x * RH;
xl64 >>= 48;
- x1 = xl64;
result = iexpon;
result <<= (12 + 32);
- index2 = x1 & 0xff;
+ index2 = xl64 & 0xff;
/* LL ~ 2^48*log2(1.0+index2/2^15) */
LL = __LL_tbl[index2];
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index ef34a02719d7..a8effc8b7280 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -835,6 +835,83 @@ int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what,
}
EXPORT_SYMBOL(ceph_monc_get_version_async);
+static void handle_command_ack(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ void *p = msg->front.iov_base;
+ void *const end = p + msg->front_alloc_len;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ dout("%s msg %p tid %llu\n", __func__, msg, tid);
+
+ ceph_decode_need(&p, end, sizeof(struct ceph_mon_request_header) +
+ sizeof(u32), bad);
+ p += sizeof(struct ceph_mon_request_header);
+
+ mutex_lock(&monc->mutex);
+ req = lookup_generic_request(&monc->generic_request_tree, tid);
+ if (!req) {
+ mutex_unlock(&monc->mutex);
+ return;
+ }
+
+ req->result = ceph_decode_32(&p);
+ __finish_generic_request(req);
+ mutex_unlock(&monc->mutex);
+
+ complete_generic_request(req);
+ return;
+
+bad:
+ pr_err("corrupt mon_command ack, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
+ struct ceph_entity_addr *client_addr)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_command *h;
+ int ret = -ENOMEM;
+ int len;
+
+ req = alloc_generic_request(monc, GFP_NOIO);
+ if (!req)
+ goto out;
+
+ req->request = ceph_msg_new(CEPH_MSG_MON_COMMAND, 256, GFP_NOIO, true);
+ if (!req->request)
+ goto out;
+
+ req->reply = ceph_msg_new(CEPH_MSG_MON_COMMAND_ACK, 512, GFP_NOIO,
+ true);
+ if (!req->reply)
+ goto out;
+
+ mutex_lock(&monc->mutex);
+ register_generic_request(req);
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+ h->num_strs = cpu_to_le32(1);
+ len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \
+ \"blacklistop\": \"add\", \
+ \"addr\": \"%pISpc/%u\" }",
+ &client_addr->in_addr, le32_to_cpu(client_addr->nonce));
+ h->str_len = cpu_to_le32(len);
+ send_generic_request(monc, req);
+ mutex_unlock(&monc->mutex);
+
+ ret = wait_generic_request(req);
+out:
+ put_generic_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_monc_blacklist_add);
+
/*
* Resend pending generic requests.
*/
@@ -1139,6 +1216,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_get_version_reply(monc, msg);
break;
+ case CEPH_MSG_MON_COMMAND_ACK:
+ handle_command_ack(monc, msg);
+ break;
+
case CEPH_MSG_MON_MAP:
ceph_monc_handle_map(monc, msg);
break;
@@ -1178,6 +1259,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
m = ceph_msg_get(monc->m_subscribe_ack);
break;
case CEPH_MSG_STATFS_REPLY:
+ case CEPH_MSG_MON_COMMAND_ACK:
return get_generic_reply(con, hdr, skip);
case CEPH_MSG_AUTH_REPLY:
m = ceph_msg_get(monc->m_auth_reply);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a97e7b506612..d9bf7a1d0a58 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -338,6 +338,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
ceph_osd_data_release(&op->notify.request_data);
ceph_osd_data_release(&op->notify.response_data);
break;
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ ceph_osd_data_release(&op->list_watchers.response_data);
+ break;
default:
break;
}
@@ -863,6 +866,8 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_NOTIFY:
dst->notify.cookie = cpu_to_le64(src->notify.cookie);
break;
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ break;
case CEPH_OSD_OP_SETALLOCHINT:
dst->alloc_hint.expected_object_size =
cpu_to_le64(src->alloc_hint.expected_object_size);
@@ -1445,6 +1450,10 @@ static void setup_request_data(struct ceph_osd_request *req,
ceph_osdc_msg_data_add(req->r_reply,
&op->extent.osd_data);
break;
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ ceph_osdc_msg_data_add(req->r_reply,
+ &op->list_watchers.response_data);
+ break;
/* both */
case CEPH_OSD_OP_CALL:
@@ -3891,12 +3900,121 @@ int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
return ret;
}
+static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
+{
+ u8 struct_v;
+ u32 struct_len;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 2, "watch_item_t",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ ceph_decode_copy(p, &item->name, sizeof(item->name));
+ item->cookie = ceph_decode_64(p);
+ *p += 4; /* skip timeout_seconds */
+ if (struct_v >= 2) {
+ ceph_decode_copy(p, &item->addr, sizeof(item->addr));
+ ceph_decode_addr(&item->addr);
+ }
+
+ dout("%s %s%llu cookie %llu addr %s\n", __func__,
+ ENTITY_NAME(item->name), item->cookie,
+ ceph_pr_addr(&item->addr.in_addr));
+ return 0;
+}
+
+static int decode_watchers(void **p, void *end,
+ struct ceph_watch_item **watchers,
+ u32 *num_watchers)
+{
+ u8 struct_v;
+ u32 struct_len;
+ int i;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "obj_list_watch_response_t",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ *num_watchers = ceph_decode_32(p);
+ *watchers = kcalloc(*num_watchers, sizeof(**watchers), GFP_NOIO);
+ if (!*watchers)
+ return -ENOMEM;
+
+ for (i = 0; i < *num_watchers; i++) {
+ ret = decode_watcher(p, end, *watchers + i);
+ if (ret) {
+ kfree(*watchers);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * On success, the caller is responsible for:
+ *
+ * kfree(watchers);
+ */
+int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ struct ceph_watch_item **watchers,
+ u32 *num_watchers)
+{
+ struct ceph_osd_request *req;
+ struct page **pages;
+ int ret;
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
+ if (!req)
+ return -ENOMEM;
+
+ ceph_oid_copy(&req->r_base_oid, oid);
+ ceph_oloc_copy(&req->r_base_oloc, oloc);
+ req->r_flags = CEPH_OSD_FLAG_READ;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ goto out_put_req;
+
+ pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto out_put_req;
+ }
+
+ osd_req_op_init(req, 0, CEPH_OSD_OP_LIST_WATCHERS, 0);
+ ceph_osd_data_pages_init(osd_req_op_data(req, 0, list_watchers,
+ response_data),
+ pages, PAGE_SIZE, 0, false, true);
+
+ ceph_osdc_start_request(osdc, req, false);
+ ret = ceph_osdc_wait_request(osdc, req);
+ if (ret >= 0) {
+ void *p = page_address(pages[0]);
+ void *const end = p + req->r_ops[0].outdata_len;
+
+ ret = decode_watchers(&p, end, watchers, num_watchers);
+ }
+
+out_put_req:
+ ceph_osdc_put_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_list_watchers);
+
/*
* Call all pending notify callbacks - for use after a watch is
* unregistered, to make sure no more callbacks for it will be invoked
*/
void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
{
+ dout("%s osdc %p\n", __func__, osdc);
flush_workqueue(osdc->notify_wq);
}
EXPORT_SYMBOL(ceph_osdc_flush_notifies);
@@ -3910,6 +4028,57 @@ void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc)
EXPORT_SYMBOL(ceph_osdc_maybe_request_map);
/*
+ * Execute an OSD class method on an object.
+ *
+ * @flags: CEPH_OSD_FLAG_*
+ * @resp_len: out param for reply length
+ */
+int ceph_osdc_call(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ const char *class, const char *method,
+ unsigned int flags,
+ struct page *req_page, size_t req_len,
+ struct page *resp_page, size_t *resp_len)
+{
+ struct ceph_osd_request *req;
+ int ret;
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
+ if (!req)
+ return -ENOMEM;
+
+ ceph_oid_copy(&req->r_base_oid, oid);
+ ceph_oloc_copy(&req->r_base_oloc, oloc);
+ req->r_flags = flags;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ goto out_put_req;
+
+ osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+ if (req_page)
+ osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
+ 0, false, false);
+ if (resp_page)
+ osd_req_op_cls_response_data_pages(req, 0, &resp_page,
+ PAGE_SIZE, 0, false, false);
+
+ ceph_osdc_start_request(osdc, req, false);
+ ret = ceph_osdc_wait_request(osdc, req);
+ if (ret >= 0) {
+ ret = req->r_ops[0].rval;
+ if (resp_page)
+ *resp_len = req->r_ops[0].outdata_len;
+ }
+
+out_put_req:
+ ceph_osdc_put_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_call);
+
+/*
* init, shutdown
*/
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 00d2601407c5..1a7c9a79a53c 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -26,7 +26,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
while (got < num_pages) {
rc = get_user_pages_unlocked(
(unsigned long)data + ((unsigned long)got * PAGE_SIZE),
- num_pages - got, write_page, 0, pages + got);
+ num_pages - got, pages + got, write_page ? FOLL_WRITE : 0);
if (rc < 0)
break;
BUG_ON(rc == 0);
diff --git a/net/core/dev.c b/net/core/dev.c
index f1fe26f66458..820bac239738 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3035,6 +3035,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
}
return head;
}
+EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@@ -3845,7 +3846,7 @@ int netif_rx_ni(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_rx_ni);
-static void net_tx_action(struct softirq_action *h)
+static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4511,6 +4512,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->encap_mark = 0;
+ NAPI_GRO_CB(skb)->recursion_counter = 0;
NAPI_GRO_CB(skb)->is_fou = 0;
NAPI_GRO_CB(skb)->is_atomic = 1;
NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
@@ -5198,7 +5200,7 @@ out_unlock:
return work;
}
-static void net_rx_action(struct softirq_action *h)
+static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies + 2;
@@ -5511,10 +5513,14 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
{
struct netdev_adjacent *lower;
- lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
- struct netdev_adjacent, list);
+ lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->all_adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
- return lower ? lower->dev : NULL;
+ return lower->dev;
}
EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1a7b80f73376..ab193e5def07 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -246,15 +246,13 @@ ipv6:
case htons(ETH_P_8021AD):
case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
+ struct vlan_hdr _vlan;
+ bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
- if (skb_vlan_tag_present(skb))
+ if (vlan_tag_present)
proto = skb->protocol;
- if (!skb_vlan_tag_present(skb) ||
- proto == cpu_to_be16(ETH_P_8021Q) ||
- proto == cpu_to_be16(ETH_P_8021AD)) {
- struct vlan_hdr _vlan;
-
+ if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
data, hlen, &_vlan);
if (!vlan)
@@ -272,7 +270,7 @@ ipv6:
FLOW_DISSECTOR_KEY_VLAN,
target_container);
- if (skb_vlan_tag_present(skb)) {
+ if (vlan_tag_present) {
key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
key_vlan->vlan_priority =
(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 989434f36f96..f61c0e02a413 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -215,13 +215,14 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
*/
int peernet2id_alloc(struct net *net, struct net *peer)
{
+ unsigned long flags;
bool alloc;
int id;
- spin_lock_bh(&net->nsid_lock);
+ spin_lock_irqsave(&net->nsid_lock, flags);
alloc = atomic_read(&peer->count) == 0 ? false : true;
id = __peernet2id_alloc(net, peer, &alloc);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
if (alloc && id >= 0)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
@@ -230,11 +231,12 @@ int peernet2id_alloc(struct net *net, struct net *peer)
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
+ unsigned long flags;
int id;
- spin_lock_bh(&net->nsid_lock);
+ spin_lock_irqsave(&net->nsid_lock, flags);
id = __peernet2id(net, peer);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
return id;
}
EXPORT_SYMBOL(peernet2id);
@@ -249,17 +251,18 @@ bool peernet_has_id(struct net *net, struct net *peer)
struct net *get_net_ns_by_id(struct net *net, int id)
{
+ unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
- spin_lock_bh(&net->nsid_lock);
+ spin_lock_irqsave(&net->nsid_lock, flags);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
rcu_read_unlock();
return peer;
@@ -422,17 +425,17 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id;
- spin_lock_bh(&tmp->nsid_lock);
+ spin_lock_irq(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
+ spin_unlock_irq(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock_irq(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irq(&net->nsid_lock);
}
rtnl_unlock();
@@ -561,6 +564,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ unsigned long flags;
struct net *peer;
int nsid, err;
@@ -581,15 +585,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
if (IS_ERR(peer))
return PTR_ERR(peer);
- spin_lock_bh(&net->nsid_lock);
+ spin_lock_irqsave(&net->nsid_lock, flags);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
@@ -711,10 +715,11 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.idx = 0,
.s_idx = cb->args[0],
};
+ unsigned long flags;
- spin_lock_bh(&net->nsid_lock);
+ spin_lock_irqsave(&net->nsid_lock, flags);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
cb->args[0] = net_cb.idx;
return skb->len;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 5219a9e2127a..306b8f0e03c1 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -216,8 +216,8 @@
#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */
/* If lock -- protects updating of if_list */
-#define if_lock(t) spin_lock(&(t->if_lock));
-#define if_unlock(t) spin_unlock(&(t->if_lock));
+#define if_lock(t) mutex_lock(&(t->if_lock));
+#define if_unlock(t) mutex_unlock(&(t->if_lock));
/* Used to help with determining the pkts on receive */
#define PKTGEN_MAGIC 0xbe9be955
@@ -423,7 +423,7 @@ struct pktgen_net {
};
struct pktgen_thread {
- spinlock_t if_lock; /* for list of devices */
+ struct mutex if_lock; /* for list of devices */
struct list_head if_list; /* All device here */
struct list_head th_list;
struct task_struct *tsk;
@@ -2010,11 +2010,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
{
struct pktgen_thread *t;
+ mutex_lock(&pktgen_thread_lock);
+
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
struct pktgen_dev *pkt_dev;
- rcu_read_lock();
- list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
+ if_lock(t);
+ list_for_each_entry(pkt_dev, &t->if_list, list) {
if (pkt_dev->odev != dev)
continue;
@@ -2029,8 +2031,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
dev->name);
break;
}
- rcu_read_unlock();
+ if_unlock(t);
}
+ mutex_unlock(&pktgen_thread_lock);
}
static int pktgen_device_event(struct notifier_block *unused,
@@ -3762,7 +3765,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
return -ENOMEM;
}
- spin_lock_init(&t->if_lock);
+ mutex_init(&t->if_lock);
t->cpu = cpu;
INIT_LIST_HEAD(&t->if_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b06d2f46b83e..fb7348f13501 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1144,6 +1144,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
return 0;
+ memset(&vf_vlan_info, 0, sizeof(vf_vlan_info));
+
vf_mac.vf =
vf_vlan.vf =
vf_vlan_info.vf =
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cbd19d250947..1e3e0087245b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return false;
}
-ssize_t skb_socket_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
-
- /* Drop the socket lock, otherwise we have reverse
- * locking dependencies between sk_lock and i_mutex
- * here as compared to sendfile(). We enter here
- * with the socket lock held, and splice_to_pipe() will
- * grab the pipe inode lock. For sendfile() emulation,
- * we call into ->sendpage() with the i_mutex lock held
- * and networking will grab the socket lock.
- */
- release_sock(sk);
- ret = splice_to_pipe(pipe, spd);
- lock_sock(sk);
-
- return ret;
-}
-
/*
* Map data from the skb to a pipe. Should handle both the linear part,
* the fragments, and the frag list.
*/
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
- unsigned int flags,
- ssize_t (*splice_cb)(struct sock *,
- struct pipe_inode_info *,
- struct splice_pipe_desc *))
+ unsigned int flags)
{
struct partial_page partial[MAX_SKB_FRAGS];
struct page *pages[MAX_SKB_FRAGS];
@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
if (spd.nr_pages)
- ret = splice_cb(sk, pipe, &spd);
+ ret = splice_to_pipe(pipe, &spd);
return ret;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 038e660ef844..c73e28fc9c2a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1363,6 +1363,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
slab = prot->slab;
cgroup_sk_free(&sk->sk_cgrp_data);
+ mem_cgroup_sk_free(sk);
security_sk_free(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -1399,6 +1400,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
+ mem_cgroup_sk_alloc(sk);
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
@@ -1545,6 +1547,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+ mem_cgroup_sk_alloc(newsk);
cgroup_sk_alloc(&newsk->sk_cgrp_data);
/*
@@ -1569,9 +1572,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sk_set_socket(newsk, NULL);
newsk->sk_wq = NULL;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- sock_update_memcg(newsk);
-
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index e92b759d906c..9a1a352fd1eb 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -129,7 +129,6 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
return 0;
}
-EXPORT_SYMBOL(reuseport_add_sock);
static void reuseport_free_rcu(struct rcu_head *head)
{
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 66dff5e3d772..02acfff36028 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -439,7 +439,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, sizeof(*eh));
skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
- pp = ptype->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 5ee1d43f1310..4ebe2aa3e7d3 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -300,10 +300,6 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb,
struct hsr_frame_info *frame)
{
- struct net_device *master_dev;
-
- master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev;
-
if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) {
frame->is_local_exclusive = true;
skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1effc986739e..9648c97e541f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1391,7 +1391,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = ops->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index cf50f7e2b012..030d1531e897 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -249,7 +249,7 @@ static struct sk_buff **fou_gro_receive(struct sock *sk,
if (!ops || !ops->callbacks.gro_receive)
goto out_unlock;
- pp = ops->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -441,7 +441,7 @@ next_proto:
if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
goto out_unlock;
- pp = ops->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
flush = 0;
out_unlock:
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 96e0efecefa6..d5cac99170b1 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -229,7 +229,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
skb_gro_postpull_rcsum(skb, greh, grehlen);
- pp = ptype->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
flush = 0;
out_unlock:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 77c20a489218..ca97835bfec4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -25,6 +25,7 @@
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/tcp.h>
#include <net/sock_reuseport.h>
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
@@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
- const int dif)
+ const int dif, bool exact_dif)
{
int score = -1;
struct inet_sock *inet = inet_sk(sk);
@@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
- if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if || exact_dif) {
if (sk->sk_bound_dev_if != dif)
return -1;
score += 4;
@@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore = 0, matches = 0, reuseport = 0;
+ bool exact_dif = inet_exact_dif_match(net, skb);
struct sock *sk, *result = NULL;
u32 phash = 0;
sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif);
+ score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 05d105832bdb..03e7f7310423 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -538,7 +538,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
int ptr;
- struct net_device *dev;
struct sk_buff *skb2;
unsigned int mtu, hlen, left, len, ll_rs;
int offset;
@@ -546,8 +545,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rtable *rt = skb_rtable(skb);
int err = 0;
- dev = rt->dst.dev;
-
/* for offloaded checksums cleanup checksum before fragmentation */
if (skb->ip_summed == CHECKSUM_PARTIAL &&
(err = skb_checksum_help(skb)))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index af4919792b6a..b8a2d63d1fb8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -98,7 +98,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
}
static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
- int offset)
+ int tlen, int offset)
{
__wsum csum = skb->csum;
@@ -106,8 +106,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
return;
if (offset != 0)
- csum = csum_sub(csum, csum_partial(skb_transport_header(skb),
- offset, 0));
+ csum = csum_sub(csum,
+ csum_partial(skb_transport_header(skb) + tlen,
+ offset, 0));
put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}
@@ -153,7 +154,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
}
void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
- int offset)
+ int tlen, int offset)
{
struct inet_sock *inet = inet_sk(skb->sk);
unsigned int flags = inet->cmsg_flags;
@@ -216,7 +217,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
}
if (flags & IP_CMSG_CHECKSUM)
- ip_cmsg_recv_checksum(msg, skb, offset);
+ ip_cmsg_recv_checksum(msg, skb, tlen, offset);
}
EXPORT_SYMBOL(ip_cmsg_recv_offset);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 66ddcb60519a..205e2000d395 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -258,7 +258,7 @@ int ping_init_sock(struct sock *sk)
struct net *net = sock_net(sk);
kgid_t group = current_egid();
struct group_info *group_info;
- int i, j, count;
+ int i;
kgid_t low, high;
int ret = 0;
@@ -270,16 +270,11 @@ int ping_init_sock(struct sock *sk)
return 0;
group_info = get_current_groups();
- count = group_info->ngroups;
- for (i = 0; i < group_info->nblocks; i++) {
- int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
- for (j = 0; j < cp_count; j++) {
- kgid_t gid = group_info->blocks[i][j];
- if (gid_lte(low, gid) && gid_lte(gid, high))
- goto out_release_group;
- }
+ for (i = 0; i < group_info->ngroups; i++) {
+ kgid_t gid = group_info->gid[i];
- count -= cp_count;
+ if (gid_lte(low, gid) && gid_lte(gid, high))
+ goto out_release_group;
}
ret = -EACCES;
@@ -999,7 +994,7 @@ struct proto ping_prot = {
.init = ping_init_sock,
.close = ping_close,
.connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
+ .disconnect = __udp_disconnect,
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
.sendmsg = ping_v4_sendmsg,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 90a85c955872..ecbe5a7c2d6d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -918,7 +918,7 @@ struct proto raw_prot = {
.close = raw_close,
.destroy = raw_destroy,
.connect = ip4_datagram_connect,
- .disconnect = udp_disconnect,
+ .disconnect = __udp_disconnect,
.ioctl = raw_ioctl,
.init = raw_init,
.setsockopt = raw_setsockopt,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f2be689a6c85..62d4d90c1389 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2265,7 +2265,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
if (err) {
res.fi = NULL;
res.table = NULL;
- if (fl4->flowi4_oif) {
+ if (fl4->flowi4_oif &&
+ !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1cb67de106fe..80bc36b25de2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -96,11 +96,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
container_of(table->data, struct net, ipv4.ping_group_range.range);
unsigned int seq;
do {
- seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
+ seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
*low = data[0];
*high = data[1];
- } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
+ } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
}
/* Update system visible IP port range */
@@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
kgid_t *data = table->data;
struct net *net =
container_of(table->data, struct net, ipv4.ping_group_range.range);
- write_seqlock(&net->ipv4.ip_local_ports.lock);
+ write_seqlock(&net->ipv4.ping_group_range.lock);
data[0] = low;
data[1] = high;
- write_sequnlock(&net->ipv4.ip_local_ports.lock);
+ write_sequnlock(&net->ipv4.ping_group_range.lock);
}
/* Validate changes from /proc interface. */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f253e5019d22..3251fe71f39f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -424,8 +424,6 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- if (mem_cgroup_sockets_enabled)
- sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();
}
@@ -691,8 +689,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
int ret;
ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
- min(rd_desc->count, len), tss->flags,
- skb_socket_splice);
+ min(rd_desc->count, len), tss->flags);
if (ret > 0)
rd_desc->count -= ret;
return ret;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7ac37c314312..61b7be303eec 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -86,7 +86,6 @@
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_low_latency);
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
@@ -1871,9 +1870,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
local_bh_disable();
sk_sockets_allocated_dec(sk);
local_bh_enable();
-
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1890,7 +1886,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
- struct inet_connection_sock *icsk;
struct sock *sk = cur;
if (!sk) {
@@ -1912,7 +1907,6 @@ get_sk:
continue;
if (sk->sk_family == st->family)
return sk;
- icsk = inet_csk(sk);
}
spin_unlock_bh(&ilb->lock);
st->offset = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7d96dc2d3d08..d123d68f4d1d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1322,7 +1322,7 @@ try_again:
*addr_len = sizeof(*sin);
}
if (inet->cmsg_flags)
- ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off);
+ ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off);
err = copied;
if (flags & MSG_TRUNC)
@@ -1345,7 +1345,7 @@ csum_copy_err:
goto try_again;
}
-int udp_disconnect(struct sock *sk, int flags)
+int __udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
/*
@@ -1367,6 +1367,15 @@ int udp_disconnect(struct sock *sk, int flags)
sk_dst_reset(sk);
return 0;
}
+EXPORT_SYMBOL(__udp_disconnect);
+
+int udp_disconnect(struct sock *sk, int flags)
+{
+ lock_sock(sk);
+ __udp_disconnect(sk, flags);
+ release_sock(sk);
+ return 0;
+}
EXPORT_SYMBOL(udp_disconnect);
void udp_lib_unhash(struct sock *sk)
@@ -2193,7 +2202,7 @@ int udp_abort(struct sock *sk, int err)
sk->sk_err = err;
sk->sk_error_report(sk);
- udp_disconnect(sk, 0);
+ __udp_disconnect(sk, 0);
release_sock(sk);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f9333c963607..b2be1d9757ef 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -295,7 +295,7 @@ unflush:
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
- pp = udp_sk(sk)->gro_receive(sk, head, skb);
+ pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cbd9343751a2..060dd9922018 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -147,9 +147,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
}
#endif
-static void __ipv6_regen_rndid(struct inet6_dev *idev);
-static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
-static void ipv6_regen_rndid(unsigned long data);
+static void ipv6_regen_rndid(struct inet6_dev *idev);
+static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -409,9 +408,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
goto err_release;
}
- /* One reference from device. We must do this before
- * we invoke __ipv6_regen_rndid().
- */
+ /* One reference from device. */
in6_dev_hold(ndev);
if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -425,17 +422,15 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
#endif
INIT_LIST_HEAD(&ndev->tempaddr_list);
- setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
+ ndev->desync_factor = U32_MAX;
if ((dev->flags&IFF_LOOPBACK) ||
dev->type == ARPHRD_TUNNEL ||
dev->type == ARPHRD_TUNNEL6 ||
dev->type == ARPHRD_SIT ||
dev->type == ARPHRD_NONE) {
ndev->cnf.use_tempaddr = -1;
- } else {
- in6_dev_hold(ndev);
- ipv6_regen_rndid((unsigned long) ndev);
- }
+ } else
+ ipv6_regen_rndid(ndev);
ndev->token = in6addr_any;
@@ -447,7 +442,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
err = addrconf_sysctl_register(ndev);
if (err) {
ipv6_mc_destroy_dev(ndev);
- del_timer(&ndev->regen_timer);
snmp6_unregister_dev(ndev);
goto err_release;
}
@@ -1190,6 +1184,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
int ret = 0;
u32 addr_flags;
unsigned long now = jiffies;
+ long max_desync_factor;
+ s32 cnf_temp_preferred_lft;
write_lock_bh(&idev->lock);
if (ift) {
@@ -1222,23 +1218,42 @@ retry:
}
in6_ifa_hold(ifp);
memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
- __ipv6_try_regen_rndid(idev, tmpaddr);
+ ipv6_try_regen_rndid(idev, tmpaddr);
memcpy(&addr.s6_addr[8], idev->rndid, 8);
age = (now - ifp->tstamp) / HZ;
+
+ regen_advance = idev->cnf.regen_max_retry *
+ idev->cnf.dad_transmits *
+ NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+
+ /* recalculate max_desync_factor each time and update
+ * idev->desync_factor if it's larger
+ */
+ cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
+ max_desync_factor = min_t(__u32,
+ idev->cnf.max_desync_factor,
+ cnf_temp_preferred_lft - regen_advance);
+
+ if (unlikely(idev->desync_factor > max_desync_factor)) {
+ if (max_desync_factor > 0) {
+ get_random_bytes(&idev->desync_factor,
+ sizeof(idev->desync_factor));
+ idev->desync_factor %= max_desync_factor;
+ } else {
+ idev->desync_factor = 0;
+ }
+ }
+
tmp_valid_lft = min_t(__u32,
ifp->valid_lft,
idev->cnf.temp_valid_lft + age);
- tmp_prefered_lft = min_t(__u32,
- ifp->prefered_lft,
- idev->cnf.temp_prefered_lft + age -
- idev->cnf.max_desync_factor);
+ tmp_prefered_lft = cnf_temp_preferred_lft + age -
+ idev->desync_factor;
+ tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft);
tmp_plen = ifp->prefix_len;
tmp_tstamp = ifp->tstamp;
spin_unlock_bh(&ifp->lock);
- regen_advance = idev->cnf.regen_max_retry *
- idev->cnf.dad_transmits *
- NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
write_unlock_bh(&idev->lock);
/* A temporary address is created only if this calculated Preferred
@@ -2150,7 +2165,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
}
/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static void __ipv6_regen_rndid(struct inet6_dev *idev)
+static void ipv6_regen_rndid(struct inet6_dev *idev)
{
regen:
get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -2179,43 +2194,10 @@ regen:
}
}
-static void ipv6_regen_rndid(unsigned long data)
-{
- struct inet6_dev *idev = (struct inet6_dev *) data;
- unsigned long expires;
-
- rcu_read_lock_bh();
- write_lock_bh(&idev->lock);
-
- if (idev->dead)
- goto out;
-
- __ipv6_regen_rndid(idev);
-
- expires = jiffies +
- idev->cnf.temp_prefered_lft * HZ -
- idev->cnf.regen_max_retry * idev->cnf.dad_transmits *
- NEIGH_VAR(idev->nd_parms, RETRANS_TIME) -
- idev->cnf.max_desync_factor * HZ;
- if (time_before(expires, jiffies)) {
- pr_warn("%s: too short regeneration interval; timer disabled for %s\n",
- __func__, idev->dev->name);
- goto out;
- }
-
- if (!mod_timer(&idev->regen_timer, expires))
- in6_dev_hold(idev);
-
-out:
- write_unlock_bh(&idev->lock);
- rcu_read_unlock_bh();
- in6_dev_put(idev);
-}
-
-static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
{
if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
- __ipv6_regen_rndid(idev);
+ ipv6_regen_rndid(idev);
}
/*
@@ -2356,7 +2338,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
max_valid = 0;
max_prefered = idev->cnf.temp_prefered_lft -
- idev->cnf.max_desync_factor - age;
+ idev->desync_factor - age;
if (max_prefered < 0)
max_prefered = 0;
@@ -3018,7 +3000,7 @@ static void init_loopback(struct net_device *dev)
* lo device down, release this obsolete dst and
* reallocate a new router for ifa.
*/
- if (sp_ifa->rt->dst.obsolete > 0) {
+ if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
ip6_rt_put(sp_ifa->rt);
sp_ifa->rt = NULL;
} else {
@@ -3594,9 +3576,6 @@ restart:
if (!how)
idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
- if (how && del_timer(&idev->regen_timer))
- in6_dev_put(idev);
-
/* Step 3: clear tempaddr list */
while (!list_empty(&idev->tempaddr_list)) {
ifa = list_first_entry(&idev->tempaddr_list,
@@ -5729,6 +5708,7 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
return ret;
}
+static int minus_one = -1;
static const int one = 1;
static const int two_five_five = 255;
@@ -5789,7 +5769,8 @@ static const struct ctl_table addrconf_sysctl[] = {
.data = &ipv6_devconf.rtr_solicits,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minus_one,
},
{
.procname = "router_solicitation_interval",
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 00cf28ad4565..02761c9fe43e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
- const int dif)
+ const int dif, bool exact_dif)
{
int score = -1;
@@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
- if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if || exact_dif) {
if (sk->sk_bound_dev_if != dif)
return -1;
score++;
@@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net,
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore = 0, matches = 0, reuseport = 0;
+ bool exact_dif = inet6_exact_dif_match(net, skb);
struct sock *sk, *result = NULL;
u32 phash = 0;
sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif);
+ score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -263,13 +264,15 @@ EXPORT_SYMBOL_GPL(inet6_hash_connect);
int inet6_hash(struct sock *sk)
{
+ int err = 0;
+
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
local_bh_enable();
}
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(inet6_hash);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index e7bfd55899a3..1fcf61f1cbc3 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -246,7 +246,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
skb_gro_postpull_rcsum(skb, iph, nlen);
- pp = ops->callbacks.gro_receive(head, skb);
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6a66adba0c22..87784560dc46 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -157,6 +157,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
hash = HASH(&any, local);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_any(&t->parms.raddr) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -164,6 +165,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
hash = HASH(remote, &any);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(remote, &t->parms.raddr) &&
+ ipv6_addr_any(&t->parms.laddr) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -1170,6 +1172,7 @@ route_lookup:
if (err)
return err;
+ skb->protocol = htons(ETH_P_IPV6);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 5330262ab673..636ec56f5f50 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -120,6 +120,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
static bool setsockopt_needs_rtnl(int optname)
{
switch (optname) {
+ case IPV6_ADDRFORM:
case IPV6_ADD_MEMBERSHIP:
case IPV6_DROP_MEMBERSHIP:
case IPV6_JOIN_ANYCAST:
@@ -198,7 +199,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
fl6_free_socklist(sk);
- ipv6_sock_mc_close(sk);
+ __ipv6_sock_mc_close(sk);
/*
* Sock is moving from IPv6 to IPv4 (sk_prot), so
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 75c1fc54f188..14a3903f1c82 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -276,16 +276,14 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
return idev;
}
-void ipv6_sock_mc_close(struct sock *sk)
+void __ipv6_sock_mc_close(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc_lst;
struct net *net = sock_net(sk);
- if (!rcu_access_pointer(np->ipv6_mc_list))
- return;
+ ASSERT_RTNL();
- rtnl_lock();
while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
struct net_device *dev;
@@ -303,8 +301,17 @@ void ipv6_sock_mc_close(struct sock *sk)
atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
kfree_rcu(mc_lst, rcu);
-
}
+}
+
+void ipv6_sock_mc_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (!rcu_access_pointer(np->ipv6_mc_list))
+ return;
+ rtnl_lock();
+ __ipv6_sock_mc_close(sk);
rtnl_unlock();
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 0e983b694ee8..66e2d9dfc43a 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -180,7 +180,7 @@ struct proto pingv6_prot = {
.init = ping_init_sock,
.close = ping_close,
.connect = ip6_datagram_connect_v6_only,
- .disconnect = udp_disconnect,
+ .disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.sendmsg = ping_v6_sendmsg,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 54404f08efcc..054a1d84fc5e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1241,7 +1241,7 @@ struct proto rawv6_prot = {
.close = rawv6_close,
.destroy = raw6_destroy,
.connect = ip6_datagram_connect_v6_only,
- .disconnect = udp_disconnect,
+ .disconnect = __udp_disconnect,
.ioctl = rawv6_ioctl,
.init = rawv6_init_sk,
.setsockopt = rawv6_setsockopt,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2160d5d009cb..3815e8505ed2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -456,7 +456,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
memmove(head->head + sizeof(struct frag_hdr), head->head,
(head->data - head->head) - sizeof(struct frag_hdr));
- head->mac_header += sizeof(struct frag_hdr);
+ if (skb_mac_header_was_set(head))
+ head->mac_header += sizeof(struct frag_hdr);
head->network_header += sizeof(struct frag_hdr);
skb_reset_transport_header(head);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bdbc38e8bf29..947ed1ded026 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex,
+ const struct in6_addr *gwaddr,
+ struct net_device *dev,
unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex);
+ const struct in6_addr *gwaddr,
+ struct net_device *dev);
#endif
struct uncached_list {
@@ -656,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
struct net_device *dev = rt->dst.dev;
if (dev && !netif_carrier_ok(dev) &&
- idev->cnf.ignore_routes_with_linkdown)
+ idev->cnf.ignore_routes_with_linkdown &&
+ !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
if (rt6_check_expired(rt))
@@ -803,7 +806,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
rt = rt6_get_dflt_router(gwaddr, dev);
else
rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
- gwaddr, dev->ifindex);
+ gwaddr, dev);
if (rt && !lifetime) {
ip6_del_rt(rt);
@@ -811,8 +814,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
if (!rt && lifetime)
- rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
- pref);
+ rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
+ dev, pref);
else if (rt)
rt->rt6i_flags = RTF_ROUTEINFO |
(rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
@@ -1050,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
+ strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
@@ -1789,7 +1793,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
};
struct fib6_table *table;
struct rt6_info *rt;
- int flags = RT6_LOOKUP_F_IFACE;
+ int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
table = fib6_get_table(net, cfg->fc_table);
if (!table)
@@ -2325,13 +2329,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex)
+ const struct in6_addr *gwaddr,
+ struct net_device *dev)
{
+ u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
+ int ifindex = dev->ifindex;
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
- table = fib6_get_table(net, RT6_TABLE_INFO);
+ table = fib6_get_table(net, tb_id);
if (!table)
return NULL;
@@ -2357,12 +2364,13 @@ out:
static struct rt6_info *rt6_add_route_info(struct net *net,
const struct in6_addr *prefix, int prefixlen,
- const struct in6_addr *gwaddr, int ifindex,
+ const struct in6_addr *gwaddr,
+ struct net_device *dev,
unsigned int pref)
{
struct fib6_config cfg = {
.fc_metric = IP6_RT_PRIO_USER,
- .fc_ifindex = ifindex,
+ .fc_ifindex = dev->ifindex,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
@@ -2371,7 +2379,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
- cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
+ cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -2381,16 +2389,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
ip6_route_add(&cfg);
- return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+ return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
#endif
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
+ u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
struct rt6_info *rt;
struct fib6_table *table;
- table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+ table = fib6_get_table(dev_net(dev), tb_id);
if (!table)
return NULL;
@@ -2424,20 +2433,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
cfg.fc_gateway = *gwaddr;
- ip6_route_add(&cfg);
+ if (!ip6_route_add(&cfg)) {
+ struct fib6_table *table;
+
+ table = fib6_get_table(dev_net(dev), cfg.fc_table);
+ if (table)
+ table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
+ }
return rt6_get_dflt_router(gwaddr, dev);
}
-void rt6_purge_dflt_routers(struct net *net)
+static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
struct rt6_info *rt;
- struct fib6_table *table;
-
- /* NOTE: Keep consistent with rt6_get_dflt_router */
- table = fib6_get_table(net, RT6_TABLE_DFLT);
- if (!table)
- return;
restart:
read_lock_bh(&table->tb6_lock);
@@ -2451,6 +2460,27 @@ restart:
}
}
read_unlock_bh(&table->tb6_lock);
+
+ table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
+}
+
+void rt6_purge_dflt_routers(struct net *net)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
+ __rt6_purge_dflt_routers(table);
+ }
+ }
+
+ rcu_read_unlock();
}
static void rtmsg_to_fib6_config(struct net *net,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 54cf7197c7ab..5a27ab4eab39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1190,6 +1190,16 @@ out:
return NULL;
}
+static void tcp_v6_restore_cb(struct sk_buff *skb)
+{
+ /* We need to move header back to the beginning if xfrm6_policy_check()
+ * and tcp_v6_fill_cb() are going to be called again.
+ * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
+ */
+ memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
+ sizeof(struct inet6_skb_parm));
+}
+
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
@@ -1319,6 +1329,7 @@ ipv6_pktoptions:
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
skb_set_owner_r(opt_skb, sk);
+ tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
__kfree_skb(opt_skb);
@@ -1352,15 +1363,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->sacked = 0;
}
-static void tcp_v6_restore_cb(struct sk_buff *skb)
-{
- /* We need to move header back to the beginning if xfrm6_policy_check()
- * and tcp_v6_fill_cb() are going to be called again.
- */
- memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
- sizeof(struct inet6_skb_parm));
-}
-
static int tcp_v6_rcv(struct sk_buff *skb)
{
const struct tcphdr *th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9aa7c1c7a9ce..b2ef061e6836 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -427,7 +427,8 @@ try_again:
if (is_udp4) {
if (inet->cmsg_flags)
- ip_cmsg_recv(msg, skb);
+ ip_cmsg_recv_offset(msg, skb,
+ sizeof(struct udphdr), off);
} else {
if (np->rxopt.all)
ip6_datagram_recv_specific_ctl(sk, msg, skb);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index b7f869a85ab7..7e08a4d3d77d 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1160,19 +1160,6 @@ out:
return copied ? : err;
}
-static ssize_t kcm_sock_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
-
- release_sock(sk);
- ret = splice_to_pipe(pipe, spd);
- lock_sock(sk);
-
- return ret;
-}
-
static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
@@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
if (len > rxm->full_len)
len = rxm->full_len;
- copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
- kcm_sock_splice);
+ copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
if (copied < 0) {
err = copied;
goto err_out;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 42de4ccd159f..fce25afb652a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -338,7 +338,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
if (sock_flag(sk, SOCK_ZAPPED))
return 0;
- return udp_disconnect(sk, flags);
+ return __udp_disconnect(sk, flags);
}
static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index ea2ae6664cc8..ad3468c32b53 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -410,7 +410,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
if (sock_flag(sk, SOCK_ZAPPED))
return 0;
- return udp_disconnect(sk, flags);
+ return __udp_disconnect(sk, flags);
}
static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 7663c28ba353..a4e0d59a40dd 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -18,21 +18,24 @@
#include "key.h"
#include "aes_ccm.h"
-void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ u8 *data, size_t data_len, u8 *mic,
+ size_t mic_len)
{
struct scatterlist sg[3];
+ struct aead_request *aead_req;
+ int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+ u8 *__aad;
- char aead_req_data[sizeof(struct aead_request) +
- crypto_aead_reqsize(tfm)]
- __aligned(__alignof__(struct aead_request));
- struct aead_request *aead_req = (void *) aead_req_data;
+ aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ if (!aead_req)
+ return -ENOMEM;
- memset(aead_req, 0, sizeof(aead_req_data));
+ __aad = (u8 *)aead_req + reqsize;
+ memcpy(__aad, aad, CCM_AAD_LEN);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -41,6 +44,9 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
aead_request_set_ad(aead_req, sg[0].length);
crypto_aead_encrypt(aead_req);
+ kzfree(aead_req);
+
+ return 0;
}
int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
@@ -48,18 +54,23 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
size_t mic_len)
{
struct scatterlist sg[3];
- char aead_req_data[sizeof(struct aead_request) +
- crypto_aead_reqsize(tfm)]
- __aligned(__alignof__(struct aead_request));
- struct aead_request *aead_req = (void *) aead_req_data;
+ struct aead_request *aead_req;
+ int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+ u8 *__aad;
+ int err;
if (data_len == 0)
return -EINVAL;
- memset(aead_req, 0, sizeof(aead_req_data));
+ aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ if (!aead_req)
+ return -ENOMEM;
+
+ __aad = (u8 *)aead_req + reqsize;
+ memcpy(__aad, aad, CCM_AAD_LEN);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -67,7 +78,10 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0);
aead_request_set_ad(aead_req, sg[0].length);
- return crypto_aead_decrypt(aead_req);
+ err = crypto_aead_decrypt(aead_req);
+ kzfree(aead_req);
+
+ return err;
}
struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index 6a73d1e4d186..fcd3254c5cf0 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -12,12 +12,14 @@
#include <linux/crypto.h>
+#define CCM_AAD_LEN 32
+
struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
size_t key_len,
size_t mic_len);
-void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
+int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ u8 *data, size_t data_len, u8 *mic,
+ size_t mic_len);
int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic,
size_t mic_len);
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
index 3afe361fd27c..8a4397cc1b08 100644
--- a/net/mac80211/aes_gcm.c
+++ b/net/mac80211/aes_gcm.c
@@ -15,20 +15,23 @@
#include "key.h"
#include "aes_gcm.h"
-void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
+int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+ u8 *data, size_t data_len, u8 *mic)
{
struct scatterlist sg[3];
+ struct aead_request *aead_req;
+ int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+ u8 *__aad;
- char aead_req_data[sizeof(struct aead_request) +
- crypto_aead_reqsize(tfm)]
- __aligned(__alignof__(struct aead_request));
- struct aead_request *aead_req = (void *)aead_req_data;
+ aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
+ if (!aead_req)
+ return -ENOMEM;
- memset(aead_req, 0, sizeof(aead_req_data));
+ __aad = (u8 *)aead_req + reqsize;
+ memcpy(__aad, aad, GCM_AAD_LEN);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
@@ -37,24 +40,31 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
aead_request_set_ad(aead_req, sg[0].length);
crypto_aead_encrypt(aead_req);
+ kzfree(aead_req);
+ return 0;
}
int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic)
{
struct scatterlist sg[3];
- char aead_req_data[sizeof(struct aead_request) +
- crypto_aead_reqsize(tfm)]
- __aligned(__alignof__(struct aead_request));
- struct aead_request *aead_req = (void *)aead_req_data;
+ struct aead_request *aead_req;
+ int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+ u8 *__aad;
+ int err;
if (data_len == 0)
return -EINVAL;
- memset(aead_req, 0, sizeof(aead_req_data));
+ aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
+ if (!aead_req)
+ return -ENOMEM;
+
+ __aad = (u8 *)aead_req + reqsize;
+ memcpy(__aad, aad, GCM_AAD_LEN);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+ sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
@@ -63,7 +73,10 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
data_len + IEEE80211_GCMP_MIC_LEN, j_0);
aead_request_set_ad(aead_req, sg[0].length);
- return crypto_aead_decrypt(aead_req);
+ err = crypto_aead_decrypt(aead_req);
+ kzfree(aead_req);
+
+ return err;
}
struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 1347fda6b76a..55aed5352494 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -11,8 +11,10 @@
#include <linux/crypto.h>
-void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
+#define GCM_AAD_LEN 32
+
+int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+ u8 *data, size_t data_len, u8 *mic);
int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
u8 *data, size_t data_len, u8 *mic);
struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c
index 3ddd927aaf30..bd72a862ddb7 100644
--- a/net/mac80211/aes_gmac.c
+++ b/net/mac80211/aes_gmac.c
@@ -17,28 +17,27 @@
#include "key.h"
#include "aes_gmac.h"
-#define GMAC_MIC_LEN 16
-#define GMAC_NONCE_LEN 12
-#define AAD_LEN 20
-
int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
const u8 *data, size_t data_len, u8 *mic)
{
struct scatterlist sg[4];
- char aead_req_data[sizeof(struct aead_request) +
- crypto_aead_reqsize(tfm)]
- __aligned(__alignof__(struct aead_request));
- struct aead_request *aead_req = (void *)aead_req_data;
- u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE];
+ u8 *zero, *__aad, iv[AES_BLOCK_SIZE];
+ struct aead_request *aead_req;
+ int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
if (data_len < GMAC_MIC_LEN)
return -EINVAL;
- memset(aead_req, 0, sizeof(aead_req_data));
+ aead_req = kzalloc(reqsize + GMAC_MIC_LEN + GMAC_AAD_LEN, GFP_ATOMIC);
+ if (!aead_req)
+ return -ENOMEM;
+
+ zero = (u8 *)aead_req + reqsize;
+ __aad = zero + GMAC_MIC_LEN;
+ memcpy(__aad, aad, GMAC_AAD_LEN);
- memset(zero, 0, GMAC_MIC_LEN);
sg_init_table(sg, 4);
- sg_set_buf(&sg[0], aad, AAD_LEN);
+ sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN);
sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN);
sg_set_buf(&sg[2], zero, GMAC_MIC_LEN);
sg_set_buf(&sg[3], mic, GMAC_MIC_LEN);
@@ -49,9 +48,10 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
aead_request_set_tfm(aead_req, tfm);
aead_request_set_crypt(aead_req, sg, sg, 0, iv);
- aead_request_set_ad(aead_req, AAD_LEN + data_len);
+ aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len);
crypto_aead_encrypt(aead_req);
+ kzfree(aead_req);
return 0;
}
diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h
index d328204d73a8..32e6442c95be 100644
--- a/net/mac80211/aes_gmac.h
+++ b/net/mac80211/aes_gmac.h
@@ -11,6 +11,10 @@
#include <linux/crypto.h>
+#define GMAC_AAD_LEN 20
+#define GMAC_MIC_LEN 16
+#define GMAC_NONCE_LEN 12
+
struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[],
size_t key_len);
int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c3f610bba3fe..eede5c6db8d5 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -820,7 +820,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT)
break;
rcu_read_lock();
- sta = sta_info_get(sdata, mgmt->da);
+ sta = sta_info_get_bss(sdata, mgmt->da);
rcu_read_unlock();
if (!sta)
return -ENOLINK;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6175db385ba7..a47bbc973f2d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2298,6 +2298,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+ struct ethhdr ethhdr;
+ const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
if (unlikely(!ieee80211_is_data(fc)))
return RX_CONTINUE;
@@ -2308,24 +2310,53 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (!(status->rx_flags & IEEE80211_RX_AMSDU))
return RX_CONTINUE;
- if (ieee80211_has_a4(hdr->frame_control) &&
- rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- !rx->sdata->u.vlan.sta)
- return RX_DROP_UNUSABLE;
+ if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+ switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ if (!rx->sdata->u.vlan.sta)
+ return RX_DROP_UNUSABLE;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sdata->u.mgd.use_4addr)
+ return RX_DROP_UNUSABLE;
+ break;
+ default:
+ return RX_DROP_UNUSABLE;
+ }
+ check_da = NULL;
+ check_sa = NULL;
+ } else switch (rx->sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ check_da = NULL;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (!rx->sta ||
+ !test_sta_flag(rx->sta, WLAN_STA_TDLS_PEER))
+ check_sa = NULL;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ check_sa = NULL;
+ break;
+ default:
+ break;
+ }
- if (is_multicast_ether_addr(hdr->addr1) &&
- ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- rx->sdata->u.vlan.sta) ||
- (rx->sdata->vif.type == NL80211_IFTYPE_STATION &&
- rx->sdata->u.mgd.use_4addr)))
+ if (is_multicast_ether_addr(hdr->addr1))
return RX_DROP_UNUSABLE;
skb->dev = dev;
__skb_queue_head_init(&frame_list);
+ if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
+ rx->sdata->vif.addr,
+ rx->sdata->vif.type))
+ return RX_DROP_UNUSABLE;
+
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
rx->sdata->vif.type,
- rx->local->hw.extra_tx_headroom, true);
+ rx->local->hw.extra_tx_headroom,
+ check_da, check_sa);
while (!skb_queue_empty(&frame_list)) {
rx->skb = __skb_dequeue(&frame_list);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index b48c1e13e281..42ce9bd4426f 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -405,7 +405,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
u8 *pos;
u8 pn[6];
u64 pn64;
- u8 aad[2 * AES_BLOCK_SIZE];
+ u8 aad[CCM_AAD_LEN];
u8 b_0[AES_BLOCK_SIZE];
if (info->control.hw_key &&
@@ -461,10 +461,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
pos += IEEE80211_CCMP_HDR_LEN;
ccmp_special_blocks(skb, pn, b_0, aad);
- ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
- skb_put(skb, mic_len), mic_len);
-
- return 0;
+ return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
+ skb_put(skb, mic_len), mic_len);
}
@@ -639,7 +637,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
u8 *pos;
u8 pn[6];
u64 pn64;
- u8 aad[2 * AES_BLOCK_SIZE];
+ u8 aad[GCM_AAD_LEN];
u8 j_0[AES_BLOCK_SIZE];
if (info->control.hw_key &&
@@ -696,10 +694,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
pos += IEEE80211_GCMP_HDR_LEN;
gcmp_special_blocks(skb, pn, j_0, aad);
- ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
- skb_put(skb, IEEE80211_GCMP_MIC_LEN));
-
- return 0;
+ return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
+ skb_put(skb, IEEE80211_GCMP_MIC_LEN));
}
ieee80211_tx_result
@@ -1123,9 +1119,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
struct ieee80211_key *key = tx->key;
struct ieee80211_mmie_16 *mmie;
struct ieee80211_hdr *hdr;
- u8 aad[20];
+ u8 aad[GMAC_AAD_LEN];
u64 pn64;
- u8 nonce[12];
+ u8 nonce[GMAC_NONCE_LEN];
if (WARN_ON(skb_queue_len(&tx->skbs) != 1))
return TX_DROP;
@@ -1171,7 +1167,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_key *key = rx->key;
struct ieee80211_mmie_16 *mmie;
- u8 aad[20], mic[16], ipn[6], nonce[12];
+ u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN];
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
if (!ieee80211_is_mgmt(hdr->frame_control))
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 13290a70fa71..1308a56f2591 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -246,6 +246,7 @@ enum {
ncsi_dev_state_config_gls,
ncsi_dev_state_config_done,
ncsi_dev_state_suspend_select = 0x0401,
+ ncsi_dev_state_suspend_gls,
ncsi_dev_state_suspend_dcnt,
ncsi_dev_state_suspend_dc,
ncsi_dev_state_suspend_deselect,
@@ -264,6 +265,7 @@ struct ncsi_dev_priv {
#endif
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
+ struct ncsi_channel *hot_channel; /* Channel was ever active */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
#define NCSI_REQ_START_IDX 1
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index b41a6617d498..6898e7229285 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -141,23 +141,35 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
return -ENODEV;
/* If the channel is active one, we need reconfigure it */
+ spin_lock_irqsave(&nc->lock, flags);
ncm = &nc->modes[NCSI_MODE_LINK];
hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
ncm->data[3] = ntohl(hncdsc->status);
if (!list_empty(&nc->link) ||
- nc->state != NCSI_CHANNEL_ACTIVE ||
- (ncm->data[3] & 0x1))
+ nc->state != NCSI_CHANNEL_ACTIVE) {
+ spin_unlock_irqrestore(&nc->lock, flags);
return 0;
+ }
- if (ndp->flags & NCSI_DEV_HWA)
+ spin_unlock_irqrestore(&nc->lock, flags);
+ if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1))
ndp->flags |= NCSI_DEV_RESHUFFLE;
/* If this channel is the active one and the link doesn't
* work, we have to choose another channel to be active one.
* The logic here is exactly similar to what we do when link
* is down on the active channel.
+ *
+ * On the other hand, we need configure it when host driver
+ * state on the active channel becomes ready.
*/
ncsi_stop_channel_monitor(nc);
+
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE :
+ NCSI_CHANNEL_ACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
spin_lock_irqsave(&ndp->lock, flags);
list_add_tail_rcu(&nc->link, &ndp->channel_queue);
spin_unlock_irqrestore(&ndp->lock, flags);
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 5e509e547c2d..a3bd5fa8ad09 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -540,42 +540,86 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_suspend_select;
/* Fall through */
case ncsi_dev_state_suspend_select:
- case ncsi_dev_state_suspend_dcnt:
- case ncsi_dev_state_suspend_dc:
- case ncsi_dev_state_suspend_deselect:
ndp->pending_req_num = 1;
- np = ndp->active_package;
- nc = ndp->active_channel;
+ nca.type = NCSI_PKT_CMD_SP;
nca.package = np->id;
- if (nd->state == ncsi_dev_state_suspend_select) {
- nca.type = NCSI_PKT_CMD_SP;
- nca.channel = NCSI_RESERVED_CHANNEL;
- if (ndp->flags & NCSI_DEV_HWA)
- nca.bytes[0] = 0;
- else
- nca.bytes[0] = 1;
+ nca.channel = NCSI_RESERVED_CHANNEL;
+ if (ndp->flags & NCSI_DEV_HWA)
+ nca.bytes[0] = 0;
+ else
+ nca.bytes[0] = 1;
+
+ /* To retrieve the last link states of channels in current
+ * package when current active channel needs fail over to
+ * another one. It means we will possibly select another
+ * channel as next active one. The link states of channels
+ * are most important factor of the selection. So we need
+ * accurate link states. Unfortunately, the link states on
+ * inactive channels can't be updated with LSC AEN in time.
+ */
+ if (ndp->flags & NCSI_DEV_RESHUFFLE)
+ nd->state = ncsi_dev_state_suspend_gls;
+ else
nd->state = ncsi_dev_state_suspend_dcnt;
- } else if (nd->state == ncsi_dev_state_suspend_dcnt) {
- nca.type = NCSI_PKT_CMD_DCNT;
- nca.channel = nc->id;
- nd->state = ncsi_dev_state_suspend_dc;
- } else if (nd->state == ncsi_dev_state_suspend_dc) {
- nca.type = NCSI_PKT_CMD_DC;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ break;
+ case ncsi_dev_state_suspend_gls:
+ ndp->pending_req_num = np->channel_num;
+
+ nca.type = NCSI_PKT_CMD_GLS;
+ nca.package = np->id;
+
+ nd->state = ncsi_dev_state_suspend_dcnt;
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
nca.channel = nc->id;
- nca.bytes[0] = 1;
- nd->state = ncsi_dev_state_suspend_deselect;
- } else if (nd->state == ncsi_dev_state_suspend_deselect) {
- nca.type = NCSI_PKT_CMD_DP;
- nca.channel = NCSI_RESERVED_CHANNEL;
- nd->state = ncsi_dev_state_suspend_done;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
}
+ break;
+ case ncsi_dev_state_suspend_dcnt:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_DCNT;
+ nca.package = np->id;
+ nca.channel = nc->id;
+
+ nd->state = ncsi_dev_state_suspend_dc;
ret = ncsi_xmit_cmd(&nca);
- if (ret) {
- nd->state = ncsi_dev_state_functional;
- return;
- }
+ if (ret)
+ goto error;
+
+ break;
+ case ncsi_dev_state_suspend_dc:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_DC;
+ nca.package = np->id;
+ nca.channel = nc->id;
+ nca.bytes[0] = 1;
+
+ nd->state = ncsi_dev_state_suspend_deselect;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
+
+ break;
+ case ncsi_dev_state_suspend_deselect:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_DP;
+ nca.package = np->id;
+ nca.channel = NCSI_RESERVED_CHANNEL;
+
+ nd->state = ncsi_dev_state_suspend_done;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
break;
case ncsi_dev_state_suspend_done:
@@ -589,6 +633,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
nd->state);
}
+
+ return;
+error:
+ nd->state = ncsi_dev_state_functional;
}
static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
@@ -597,6 +645,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
struct net_device *dev = nd->dev;
struct ncsi_package *np = ndp->active_package;
struct ncsi_channel *nc = ndp->active_channel;
+ struct ncsi_channel *hot_nc = NULL;
struct ncsi_cmd_arg nca;
unsigned char index;
unsigned long flags;
@@ -702,12 +751,20 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
break;
case ncsi_dev_state_config_done:
spin_lock_irqsave(&nc->lock, flags);
- if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1)
+ if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+ hot_nc = nc;
nc->state = NCSI_CHANNEL_ACTIVE;
- else
+ } else {
+ hot_nc = NULL;
nc->state = NCSI_CHANNEL_INACTIVE;
+ }
spin_unlock_irqrestore(&nc->lock, flags);
+ /* Update the hot channel */
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->hot_channel = hot_nc;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
ncsi_start_channel_monitor(nc);
ncsi_process_next_channel(ndp);
break;
@@ -725,10 +782,14 @@ error:
static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_package *np;
- struct ncsi_channel *nc, *found;
+ struct ncsi_channel *nc, *found, *hot_nc;
struct ncsi_channel_mode *ncm;
unsigned long flags;
+ spin_lock_irqsave(&ndp->lock, flags);
+ hot_nc = ndp->hot_channel;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
/* The search is done once an inactive channel with up
* link is found.
*/
@@ -746,6 +807,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
if (!found)
found = nc;
+ if (nc == hot_nc)
+ found = nc;
+
ncm = &nc->modes[NCSI_MODE_LINK];
if (ncm->data[2] & 0x1) {
spin_unlock_irqrestore(&nc->lock, flags);
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c9d90eb64046..004af030ef1a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -65,49 +65,24 @@ static DEFINE_MUTEX(nf_hook_mutex);
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
- const struct nf_hook_ops *reg)
+static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *hook_head = NULL;
-
if (reg->pf != NFPROTO_NETDEV)
- hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
- [reg->hooknum]);
- else if (reg->hooknum == NF_NETDEV_INGRESS) {
+ return net->nf.hooks[reg->pf]+reg->hooknum;
+
#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
if (reg->dev && dev_net(reg->dev) == net)
- hook_head =
- nf_entry_dereference(
- reg->dev->nf_hooks_ingress);
-#endif
+ return &reg->dev->nf_hooks_ingress;
}
- return hook_head;
-}
-
-/* must hold nf_hook_mutex */
-static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
- struct nf_hook_entry *entry)
-{
- switch (reg->pf) {
- case NFPROTO_NETDEV:
-#ifdef CONFIG_NETFILTER_INGRESS
- /* We already checked in nf_register_net_hook() that this is
- * used from ingress.
- */
- rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
#endif
- break;
- default:
- rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
- entry);
- break;
- }
+ return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *hooks_entry;
- struct nf_hook_entry *entry;
+ struct nf_hook_entry __rcu **pp;
+ struct nf_hook_entry *entry, *p;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -119,6 +94,10 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
return -EINVAL;
}
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return -EINVAL;
+
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
@@ -128,26 +107,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
entry->next = NULL;
mutex_lock(&nf_hook_mutex);
- hooks_entry = nf_hook_entry_head(net, reg);
-
- if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
- /* This is the case where we need to insert at the head */
- entry->next = hooks_entry;
- hooks_entry = NULL;
- }
- while (hooks_entry &&
- reg->priority >= hooks_entry->orig_ops->priority &&
- nf_entry_dereference(hooks_entry->next)) {
- hooks_entry = nf_entry_dereference(hooks_entry->next);
- }
-
- if (hooks_entry) {
- entry->next = nf_entry_dereference(hooks_entry->next);
- rcu_assign_pointer(hooks_entry->next, entry);
- } else {
- nf_set_hooks_head(net, reg, entry);
+ /* Find the spot in the list */
+ while ((p = nf_entry_dereference(*pp)) != NULL) {
+ if (reg->priority < p->orig_ops->priority)
+ break;
+ pp = &p->next;
}
+ rcu_assign_pointer(entry->next, p);
+ rcu_assign_pointer(*pp, entry);
mutex_unlock(&nf_hook_mutex);
#ifdef CONFIG_NETFILTER_INGRESS
@@ -163,33 +131,23 @@ EXPORT_SYMBOL(nf_register_net_hook);
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *hooks_entry;
+ struct nf_hook_entry __rcu **pp;
+ struct nf_hook_entry *p;
- mutex_lock(&nf_hook_mutex);
- hooks_entry = nf_hook_entry_head(net, reg);
- if (hooks_entry && hooks_entry->orig_ops == reg) {
- nf_set_hooks_head(net, reg,
- nf_entry_dereference(hooks_entry->next));
- goto unlock;
- }
- while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
- struct nf_hook_entry *next =
- nf_entry_dereference(hooks_entry->next);
- struct nf_hook_entry *nnext;
+ pp = nf_hook_entry_head(net, reg);
+ if (WARN_ON_ONCE(!pp))
+ return;
- if (next->orig_ops != reg) {
- hooks_entry = next;
- continue;
+ mutex_lock(&nf_hook_mutex);
+ while ((p = nf_entry_dereference(*pp)) != NULL) {
+ if (p->orig_ops == reg) {
+ rcu_assign_pointer(*pp, p->next);
+ break;
}
- nnext = nf_entry_dereference(next->next);
- rcu_assign_pointer(hooks_entry->next, nnext);
- hooks_entry = next;
- break;
+ pp = &p->next;
}
-
-unlock:
mutex_unlock(&nf_hook_mutex);
- if (!hooks_entry) {
+ if (!p) {
WARN(1, "nf_unregister_net_hook: hook not found!\n");
return;
}
@@ -201,10 +159,10 @@ unlock:
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(net, hooks_entry);
+ nf_queue_nf_hook_drop(net, p);
/* other cpu might still process nfqueue verdict that used reg */
synchronize_net();
- kfree(hooks_entry);
+ kfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -403,16 +361,9 @@ next_hook:
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- int err;
-
- RCU_INIT_POINTER(state->hook_entries, entry);
- err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
- if (err < 0) {
- if (err == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
- goto next_hook;
- kfree_skb(skb);
- }
+ ret = nf_queue(skb, state, &entry, verdict);
+ if (ret == 1 && entry)
+ goto next_hook;
}
return ret;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ba6a1d421222..df2f5a3901df 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -983,7 +983,7 @@ static void gc_worker(struct work_struct *work)
return;
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90)
+ if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
next_run = 0;
gc_work->last_bucket = i;
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index e0adb5959342..9fdb655f85bc 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -18,7 +18,7 @@ unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- unsigned int queuenum);
+ struct nf_hook_entry **entryp, unsigned int verdict);
void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
int __init netfilter_queue_init(void);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 96964a0070e1..8f08d759844a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -107,13 +107,8 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
rcu_read_unlock();
}
-/*
- * Any packet that leaves via this function must come back
- * through nf_reinject().
- */
-int nf_queue(struct sk_buff *skb,
- struct nf_hook_state *state,
- unsigned int queuenum)
+static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
+ unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -161,6 +156,27 @@ err:
return status;
}
+/* Packets leaving via this function must come back through nf_reinject(). */
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ struct nf_hook_entry **entryp, unsigned int verdict)
+{
+ struct nf_hook_entry *entry = *entryp;
+ int ret;
+
+ RCU_INIT_POINTER(state->hook_entries, entry);
+ ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
+ if (ret < 0) {
+ if (ret == -ESRCH &&
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
+ *entryp = rcu_dereference(entry->next);
+ return 1;
+ }
+ kfree_skb(skb);
+ }
+
+ return 0;
+}
+
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
struct nf_hook_entry *hook_entry;
@@ -187,26 +203,26 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
entry->state.thresh = INT_MIN;
if (verdict == NF_ACCEPT) {
- next_hook:
- verdict = nf_iterate(skb, &entry->state, &hook_entry);
+ hook_entry = rcu_dereference(hook_entry->next);
+ if (hook_entry)
+next_hook:
+ verdict = nf_iterate(skb, &entry->state, &hook_entry);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
+okfn:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- RCU_INIT_POINTER(entry->state.hook_entries, hook_entry);
- err = nf_queue(skb, &entry->state,
- verdict >> NF_VERDICT_QBITS);
- if (err < 0) {
- if (err == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+ err = nf_queue(skb, &entry->state, &hook_entry, verdict);
+ if (err == 1) {
+ if (hook_entry)
goto next_hook;
- kfree_skb(skb);
+ goto okfn;
}
break;
case NF_STOLEN:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b70d3ea1430e..24db22257586 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4423,7 +4423,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
*/
unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
{
- int val;
+ u32 val;
val = ntohl(nla_get_be32(attr));
if (val > max)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index e3b83c31da2e..517f08767a3c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -158,7 +158,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
- timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+ timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
+ tb[NFTA_DYNSET_TIMEOUT])));
}
priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
@@ -246,7 +247,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
goto nla_put_failure;
- if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout),
+ if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
+ cpu_to_be64(jiffies_to_msecs(priv->timeout)),
NFTA_DYNSET_PAD))
goto nla_put_failure;
if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a84cf3d66056..47beb3abcc9d 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,7 +59,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- u32 offset, len, err;
+ u32 offset, len;
+ int err;
if (tb[NFTA_EXTHDR_DREG] == NULL ||
tb[NFTA_EXTHDR_TYPE] == NULL ||
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 09473b415b95..baf694de3935 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -44,6 +44,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_LEN] = { .type = NLA_U32 },
[NFTA_HASH_MODULUS] = { .type = NLA_U32 },
[NFTA_HASH_SEED] = { .type = NLA_U32 },
+ [NFTA_HASH_OFFSET] = { .type = NLA_U32 },
};
static int nft_hash_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index c6d5358482d1..fbc88009ca2e 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -28,22 +28,20 @@ static void nft_range_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
const struct nft_range_expr *priv = nft_expr_priv(expr);
- bool mismatch;
int d1, d2;
d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len);
d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len);
switch (priv->op) {
case NFT_RANGE_EQ:
- mismatch = (d1 < 0 || d2 > 0);
+ if (d1 < 0 || d2 > 0)
+ regs->verdict.code = NFT_BREAK;
break;
case NFT_RANGE_NEQ:
- mismatch = (d1 >= 0 && d2 <= 0);
+ if (d1 >= 0 && d2 <= 0)
+ regs->verdict.code = NFT_BREAK;
break;
}
-
- if (mismatch)
- regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
@@ -59,6 +57,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
struct nft_range_expr *priv = nft_expr_priv(expr);
struct nft_data_desc desc_from, desc_to;
int err;
+ u32 op;
err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
&desc_from, tb[NFTA_RANGE_FROM_DATA]);
@@ -80,7 +79,20 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
if (err < 0)
goto err2;
- priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP]));
+ err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op);
+ if (err < 0)
+ goto err2;
+
+ switch (op) {
+ case NFT_RANGE_EQ:
+ case NFT_RANGE_NEQ:
+ break;
+ default:
+ err = -EINVAL;
+ goto err2;
+ }
+
+ priv->op = op;
priv->len = desc_from.len;
return 0;
err2:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e0aa7c1d0224..fc4977456c30 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1513,7 +1513,7 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
if (!num_hooks)
return ERR_PTR(-EINVAL);
- ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
+ ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
if (ops == NULL)
return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 018eed7e1ff1..8668a5c18dc3 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -32,6 +32,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.ulog.copy_len = info->len;
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
+ li.u.ulog.flags = 0;
if (info->flags & XT_NFLOG_F_COPY_LEN)
li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2fab0c65aa94..b89b688e9d01 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -431,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
*/
#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
-#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24))
/* Repeated shift and or gives us all 1s, final shift and add 1 gives
* us the power of 2 below the theoretical max, so GCC simply does a
@@ -473,7 +473,7 @@ static u64 user2credits(u64 user, int revision)
return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1,
XT_HASHLIMIT_SCALE);
} else {
- if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+ if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY))
return div64_u64(user, XT_HASHLIMIT_SCALE_v2)
* HZ * CREDITS_PER_JIFFY;
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index 89d53104c6b3..000e70377f85 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -26,6 +26,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Fan Du <fan.du@windriver.com>");
MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
+MODULE_ALIAS("ipt_ipcomp");
+MODULE_ALIAS("ip6t_ipcomp");
/* Returns 1 if the spi is matched by the range, 0 otherwise */
static inline bool
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 627f898c05b9..62bea4591054 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1832,7 +1832,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
/* Record the max length of recvmsg() calls for future allocations */
nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
- 16384);
+ SKB_WITH_OVERHEAD(32768));
copied = data_skb->len;
if (len < copied) {
@@ -2083,8 +2083,9 @@ static int netlink_dump(struct sock *sk)
if (alloc_min_size < nlk->max_recvmsg_len) {
alloc_size = nlk->max_recvmsg_len;
- skb = alloc_skb(alloc_size, GFP_KERNEL |
- __GFP_NOWARN | __GFP_NORETRY);
+ skb = alloc_skb(alloc_size,
+ (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_NOWARN | __GFP_NORETRY);
}
if (!skb) {
alloc_size = alloc_min_size;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c8c82e109c68..22087062bd10 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -343,7 +343,7 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
key->eth.cvlan.tci = 0;
key->eth.cvlan.tpid = 0;
- if (likely(skb_vlan_tag_present(skb))) {
+ if (skb_vlan_tag_present(skb)) {
key->eth.vlan.tci = htons(skb->vlan_tci);
key->eth.vlan.tpid = skb->vlan_proto;
} else {
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 95c36147a6e1..e7da29021b38 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -176,7 +176,7 @@ static void do_setup(struct net_device *netdev)
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
- netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
eth_hw_addr_random(netdev);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 8f198437c724..7387418ac514 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -485,7 +485,8 @@ static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
- if (skb_vlan_tagged(skb))
+ if (!skb_vlan_tag_present(skb) &&
+ eth_type_vlan(skb->protocol))
length -= VLAN_HLEN;
/* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 33a4697d5539..d2238b204691 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
static int packet_direct_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
- netdev_features_t features;
+ struct sk_buff *orig_skb = skb;
struct netdev_queue *txq;
int ret = NETDEV_TX_BUSY;
@@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb)
!netif_carrier_ok(dev)))
goto drop;
- features = netif_skb_features(skb);
- if (skb_needs_linearize(skb, features) &&
- __skb_linearize(skb))
+ skb = validate_xmit_skb_list(skb, dev);
+ if (skb != orig_skb)
goto drop;
txq = skb_get_tx_queue(dev, skb);
@@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
return ret;
drop:
atomic_long_inc(&dev->tx_dropped);
- kfree_skb(skb);
+ kfree_skb_list(skb);
return NET_XMIT_DROP;
}
@@ -3952,6 +3951,7 @@ static int packet_notifier(struct notifier_block *this,
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
+ fanout_release(sk);
po->ifindex = -1;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 0e72bec1529f..56c7d27eefee 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -13,5 +13,5 @@ obj-$(CONFIG_RDS_TCP) += rds_tcp.o
rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
tcp_send.o tcp_stats.o
-ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG
+ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 7eaf887e46f8..5680d90b0b77 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -160,7 +160,7 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
rds_ibdev->dev = device;
- rds_ibdev->pd = ib_alloc_pd(device);
+ rds_ibdev->pd = ib_alloc_pd(device, 0);
if (IS_ERR(rds_ibdev->pd)) {
rds_ibdev->pd = NULL;
goto put_dev;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index fd0bccb2f9f9..67ba67c058b1 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -33,7 +33,7 @@
#define KERNEL_HAS_ATOMIC64
#endif
-#ifdef DEBUG
+#ifdef RDS_DEBUG
#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
#else
/* sigh, pr_debug() causes unused variable warnings */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 44c9c2b0b190..2d59c9be40e1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -678,9 +678,9 @@ static int rxrpc_release_sock(struct sock *sk)
sk->sk_state = RXRPC_CLOSE;
spin_unlock_bh(&sk->sk_receive_queue.lock);
- if (rx->local && rx->local->service == rx) {
+ if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
write_lock(&rx->local->services_lock);
- rx->local->service = NULL;
+ rcu_assign_pointer(rx->local->service, NULL);
write_unlock(&rx->local->services_lock);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index d38dffd78085..f60e35576526 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -398,6 +398,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
};
@@ -410,6 +411,7 @@ enum rxrpc_call_event {
RXRPC_CALL_EV_ABORT, /* need to generate abort */
RXRPC_CALL_EV_TIMER, /* Timer expired */
RXRPC_CALL_EV_RESEND, /* Tx resend required */
+ RXRPC_CALL_EV_PING, /* Ping send required */
};
/*
@@ -466,6 +468,7 @@ struct rxrpc_call {
struct rxrpc_sock __rcu *socket; /* socket responsible */
ktime_t ack_at; /* When deferred ACK needs to happen */
ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t ping_at; /* When next to send a ping */
ktime_t expire_at; /* When the call times out */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
@@ -558,8 +561,10 @@ struct rxrpc_call {
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
- rxrpc_serial_t ackr_ping; /* Last ping sent */
- ktime_t ackr_ping_time; /* Time last ping sent */
+
+ /* ping management */
+ rxrpc_serial_t ping_serial; /* Last ping sent */
+ ktime_t ping_time; /* Time last ping sent */
/* transmission-phase ACK management */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
@@ -728,8 +733,10 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5];
enum rxrpc_timer_trace {
rxrpc_timer_begin,
rxrpc_timer_init_for_reply,
+ rxrpc_timer_init_for_send_reply,
rxrpc_timer_expired,
rxrpc_timer_set_for_ack,
+ rxrpc_timer_set_for_ping,
rxrpc_timer_set_for_resend,
rxrpc_timer_set_for_send,
rxrpc_timer__nr_trace
@@ -743,6 +750,7 @@ enum rxrpc_propose_ack_trace {
rxrpc_propose_ack_ping_for_lost_ack,
rxrpc_propose_ack_ping_for_lost_reply,
rxrpc_propose_ack_ping_for_params,
+ rxrpc_propose_ack_processing_op,
rxrpc_propose_ack_respond_to_ack,
rxrpc_propose_ack_respond_to_ping,
rxrpc_propose_ack_retry_tx,
@@ -777,7 +785,7 @@ extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];
extern const char *const rxrpc_pkts[];
-extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
+extern const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
#include <trace/events/rxrpc.h>
@@ -805,6 +813,7 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
+void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
enum rxrpc_propose_ack_trace);
@@ -1068,7 +1077,8 @@ extern const s8 rxrpc_ack_priority[];
/*
* output.c
*/
-int rxrpc_send_call_packet(struct rxrpc_call *, u8);
+int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
+int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
void rxrpc_reject_packets(struct rxrpc_local *);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3cac231d8405..832d854c2d5c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -337,7 +337,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
/* Get the socket providing the service */
rx = rcu_dereference(local->service);
- if (service_id == rx->srx.srx_service)
+ if (rx && service_id == rx->srx.srx_service)
goto found_service;
trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
@@ -565,7 +565,7 @@ out_discard:
write_unlock_bh(&call->state_lock);
write_unlock(&rx->call_lock);
if (abort) {
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 4f00476630b9..97a17ada4431 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -24,19 +24,20 @@
/*
* Set the timer
*/
-void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
- ktime_t now)
+void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+ ktime_t now)
{
unsigned long t_j, now_j = jiffies;
ktime_t t;
bool queue = false;
- read_lock_bh(&call->state_lock);
-
if (call->state < RXRPC_CALL_COMPLETE) {
t = call->expire_at;
- if (!ktime_after(t, now))
+ if (!ktime_after(t, now)) {
+ trace_rxrpc_timer(call, why, now, now_j);
+ queue = true;
goto out;
+ }
if (!ktime_after(call->resend_at, now)) {
call->resend_at = call->expire_at;
@@ -54,6 +55,14 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
t = call->ack_at;
}
+ if (!ktime_after(call->ping_at, now)) {
+ call->ping_at = call->expire_at;
+ if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
+ queue = true;
+ } else if (ktime_before(call->ping_at, t)) {
+ t = call->ping_at;
+ }
+
t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
t_j += jiffies;
@@ -68,16 +77,46 @@ void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
mod_timer(&call->timer, t_j);
trace_rxrpc_timer(call, why, now, now_j);
}
-
- if (queue)
- rxrpc_queue_call(call);
}
out:
+ if (queue)
+ rxrpc_queue_call(call);
+}
+
+/*
+ * Set the timer
+ */
+void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+ ktime_t now)
+{
+ read_lock_bh(&call->state_lock);
+ __rxrpc_set_timer(call, why, now);
read_unlock_bh(&call->state_lock);
}
/*
+ * Propose a PING ACK be sent.
+ */
+static void rxrpc_propose_ping(struct rxrpc_call *call,
+ bool immediate, bool background)
+{
+ if (immediate) {
+ if (background &&
+ !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
+ rxrpc_queue_call(call);
+ } else {
+ ktime_t now = ktime_get_real();
+ ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay);
+
+ if (ktime_before(ping_at, call->ping_at)) {
+ call->ping_at = ping_at;
+ rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now);
+ }
+ }
+}
+
+/*
* propose an ACK be sent
*/
static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
@@ -90,6 +129,14 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
ktime_t now, ack_at;
s8 prior = rxrpc_ack_priority[ack_reason];
+ /* Pings are handled specially because we don't want to accidentally
+ * lose a ping response by subsuming it into a ping.
+ */
+ if (ack_reason == RXRPC_ACK_PING) {
+ rxrpc_propose_ping(call, immediate, background);
+ goto trace;
+ }
+
/* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
* numbers, but we don't alter the timeout.
*/
@@ -125,7 +172,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
expiry = rxrpc_soft_ack_delay;
break;
- case RXRPC_ACK_PING:
case RXRPC_ACK_IDLE:
if (rxrpc_idle_ack_delay < expiry)
expiry = rxrpc_idle_ack_delay;
@@ -253,7 +299,7 @@ static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, true);
goto out;
}
@@ -328,12 +374,13 @@ void rxrpc_process_call(struct work_struct *work)
recheck_state:
if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
goto recheck_state;
}
if (call->state == RXRPC_CALL_COMPLETE) {
del_timer_sync(&call->timer);
+ rxrpc_notify_socket(call);
goto out_put;
}
@@ -345,13 +392,17 @@ recheck_state:
}
if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
- call->ack_at = call->expire_at;
if (call->ackr_reason) {
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
goto recheck_state;
}
}
+ if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
+ rxrpc_send_ack_packet(call, true);
+ goto recheck_state;
+ }
+
if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
rxrpc_resend(call, now);
goto recheck_state;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 364b42dc3dce..1ed18d8c9c9f 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -205,6 +205,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
call->expire_at = expire_at;
call->ack_at = expire_at;
+ call->ping_at = expire_at;
call->resend_at = expire_at;
call->timer.expires = jiffies + LONG_MAX / 2;
rxrpc_set_timer(call, rxrpc_timer_begin, now);
@@ -275,7 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
goto error;
trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
- here, ERR_PTR(ret));
+ here, NULL);
spin_lock_bh(&call->conn->params.peer->lock);
hlist_add_head(&call->error_link,
@@ -498,7 +499,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_got);
rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 3ad9f75031e3..44fb8d893c7d 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -625,9 +625,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call,
rxrpc_serial_t ping_serial;
ktime_t ping_time;
- ping_time = call->ackr_ping_time;
+ ping_time = call->ping_time;
smp_rmb();
- ping_serial = call->ackr_ping;
+ ping_serial = call->ping_serial;
if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
before(orig_serial, ping_serial))
@@ -847,7 +847,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
RXRPC_TX_ANNO_LAST &&
- summary.nr_acks == call->tx_top - hard_ack)
+ summary.nr_acks == call->tx_top - hard_ack &&
+ rxrpc_is_client_call(call))
rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
false, true,
rxrpc_propose_ack_ping_for_lost_reply);
@@ -938,6 +939,33 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
}
/*
+ * Handle a new call on a channel implicitly completing the preceding call on
+ * that channel.
+ *
+ * TODO: If callNumber > call_id + 1, renegotiate security.
+ */
+static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
+{
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ rxrpc_call_completed(call);
+ break;
+ case RXRPC_CALL_COMPLETE:
+ break;
+ default:
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) {
+ set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ break;
+ }
+
+ __rxrpc_disconnect_call(conn, call);
+ rxrpc_notify_socket(call);
+}
+
+/*
* post connection-level events to the connection
* - this includes challenges, responses, some aborts and call terminal packet
* retransmission.
@@ -1145,6 +1173,16 @@ void rxrpc_data_ready(struct sock *udp_sk)
}
call = rcu_dereference(chan->call);
+
+ if (sp->hdr.callNumber > chan->call_id) {
+ if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) {
+ rcu_read_unlock();
+ goto reject_packet;
+ }
+ if (call)
+ rxrpc_input_implicit_end_call(conn, call);
+ call = NULL;
+ }
} else {
skew = 0;
call = NULL;
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 9d1c721bc4e8..6dee55fad2d3 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -93,10 +93,9 @@ const s8 rxrpc_ack_priority[] = {
[RXRPC_ACK_EXCEEDS_WINDOW] = 6,
[RXRPC_ACK_NOSPACE] = 7,
[RXRPC_ACK_PING_RESPONSE] = 8,
- [RXRPC_ACK_PING] = 9,
};
-const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
+const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
"IDL", "-?-"
};
@@ -196,7 +195,9 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = {
[rxrpc_timer_begin] = "Begin ",
[rxrpc_timer_expired] = "*EXPR*",
[rxrpc_timer_init_for_reply] = "IniRpl",
+ [rxrpc_timer_init_for_send_reply] = "SndRpl",
[rxrpc_timer_set_for_ack] = "SetAck",
+ [rxrpc_timer_set_for_ping] = "SetPng",
[rxrpc_timer_set_for_send] = "SetTx ",
[rxrpc_timer_set_for_resend] = "SetRTx",
};
@@ -207,6 +208,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = {
[rxrpc_propose_ack_ping_for_lost_ack] = "LostAck",
[rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl",
[rxrpc_propose_ack_ping_for_params] = "Params ",
+ [rxrpc_propose_ack_processing_op] = "ProcOp ",
[rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack",
[rxrpc_propose_ack_respond_to_ping] = "Rsp2Png",
[rxrpc_propose_ack_retry_tx] = "RetryTx",
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 0d47db886f6e..5dab1ff3a6c2 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -19,26 +19,27 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-struct rxrpc_pkt_buffer {
+struct rxrpc_ack_buffer {
struct rxrpc_wire_header whdr;
- union {
- struct {
- struct rxrpc_ackpacket ack;
- u8 acks[255];
- u8 pad[3];
- };
- __be32 abort_code;
- };
+ struct rxrpc_ackpacket ack;
+ u8 acks[255];
+ u8 pad[3];
struct rxrpc_ackinfo ackinfo;
};
+struct rxrpc_abort_buffer {
+ struct rxrpc_wire_header whdr;
+ __be32 abort_code;
+};
+
/*
* Fill out an ACK packet.
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
- struct rxrpc_pkt_buffer *pkt,
+ struct rxrpc_ack_buffer *pkt,
rxrpc_seq_t *_hard_ack,
- rxrpc_seq_t *_top)
+ rxrpc_seq_t *_top,
+ u8 reason)
{
rxrpc_serial_t serial;
rxrpc_seq_t hard_ack, top, seq;
@@ -58,10 +59,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
pkt->ack.firstPacket = htonl(hard_ack + 1);
pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
pkt->ack.serial = htonl(serial);
- pkt->ack.reason = call->ackr_reason;
+ pkt->ack.reason = reason;
pkt->ack.nAcks = top - hard_ack;
- if (pkt->ack.reason == RXRPC_ACK_PING)
+ if (reason == RXRPC_ACK_PING)
pkt->whdr.flags |= RXRPC_REQUEST_ACK;
if (after(top, hard_ack)) {
@@ -91,22 +92,19 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
}
/*
- * Send an ACK or ABORT call packet.
+ * Send an ACK call packet.
*/
-int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
{
struct rxrpc_connection *conn = NULL;
- struct rxrpc_pkt_buffer *pkt;
+ struct rxrpc_ack_buffer *pkt;
struct msghdr msg;
struct kvec iov[2];
rxrpc_serial_t serial;
rxrpc_seq_t hard_ack, top;
size_t len, n;
- bool ping = false;
- int ioc, ret;
- u32 abort_code;
-
- _enter("%u,%s", call->debug_id, rxrpc_pkts[type]);
+ int ret;
+ u8 reason;
spin_lock_bh(&call->lock);
if (call->conn)
@@ -131,68 +129,44 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
pkt->whdr.cid = htonl(call->cid);
pkt->whdr.callNumber = htonl(call->call_id);
pkt->whdr.seq = 0;
- pkt->whdr.type = type;
- pkt->whdr.flags = conn->out_clientflag;
+ pkt->whdr.type = RXRPC_PACKET_TYPE_ACK;
+ pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag;
pkt->whdr.userStatus = 0;
pkt->whdr.securityIndex = call->security_ix;
pkt->whdr._rsvd = 0;
pkt->whdr.serviceId = htons(call->service_id);
- iov[0].iov_base = pkt;
- iov[0].iov_len = sizeof(pkt->whdr);
- len = sizeof(pkt->whdr);
-
- switch (type) {
- case RXRPC_PACKET_TYPE_ACK:
- spin_lock_bh(&call->lock);
+ spin_lock_bh(&call->lock);
+ if (ping) {
+ reason = RXRPC_ACK_PING;
+ } else {
+ reason = call->ackr_reason;
if (!call->ackr_reason) {
spin_unlock_bh(&call->lock);
ret = 0;
goto out;
}
- ping = (call->ackr_reason == RXRPC_ACK_PING);
- n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top);
call->ackr_reason = 0;
+ }
+ n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
- spin_unlock_bh(&call->lock);
-
-
- pkt->whdr.flags |= RXRPC_SLOW_START_OK;
-
- iov[0].iov_len += sizeof(pkt->ack) + n;
- iov[1].iov_base = &pkt->ackinfo;
- iov[1].iov_len = sizeof(pkt->ackinfo);
- len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo);
- ioc = 2;
- break;
-
- case RXRPC_PACKET_TYPE_ABORT:
- abort_code = call->abort_code;
- pkt->abort_code = htonl(abort_code);
- iov[0].iov_len += sizeof(pkt->abort_code);
- len += sizeof(pkt->abort_code);
- ioc = 1;
- break;
+ spin_unlock_bh(&call->lock);
- default:
- BUG();
- ret = -ENOANO;
- goto out;
- }
+ iov[0].iov_base = pkt;
+ iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
+ iov[1].iov_base = &pkt->ackinfo;
+ iov[1].iov_len = sizeof(pkt->ackinfo);
+ len = iov[0].iov_len + iov[1].iov_len;
serial = atomic_inc_return(&conn->serial);
pkt->whdr.serial = htonl(serial);
- switch (type) {
- case RXRPC_PACKET_TYPE_ACK:
- trace_rxrpc_tx_ack(call, serial,
- ntohl(pkt->ack.firstPacket),
- ntohl(pkt->ack.serial),
- pkt->ack.reason, pkt->ack.nAcks);
- break;
- }
+ trace_rxrpc_tx_ack(call, serial,
+ ntohl(pkt->ack.firstPacket),
+ ntohl(pkt->ack.serial),
+ pkt->ack.reason, pkt->ack.nAcks);
if (ping) {
- call->ackr_ping = serial;
+ call->ping_serial = serial;
smp_wmb();
/* We need to stick a time in before we send the packet in case
* the reply gets back before kernel_sendmsg() completes - but
@@ -201,19 +175,19 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
* the packet transmission is more likely to happen towards the
* end of the kernel_sendmsg() call.
*/
- call->ackr_ping_time = ktime_get_real();
+ call->ping_time = ktime_get_real();
set_bit(RXRPC_CALL_PINGING, &call->flags);
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
}
- ret = kernel_sendmsg(conn->params.local->socket,
- &msg, iov, ioc, len);
+
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ping)
- call->ackr_ping_time = ktime_get_real();
+ call->ping_time = ktime_get_real();
- if (type == RXRPC_PACKET_TYPE_ACK &&
- call->state < RXRPC_CALL_COMPLETE) {
+ if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
- clear_bit(RXRPC_CALL_PINGING, &call->flags);
+ if (ping)
+ clear_bit(RXRPC_CALL_PINGING, &call->flags);
rxrpc_propose_ACK(call, pkt->ack.reason,
ntohs(pkt->ack.maxSkew),
ntohl(pkt->ack.serial),
@@ -236,6 +210,56 @@ out:
}
/*
+ * Send an ABORT call packet.
+ */
+int rxrpc_send_abort_packet(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_abort_buffer pkt;
+ struct msghdr msg;
+ struct kvec iov[1];
+ rxrpc_serial_t serial;
+ int ret;
+
+ spin_lock_bh(&call->lock);
+ if (call->conn)
+ conn = rxrpc_get_connection_maybe(call->conn);
+ spin_unlock_bh(&call->lock);
+ if (!conn)
+ return -ECONNRESET;
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ pkt.whdr.epoch = htonl(conn->proto.epoch);
+ pkt.whdr.cid = htonl(call->cid);
+ pkt.whdr.callNumber = htonl(call->call_id);
+ pkt.whdr.seq = 0;
+ pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ pkt.whdr.flags = conn->out_clientflag;
+ pkt.whdr.userStatus = 0;
+ pkt.whdr.securityIndex = call->security_ix;
+ pkt.whdr._rsvd = 0;
+ pkt.whdr.serviceId = htons(call->service_id);
+ pkt.abort_code = htonl(call->abort_code);
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt);
+
+ serial = atomic_inc_return(&conn->serial);
+ pkt.whdr.serial = htonl(serial);
+
+ ret = kernel_sendmsg(conn->params.local->socket,
+ &msg, iov, 1, sizeof(pkt));
+
+ rxrpc_put_connection(conn);
+ return ret;
+}
+
+/*
* send a packet through the transport endpoint
*/
int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
@@ -283,11 +307,12 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
/* If our RTT cache needs working on, request an ACK. Also request
* ACKs if a DATA packet appears to have been lost.
*/
- if (retrans ||
- call->cong_mode == RXRPC_CALL_SLOW_START ||
- (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
- ktime_get_real()))
+ if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
+ (retrans ||
+ call->cong_mode == RXRPC_CALL_SLOW_START ||
+ (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real())))
whdr.flags |= RXRPC_REQUEST_ACK;
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 941b724d523b..862eea6b266c 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -193,8 +193,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
fl6->fl6_dport = htons(7001);
fl6->fl6_sport = htons(7000);
dst = ip6_route_output(&init_net, NULL, fl6);
- if (IS_ERR(dst)) {
- _leave(" [route err %ld]", PTR_ERR(dst));
+ if (dst->error) {
+ _leave(" [route err %d]", dst->error);
return;
}
break;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index f05ea0a88076..c29362d50a92 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
rxrpc_propose_ack_terminal_ack);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
}
write_lock_bh(&call->state_lock);
@@ -151,17 +151,21 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
switch (call->state) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
__rxrpc_call_completed(call);
+ write_unlock_bh(&call->state_lock);
break;
case RXRPC_CALL_SERVER_RECV_REQUEST:
call->tx_phase = true;
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+ call->ack_at = call->expire_at;
+ write_unlock_bh(&call->state_lock);
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
+ rxrpc_propose_ack_processing_op);
break;
default:
+ write_unlock_bh(&call->state_lock);
break;
}
-
- write_unlock_bh(&call->state_lock);
}
/*
@@ -212,7 +216,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
true, false,
rxrpc_propose_ack_rotate_rx);
if (call->ackr_reason)
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
}
}
@@ -652,7 +656,7 @@ excess_data:
goto out;
call_complete:
*_abort = call->abort_code;
- ret = call->error;
+ ret = -call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
ret = 1;
if (size > 0)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 627abed5f999..4374e7b9c7bf 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -381,7 +381,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
_leave(" = -EPROTO");
return -EPROTO;
@@ -471,7 +471,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
_leave(" = -EPROTO");
return -EPROTO;
@@ -523,7 +523,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (cksum != expected_cksum) {
rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
_leave(" = -EPROTO [csum failed]");
return -EPROTO;
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 3322543d460a..b214a4d4a641 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -130,6 +130,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
break;
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+ call->ack_at = call->expire_at;
+ if (call->ackr_reason == RXRPC_ACK_DELAY)
+ call->ackr_reason = 0;
+ __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply,
+ ktime_get_real());
if (!last)
break;
case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -197,7 +202,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
if (!skb) {
size_t size, chunk, max, space;
@@ -514,8 +519,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
ret = 0;
if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
- ret = rxrpc_send_call_packet(call,
- RXRPC_PACKET_TYPE_ABORT);
+ ret = rxrpc_send_abort_packet(call);
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else if (rxrpc_is_client_call(call) &&
@@ -597,7 +601,7 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
lock_sock(sock->sk);
if (rxrpc_abort_call(why, call, 0, abort_code, error))
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
release_sock(sock->sk);
_leave("");
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index c9102172ce3b..f893d180da1c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -341,22 +341,25 @@ int tcf_register_action(struct tc_action_ops *act,
if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
return -EINVAL;
+ /* We have to register pernet ops before making the action ops visible,
+ * otherwise tcf_action_init_1() could get a partially initialized
+ * netns.
+ */
+ ret = register_pernet_subsys(ops);
+ if (ret)
+ return ret;
+
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
+ unregister_pernet_subsys(ops);
return -EEXIST;
}
}
list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
- ret = register_pernet_subsys(ops);
- if (ret) {
- tcf_unregister_action(act, ops);
- return ret;
- }
-
return 0;
}
EXPORT_SYMBOL(tcf_register_action);
@@ -367,8 +370,6 @@ int tcf_unregister_action(struct tc_action_ops *act,
struct tc_action_ops *a;
int err = -ENOENT;
- unregister_pernet_subsys(ops);
-
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (a == act) {
@@ -378,6 +379,8 @@ int tcf_unregister_action(struct tc_action_ops *act,
}
}
write_unlock(&act_mod_lock);
+ if (!err)
+ unregister_pernet_subsys(ops);
return err;
}
EXPORT_SYMBOL(tcf_unregister_action);
@@ -1025,8 +1028,7 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
if (tb[1] == NULL)
return NULL;
- if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
- nla_len(tb[1]), NULL) < 0)
+ if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0)
return NULL;
kind = tb2[TCA_ACT_KIND];
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 667dc382df82..6b07fba5770b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -207,8 +207,11 @@ out:
static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
u64 lastuse)
{
- tcf_lastuse_update(&a->tcfa_tm);
+ struct tcf_mirred *m = to_mirred(a);
+ struct tcf_t *tm = &m->tcf_tm;
+
_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+ tm->lastuse = lastuse;
}
static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 11da7da0b7c4..2b2a7974e4bb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -101,7 +101,7 @@ EXPORT_SYMBOL(unregister_tcf_proto_ops);
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event);
+ unsigned long fh, int event, bool unicast);
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n,
@@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL;
it_chain = &tp->next)
- tfilter_notify(net, oskb, n, tp, 0, event);
+ tfilter_notify(net, oskb, n, tp, 0, event, false);
}
/* Select new prio value from the range, managed by kernel. */
@@ -319,7 +319,8 @@ replay:
RCU_INIT_POINTER(*back, next);
- tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ tfilter_notify(net, skb, n, tp, fh,
+ RTM_DELTFILTER, false);
tcf_destroy(tp, true);
err = 0;
goto errout;
@@ -344,15 +345,16 @@ replay:
if (err == 0) {
struct tcf_proto *next = rtnl_dereference(tp->next);
- tfilter_notify(net, skb, n, tp, fh,
- RTM_DELTFILTER);
+ tfilter_notify(net, skb, n, tp,
+ t->tcm_handle,
+ RTM_DELTFILTER, false);
if (tcf_destroy(tp, false))
RCU_INIT_POINTER(*back, next);
}
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh,
- RTM_NEWTFILTER);
+ RTM_NEWTFILTER, true);
goto errout;
default:
err = -EINVAL;
@@ -367,7 +369,7 @@ replay:
RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
rcu_assign_pointer(*back, tp);
}
- tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
+ tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
tcf_destroy(tp, true);
@@ -419,7 +421,7 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event)
+ unsigned long fh, int event, bool unicast)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -433,6 +435,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 2a5c1896d18f..6cb0df859195 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -418,6 +418,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
__u8 has_data = 0;
int gso = 0;
int pktcount = 0;
+ int auth_len = 0;
struct dst_entry *dst;
unsigned char *auth = NULL; /* pointer to auth in skb data */
@@ -510,7 +511,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
list_for_each_entry(chunk, &packet->chunk_list, list) {
int padded = SCTP_PAD4(chunk->skb->len);
- if (pkt_size + padded > tp->pathmtu)
+ if (chunk == packet->auth)
+ auth_len = padded;
+ else if (auth_len + padded + packet->overhead >
+ tp->pathmtu)
+ goto nomem;
+ else if (pkt_size + padded > tp->pathmtu)
break;
pkt_size += padded;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 026e3bca4a94..8ec20a64a3f8 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3422,6 +3422,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
+ /* Report violation if chunk len overflows */
+ ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
+ if (ch_end > skb_tail_pointer(skb))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* Now that we know we at least have a chunk header,
* do things that are type appropriate.
*/
@@ -3453,12 +3459,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
}
}
- /* Report violation if chunk len overflows */
- ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
- if (ch_end > skb_tail_pointer(skb))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
-
ch = (sctp_chunkhdr_t *) ch_end;
} while (ch_end < skb_tail_pointer(skb));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fb02c7033307..9fbb6feb8c27 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4687,7 +4687,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len,
static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
int __user *optlen)
{
- if (len <= 0)
+ if (len == 0)
return -EINVAL;
if (len > sizeof(struct sctp_event_subscribe))
len = sizeof(struct sctp_event_subscribe);
@@ -6430,6 +6430,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
+ if (len < 0)
+ return -EINVAL;
+
lock_sock(sk);
switch (optname) {
diff --git a/net/socket.c b/net/socket.c
index a1bd16106625..5a9bf5ee2464 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -320,11 +320,38 @@ static const struct dentry_operations sockfs_dentry_operations = {
.d_dname = sockfs_dname,
};
+static int sockfs_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, void *value, size_t size)
+{
+ if (value) {
+ if (dentry->d_name.len + 1 > size)
+ return -ERANGE;
+ memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
+ }
+ return dentry->d_name.len + 1;
+}
+
+#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
+#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
+#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
+
+static const struct xattr_handler sockfs_xattr_handler = {
+ .name = XATTR_NAME_SOCKPROTONAME,
+ .get = sockfs_xattr_get,
+};
+
+static const struct xattr_handler *sockfs_xattr_handlers[] = {
+ &sockfs_xattr_handler,
+ NULL
+};
+
static struct dentry *sockfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "socket:", &sockfs_ops,
- &sockfs_dentry_operations, SOCKFS_MAGIC);
+ return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
+ sockfs_xattr_handlers,
+ &sockfs_dentry_operations, SOCKFS_MAGIC);
}
static struct vfsmount *sock_mnt __read_mostly;
@@ -463,35 +490,6 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
return NULL;
}
-#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
-#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
-#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
-static ssize_t sockfs_getxattr(struct dentry *dentry, struct inode *inode,
- const char *name, void *value, size_t size)
-{
- const char *proto_name;
- size_t proto_size;
- int error;
-
- error = -ENODATA;
- if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
- proto_name = dentry->d_name.name;
- proto_size = strlen(proto_name);
-
- if (value) {
- error = -ERANGE;
- if (proto_size + 1 > size)
- goto out;
-
- strncpy(value, proto_name, proto_size + 1);
- }
- error = proto_size + 1;
- }
-
-out:
- return error;
-}
-
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
@@ -521,7 +519,6 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
}
static const struct inode_operations sockfs_inode_ops = {
- .getxattr = sockfs_getxattr,
.listxattr = sockfs_listxattr,
};
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 5c7549b5b92c..41adf362936d 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -246,7 +246,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
} else {
strp->rx_interrupted = 1;
}
- strp_parser_err(strp, err, desc);
+ strp_parser_err(strp, len, desc);
break;
} else if (len > strp->sk->sk_rcvbuf) {
/* Message length exceeds maximum allowed */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index a7e42f9a405c..2bff63a73cf8 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -551,7 +551,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
*entry, *new;
unsigned int nr;
- nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits);
+ nr = auth->au_ops->hash_cred(acred, cache->hashbits);
rcu_read_lock();
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 168219535a34..f1df9837f1ac 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -78,6 +78,14 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
return auth->au_ops->lookup_cred(auth, acred, lookupflags);
}
+static int
+generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+{
+ return hash_64(from_kgid(&init_user_ns, acred->gid) |
+ ((u64)from_kuid(&init_user_ns, acred->uid) <<
+ (sizeof(gid_t) * 8)), hashbits);
+}
+
/*
* Lookup generic creds for current process
*/
@@ -176,8 +184,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
goto out_nomatch;
for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
- if (!gid_eq(GROUP_AT(gcred->acred.group_info, i),
- GROUP_AT(acred->group_info, i)))
+ if (!gid_eq(gcred->acred.group_info->gid[i],
+ acred->group_info->gid[i]))
goto out_nomatch;
}
out_match:
@@ -258,6 +266,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
+ .hash_cred = generic_hash_cred,
.lookup_cred = generic_lookup_cred,
.crcreate = generic_create_cred,
.key_timeout = generic_key_timeout,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 976c7812bbd5..3dfd769dc5b5 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1298,6 +1298,12 @@ gss_destroy_cred(struct rpc_cred *cred)
gss_destroy_nullcred(cred);
}
+static int
+gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+{
+ return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits);
+}
+
/*
* Lookup RPCSEC_GSS cred for the current process
*/
@@ -1610,7 +1616,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 seq;
+ __be32 *seq = NULL;
struct kvec iov;
struct xdr_buf verf_buf;
struct xdr_netobj mic;
@@ -1625,9 +1631,12 @@ gss_validate(struct rpc_task *task, __be32 *p)
goto out_bad;
if (flav != RPC_AUTH_GSS)
goto out_bad;
- seq = htonl(task->tk_rqstp->rq_seqno);
- iov.iov_base = &seq;
- iov.iov_len = sizeof(seq);
+ seq = kmalloc(4, GFP_NOFS);
+ if (!seq)
+ goto out_bad;
+ *seq = htonl(task->tk_rqstp->rq_seqno);
+ iov.iov_base = seq;
+ iov.iov_len = 4;
xdr_buf_from_iov(&iov, &verf_buf);
mic.data = (u8 *)p;
mic.len = len;
@@ -1647,11 +1656,13 @@ gss_validate(struct rpc_task *task, __be32 *p)
gss_put_ctx(ctx);
dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
task->tk_pid, __func__);
+ kfree(seq);
return p + XDR_QUADLEN(len);
out_bad:
gss_put_ctx(ctx);
dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
PTR_ERR(ret));
+ kfree(seq);
return ret;
}
@@ -1982,6 +1993,7 @@ static const struct rpc_authops authgss_ops = {
.au_name = "RPCSEC_GSS",
.create = gss_create,
.destroy = gss_destroy,
+ .hash_cred = gss_hash_cred,
.lookup_cred = gss_lookup_cred,
.crcreate = gss_create_cred,
.list_pseudoflavors = gss_mech_list_pseudoflavors,
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 244245bcbbd2..90115ceefd49 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -166,8 +166,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
unsigned int usage, struct xdr_netobj *cksumout)
{
struct scatterlist sg[1];
- int err;
- u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+ int err = -1;
+ u8 *checksumdata;
u8 rc4salt[4];
struct crypto_ahash *md5;
struct crypto_ahash *hmac_md5;
@@ -187,23 +187,22 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ if (!checksumdata)
+ return GSS_S_FAILURE;
+
md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(md5))
- return GSS_S_FAILURE;
+ goto out_free_cksum;
hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0,
CRYPTO_ALG_ASYNC);
- if (IS_ERR(hmac_md5)) {
- crypto_free_ahash(md5);
- return GSS_S_FAILURE;
- }
+ if (IS_ERR(hmac_md5))
+ goto out_free_md5;
req = ahash_request_alloc(md5, GFP_KERNEL);
- if (!req) {
- crypto_free_ahash(hmac_md5);
- crypto_free_ahash(md5);
- return GSS_S_FAILURE;
- }
+ if (!req)
+ goto out_free_hmac_md5;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
@@ -232,11 +231,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
ahash_request_free(req);
req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
- if (!req) {
- crypto_free_ahash(hmac_md5);
- crypto_free_ahash(md5);
- return GSS_S_FAILURE;
- }
+ if (!req)
+ goto out_free_hmac_md5;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
@@ -258,8 +254,12 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
cksumout->len = kctx->gk5e->cksumlength;
out:
ahash_request_free(req);
- crypto_free_ahash(md5);
+out_free_hmac_md5:
crypto_free_ahash(hmac_md5);
+out_free_md5:
+ crypto_free_ahash(md5);
+out_free_cksum:
+ kfree(checksumdata);
return err ? GSS_S_FAILURE : 0;
}
@@ -276,8 +276,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
struct crypto_ahash *tfm;
struct ahash_request *req;
struct scatterlist sg[1];
- int err;
- u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+ int err = -1;
+ u8 *checksumdata;
unsigned int checksumlen;
if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
@@ -291,15 +291,17 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ if (checksumdata == NULL)
+ return GSS_S_FAILURE;
+
tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm))
- return GSS_S_FAILURE;
+ goto out_free_cksum;
req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!req) {
- crypto_free_ahash(tfm);
- return GSS_S_FAILURE;
- }
+ if (!req)
+ goto out_free_ahash;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
@@ -349,7 +351,10 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
cksumout->len = kctx->gk5e->cksumlength;
out:
ahash_request_free(req);
+out_free_ahash:
crypto_free_ahash(tfm);
+out_free_cksum:
+ kfree(checksumdata);
return err ? GSS_S_FAILURE : 0;
}
@@ -368,8 +373,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
struct crypto_ahash *tfm;
struct ahash_request *req;
struct scatterlist sg[1];
- int err;
- u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+ int err = -1;
+ u8 *checksumdata;
unsigned int checksumlen;
if (kctx->gk5e->keyed_cksum == 0) {
@@ -383,16 +388,18 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
return GSS_S_FAILURE;
}
+ checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+ if (!checksumdata)
+ return GSS_S_FAILURE;
+
tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm))
- return GSS_S_FAILURE;
+ goto out_free_cksum;
checksumlen = crypto_ahash_digestsize(tfm);
req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!req) {
- crypto_free_ahash(tfm);
- return GSS_S_FAILURE;
- }
+ if (!req)
+ goto out_free_ahash;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
@@ -433,7 +440,10 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
}
out:
ahash_request_free(req);
+out_free_ahash:
crypto_free_ahash(tfm);
+out_free_cksum:
+ kfree(checksumdata);
return err ? GSS_S_FAILURE : 0;
}
@@ -666,14 +676,17 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
u32 ret;
struct scatterlist sg[1];
SKCIPHER_REQUEST_ON_STACK(req, cipher);
- u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
+ u8 *data;
struct page **save_pages;
u32 len = buf->len - offset;
- if (len > ARRAY_SIZE(data)) {
+ if (len > GSS_KRB5_MAX_BLOCKSIZE * 2) {
WARN_ON(0);
return -ENOMEM;
}
+ data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS);
+ if (!data)
+ return -ENOMEM;
/*
* For encryption, we want to read from the cleartext
@@ -708,6 +721,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
ret = write_bytes_to_xdr_buf(buf, offset, data, len);
out:
+ kfree(data);
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index eeeba5adee6d..dc6fb79a361f 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -229,7 +229,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
kgid = make_kgid(&init_user_ns, tmp);
if (!gid_valid(kgid))
goto out_free_groups;
- GROUP_AT(creds->cr_group_info, i) = kgid;
+ creds->cr_group_info->gid[i] = kgid;
}
return 0;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index d8582028b346..45662d7f0943 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -479,7 +479,7 @@ static int rsc_parse(struct cache_detail *cd,
kgid = make_kgid(&init_user_ns, id);
if (!gid_valid(kgid))
goto out;
- GROUP_AT(rsci.cred.cr_group_info, i) = kgid;
+ rsci.cred.cr_group_info->gid[i] = kgid;
}
/* mech name */
@@ -718,30 +718,37 @@ gss_write_null_verf(struct svc_rqst *rqstp)
static int
gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
{
- __be32 xdr_seq;
+ __be32 *xdr_seq;
u32 maj_stat;
struct xdr_buf verf_data;
struct xdr_netobj mic;
__be32 *p;
struct kvec iov;
+ int err = -1;
svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
- xdr_seq = htonl(seq);
+ xdr_seq = kmalloc(4, GFP_KERNEL);
+ if (!xdr_seq)
+ return -1;
+ *xdr_seq = htonl(seq);
- iov.iov_base = &xdr_seq;
- iov.iov_len = sizeof(xdr_seq);
+ iov.iov_base = xdr_seq;
+ iov.iov_len = 4;
xdr_buf_from_iov(&iov, &verf_data);
p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx_id, &verf_data, &mic);
if (maj_stat != GSS_S_COMPLETE)
- return -1;
+ goto out;
*p++ = htonl(mic.len);
memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len);
p += XDR_QUADLEN(mic.len);
if (!xdr_ressize_check(rqstp, p))
- return -1;
- return 0;
+ goto out;
+ err = 0;
+out:
+ kfree(xdr_seq);
+ return err;
}
struct gss_domain {
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index a99278c984e8..306fc0f54596 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -46,6 +46,14 @@ unx_destroy(struct rpc_auth *auth)
rpcauth_clear_credcache(auth->au_credcache);
}
+static int
+unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+{
+ return hash_64(from_kgid(&init_user_ns, acred->gid) |
+ ((u64)from_kuid(&init_user_ns, acred->uid) <<
+ (sizeof(gid_t) * 8)), hashbits);
+}
+
/*
* Lookup AUTH_UNIX creds for current process
*/
@@ -79,7 +87,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
cred->uc_gid = acred->gid;
for (i = 0; i < groups; i++)
- cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
+ cred->uc_gids[i] = acred->group_info->gid[i];
if (i < NFS_NGROUPS)
cred->uc_gids[i] = INVALID_GID;
@@ -127,7 +135,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
for (i = 0; i < groups ; i++)
- if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i)))
+ if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
return 0;
if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
return 0;
@@ -220,6 +228,7 @@ const struct rpc_authops authunix_ops = {
.au_name = "UNIX",
.create = unx_create,
.destroy = unx_destroy,
+ .hash_cred = unx_hash_cred,
.lookup_cred = unx_lookup_cred,
.crcreate = unx_create_cred,
};
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 229956bf8457..ac701c28f44f 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -76,13 +76,7 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
page = alloc_page(gfp_flags);
if (page == NULL)
return -ENOMEM;
- buf->head[0].iov_base = page_address(page);
- buf->head[0].iov_len = PAGE_SIZE;
- buf->tail[0].iov_base = NULL;
- buf->tail[0].iov_len = 0;
- buf->page_len = 0;
- buf->len = 0;
- buf->buflen = PAGE_SIZE;
+ xdr_buf_init(buf, page_address(page), PAGE_SIZE);
return 0;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4d8e11f94a35..8aabe12201f8 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -353,7 +353,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
spin_unlock(&cache_list_lock);
/* start the cleaning process */
- schedule_delayed_work(&cache_cleaner, 0);
+ queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0);
}
EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail);
@@ -476,7 +476,8 @@ static void do_cache_clean(struct work_struct *work)
delay = 0;
if (delay)
- schedule_delayed_work(&cache_cleaner, delay);
+ queue_delayed_work(system_power_efficient_wq,
+ &cache_cleaner, delay);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 66f23b376fa0..34dd7b26ee5f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -184,7 +184,6 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
struct dentry *dentry;
- int err = 0;
switch (event) {
case RPC_PIPEFS_MOUNT:
@@ -201,7 +200,7 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event);
return -ENOTSUPP;
}
- return err;
+ return 0;
}
static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
@@ -988,7 +987,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
if (clnt != NULL) {
- rpc_task_release_client(task);
if (task->tk_xprt == NULL)
task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
task->tk_client = clnt;
@@ -1693,6 +1691,7 @@ call_allocate(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+ int status;
dprint_status(task);
@@ -1718,11 +1717,14 @@ call_allocate(struct rpc_task *task)
req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
req->rq_rcvsize <<= 2;
- req->rq_buffer = xprt->ops->buf_alloc(task,
- req->rq_callsize + req->rq_rcvsize);
- if (req->rq_buffer != NULL)
- return;
+ status = xprt->ops->buf_alloc(task);
xprt_inject_disconnect(xprt);
+ if (status == 0)
+ return;
+ if (status != -ENOMEM) {
+ rpc_exit(task, status);
+ return;
+ }
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -1748,18 +1750,6 @@ rpc_task_force_reencode(struct rpc_task *task)
task->tk_rqstp->rq_bytes_sent = 0;
}
-static inline void
-rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
-{
- buf->head[0].iov_base = start;
- buf->head[0].iov_len = len;
- buf->tail[0].iov_len = 0;
- buf->page_len = 0;
- buf->flags = 0;
- buf->len = 0;
- buf->buflen = len;
-}
-
/*
* 3. Encode arguments of an RPC call
*/
@@ -1772,12 +1762,12 @@ rpc_xdr_encode(struct rpc_task *task)
dprint_status(task);
- rpc_xdr_buf_init(&req->rq_snd_buf,
- req->rq_buffer,
- req->rq_callsize);
- rpc_xdr_buf_init(&req->rq_rcv_buf,
- (char *)req->rq_buffer + req->rq_callsize,
- req->rq_rcvsize);
+ xdr_buf_init(&req->rq_snd_buf,
+ req->rq_buffer,
+ req->rq_callsize);
+ xdr_buf_init(&req->rq_rcv_buf,
+ req->rq_rbuffer,
+ req->rq_rcvsize);
p = rpc_encode_header(task);
if (p == NULL) {
@@ -2616,6 +2606,70 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
/**
+ * rpc_clnt_setup_test_and_add_xprt()
+ *
+ * This is an rpc_clnt_add_xprt setup() function which returns 1 so:
+ * 1) caller of the test function must dereference the rpc_xprt_switch
+ * and the rpc_xprt.
+ * 2) test function must call rpc_xprt_switch_add_xprt, usually in
+ * the rpc_call_done routine.
+ *
+ * Upon success (return of 1), the test function adds the new
+ * transport to the rpc_clnt xprt switch
+ *
+ * @clnt: struct rpc_clnt to get the new transport
+ * @xps: the rpc_xprt_switch to hold the new transport
+ * @xprt: the rpc_xprt to test
+ * @data: a struct rpc_add_xprt_test pointer that holds the test function
+ * and test function call data
+ */
+int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
+ struct rpc_xprt_switch *xps,
+ struct rpc_xprt *xprt,
+ void *data)
+{
+ struct rpc_cred *cred;
+ struct rpc_task *task;
+ struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
+ int status = -EADDRINUSE;
+
+ xprt = xprt_get(xprt);
+ xprt_switch_get(xps);
+
+ if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
+ goto out_err;
+
+ /* Test the connection */
+ cred = authnull_ops.lookup_cred(NULL, NULL, 0);
+ task = rpc_call_null_helper(clnt, xprt, cred,
+ RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ NULL, NULL);
+ put_rpccred(cred);
+ if (IS_ERR(task)) {
+ status = PTR_ERR(task);
+ goto out_err;
+ }
+ status = task->tk_status;
+ rpc_put_task(task);
+
+ if (status < 0)
+ goto out_err;
+
+ /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
+ xtest->add_xprt_test(clnt, xprt, xtest->data);
+
+ /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
+ return 1;
+out_err:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n",
+ status, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ return status;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt);
+
+/**
* rpc_clnt_add_xprt - Add a new transport to a rpc_clnt
* @clnt: pointer to struct rpc_clnt
* @xprtargs: pointer to struct xprt_create
@@ -2697,6 +2751,34 @@ rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
}
EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
+{
+ xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
+
+void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
+ xprt);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
+
+bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
+ const struct sockaddr *sap)
+{
+ struct rpc_xprt_switch *xps;
+ bool ret;
+
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+
+ rcu_read_lock();
+ ret = rpc_xprt_switch_has_addr(xps, sap);
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr);
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static void rpc_show_header(void)
{
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 84f98cbe31c3..61a504fb1ae2 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -477,7 +477,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9ae588511aaf..5db68b371db2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -849,14 +849,17 @@ static void rpc_async_schedule(struct work_struct *work)
}
/**
- * rpc_malloc - allocate an RPC buffer
- * @task: RPC task that will use this buffer
- * @size: requested byte size
+ * rpc_malloc - allocate RPC buffer resources
+ * @task: RPC task
+ *
+ * A single memory region is allocated, which is split between the
+ * RPC call and RPC reply that this task is being used for. When
+ * this RPC is retired, the memory is released by calling rpc_free.
*
* To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL and suppressing warning if the request cannot be serviced
- * immediately.
- * The caller can arrange to sleep in a way that is safe for rpciod.
+ * returning -ENOMEM and suppressing warning if the request cannot
+ * be serviced immediately. The caller can arrange to sleep in a
+ * way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
* mempool, ensuring that NFS reads and writes can always proceed,
@@ -865,8 +868,10 @@ static void rpc_async_schedule(struct work_struct *work)
* In order to avoid memory starvation triggering more writebacks of
* NFS requests, we avoid using GFP_KERNEL.
*/
-void *rpc_malloc(struct rpc_task *task, size_t size)
+int rpc_malloc(struct rpc_task *task)
{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
@@ -880,28 +885,28 @@ void *rpc_malloc(struct rpc_task *task, size_t size)
buf = kmalloc(size, gfp);
if (!buf)
- return NULL;
+ return -ENOMEM;
buf->len = size;
dprintk("RPC: %5u allocated buffer of size %zu at %p\n",
task->tk_pid, size, buf);
- return &buf->data;
+ rqst->rq_buffer = buf->data;
+ rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
+ return 0;
}
EXPORT_SYMBOL_GPL(rpc_malloc);
/**
- * rpc_free - free buffer allocated via rpc_malloc
- * @buffer: buffer to free
+ * rpc_free - free RPC buffer resources allocated via rpc_malloc
+ * @task: RPC task
*
*/
-void rpc_free(void *buffer)
+void rpc_free(struct rpc_task *task)
{
+ void *buffer = task->tk_rqstp->rq_buffer;
size_t size;
struct rpc_buffer *buf;
- if (!buffer)
- return;
-
buf = container_of(buffer, struct rpc_buffer, data);
size = buf->len;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index c5b0cb4f4056..7c8070ec93c8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -401,6 +401,21 @@ int svc_bind(struct svc_serv *serv, struct net *net)
}
EXPORT_SYMBOL_GPL(svc_bind);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void
+__svc_init_bc(struct svc_serv *serv)
+{
+ INIT_LIST_HEAD(&serv->sv_cb_list);
+ spin_lock_init(&serv->sv_cb_lock);
+ init_waitqueue_head(&serv->sv_cb_waitq);
+}
+#else
+static void
+__svc_init_bc(struct svc_serv *serv)
+{
+}
+#endif
+
/*
* Create an RPC service
*/
@@ -443,6 +458,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
init_timer(&serv->sv_temptimer);
spin_lock_init(&serv->sv_lock);
+ __svc_init_bc(serv);
+
serv->sv_nrpools = npools;
serv->sv_pools =
kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index dfacdc95b3f5..64af4f034de6 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -517,7 +517,7 @@ static int unix_gid_parse(struct cache_detail *cd,
kgid = make_kgid(&init_user_ns, gid);
if (!gid_valid(kgid))
goto out;
- GROUP_AT(ug.gi, i) = kgid;
+ ug.gi->gid[i] = kgid;
}
ugp = unix_gid_lookup(cd, uid);
@@ -564,7 +564,7 @@ static int unix_gid_show(struct seq_file *m,
seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen);
for (i = 0; i < glen; i++)
- seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i)));
+ seq_printf(m, " %d", from_kgid_munged(user_ns, ug->gi->gid[i]));
seq_printf(m, "\n");
return 0;
}
@@ -817,7 +817,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
- GROUP_AT(cred->cr_group_info, i) = kgid;
+ cred->cr_group_info->gid[i] = kgid;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index c4f3cc0c0775..7f1071e103ca 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -767,7 +767,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase -= xdr->buf->page_base;
if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
@@ -776,7 +776,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
xdr_set_next_page(xdr);
else if (xdr->iov == xdr->buf->head) {
if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
}
return xdr->p != xdr->end;
}
@@ -859,12 +859,15 @@ EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;
- void *cpdest = xdr->scratch.iov_base;
+ char *cpdest = xdr->scratch.iov_base;
size_t cplen = (char *)xdr->end - (char *)xdr->p;
if (nbytes > xdr->scratch.iov_len)
return NULL;
- memcpy(cpdest, xdr->p, cplen);
+ p = __xdr_inline_decode(xdr, cplen);
+ if (p == NULL)
+ return NULL;
+ memcpy(cpdest, p, cplen);
cpdest += cplen;
nbytes -= cplen;
if (!xdr_set_next_buffer(xdr))
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ea244b29138b..685e6d225414 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1295,7 +1295,7 @@ void xprt_release(struct rpc_task *task)
xprt_schedule_autodisconnect(xprt);
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
- xprt->ops->buf_free(req->rq_buffer);
+ xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 66c9d63f4797..ae92a9e9ba52 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -15,6 +15,7 @@
#include <asm/cmpxchg.h>
#include <linux/spinlock.h>
#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtmultipath.h>
typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
@@ -49,7 +50,8 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
if (xprt == NULL)
return;
spin_lock(&xps->xps_lock);
- if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
+ if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) &&
+ !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
xprt_switch_add_xprt_locked(xps, xprt);
spin_unlock(&xps->xps_lock);
}
@@ -232,6 +234,26 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
}
+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
+{
+ struct list_head *head;
+ struct rpc_xprt *pos;
+
+ if (xps == NULL || sap == NULL)
+ return false;
+
+ head = &xps->xps_xprt_list;
+ list_for_each_entry_rcu(pos, head, xprt_switch) {
+ if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) {
+ pr_info("RPC: addr %s already in xprt switch\n",
+ pos->address_strings[RPC_DISPLAY_ADDR]);
+ return true;
+ }
+ }
+ return false;
+}
+
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 87762d976b63..2c472e1b4827 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -27,7 +27,7 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
list_del(&req->rl_all);
spin_unlock(&buf->rb_reqslock);
- rpcrdma_destroy_req(&r_xprt->rx_ia, req);
+ rpcrdma_destroy_req(req);
kfree(rqst);
}
@@ -35,10 +35,8 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
- struct xdr_buf *buf;
size_t size;
req = rpcrdma_create_req(r_xprt);
@@ -46,30 +44,19 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
return PTR_ERR(req);
req->rl_backchannel = true;
- size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
- rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
+ rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
+ DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
req->rl_rdmabuf = rb;
- size += RPCRDMA_INLINE_READ_THRESHOLD(rqst);
- rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
+ size = r_xprt->rx_data.inline_rsize;
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
- rb->rg_owner = req;
req->rl_sendbuf = rb;
- /* so that rpcr_to_rdmar works when receiving a request */
- rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base;
-
- buf = &rqst->rq_snd_buf;
- buf->head[0].iov_base = rqst->rq_buffer;
- buf->head[0].iov_len = 0;
- buf->tail[0].iov_base = NULL;
- buf->tail[0].iov_len = 0;
- buf->page_len = 0;
- buf->len = 0;
- buf->buflen = size;
-
+ xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size);
+ rpcrdma_set_xprtdata(rqst, req);
return 0;
out_fail:
@@ -219,7 +206,6 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_msg *headerp;
- size_t rpclen;
headerp = rdmab_to_msg(req->rl_rdmabuf);
headerp->rm_xid = rqst->rq_xid;
@@ -231,26 +217,9 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
headerp->rm_body.rm_chunks[1] = xdr_zero;
headerp->rm_body.rm_chunks[2] = xdr_zero;
- rpclen = rqst->rq_svec[0].iov_len;
-
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
- pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
- __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
- pr_info("RPC: %s: RPC/RDMA: %*ph\n",
- __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
- pr_info("RPC: %s: RPC: %*ph\n",
- __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
-#endif
-
- req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
- req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
- req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
-
- req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
- req->rl_send_iov[1].length = rpclen;
- req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
-
- req->rl_niovs = 2;
+ if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
+ &rqst->rq_snd_buf, rpcrdma_noch))
+ return -EIO;
return 0;
}
@@ -402,7 +371,7 @@ out_overflow:
out_short:
pr_warn("RPC/RDMA short backward direction call\n");
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
xprt_disconnect_done(xprt);
else
pr_warn("RPC: %s: reposting rep %p\n",
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 21cb3b150b37..1ebb09e1ac4f 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -160,9 +160,8 @@ static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
- rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
- RPCRDMA_MAX_DATA_SEGS /
- RPCRDMA_MAX_FMR_SGES));
+ ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
+ RPCRDMA_MAX_FMR_SGES);
return 0;
}
@@ -274,6 +273,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
list_for_each_entry(mw, &req->rl_registered, mw_list)
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
if (rc)
goto out_reset;
@@ -331,4 +331,5 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_init_mr = fmr_op_init_mr,
.ro_release_mr = fmr_op_release_mr,
.ro_displayname = "fmr",
+ .ro_send_w_inv_ok = 0,
};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 892b5e1d9b09..210949562786 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -67,6 +67,8 @@
* pending send queue WRs before the transport is reconnected.
*/
+#include <linux/sunrpc/rpc_rdma.h>
+
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -161,7 +163,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
return PTR_ERR(f->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
+ dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
f->fr_state = FRMR_IS_INVALID;
return 0;
}
@@ -242,9 +244,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
depth;
}
- rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
- RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth));
+ ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
+ ia->ri_max_frmr_depth);
return 0;
}
@@ -329,7 +330,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS)
__frwr_sendcompletion_flush(wc, frmr, "localinv");
- complete_all(&frmr->fr_linv_done);
+ complete(&frmr->fr_linv_done);
}
/* Post a REG_MR Work Request to register a memory region
@@ -396,7 +397,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
goto out_mapmr_err;
dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
- __func__, mw, mw->mw_nents, mr->length);
+ __func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
@@ -449,6 +450,8 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
struct rpcrdma_frmr *f = &mw->frmr;
struct ib_send_wr *invalidate_wr;
+ dprintk("RPC: %s: invalidating frmr %p\n", __func__, f);
+
f->fr_state = FRMR_IS_INVALID;
invalidate_wr = &f->fr_invwr;
@@ -472,6 +475,7 @@ static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+ struct rpcrdma_rep *rep = req->rl_reply;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw, *tmp;
struct rpcrdma_frmr *f;
@@ -487,6 +491,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
f = NULL;
invalidate_wrs = pos = prev = NULL;
list_for_each_entry(mw, &req->rl_registered, mw_list) {
+ if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
+ (mw->mw_handle == rep->rr_inv_rkey)) {
+ mw->frmr.fr_state = FRMR_IS_INVALID;
+ continue;
+ }
+
pos = __frwr_prepare_linv_wr(mw);
if (!invalidate_wrs)
@@ -496,6 +506,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
prev = pos;
f = &mw->frmr;
}
+ if (!f)
+ goto unmap;
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
@@ -510,6 +522,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
*/
+ r_xprt->rx_stats.local_inv_needed++;
rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
if (rc)
goto reset_mrs;
@@ -521,6 +534,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
unmap:
list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+ dprintk("RPC: %s: unmapping frmr %p\n",
+ __func__, &mw->frmr);
list_del_init(&mw->mw_list);
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
@@ -576,4 +591,5 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_init_mr = frwr_op_init_mr,
.ro_release_mr = frwr_op_release_mr,
.ro_displayname = "frwr",
+ .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
};
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a47f170b20ef..d987c2d3dd6e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,14 +53,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-enum rpcrdma_chunktype {
- rpcrdma_noch = 0,
- rpcrdma_readch,
- rpcrdma_areadch,
- rpcrdma_writech,
- rpcrdma_replych
-};
-
static const char transfertypes[][12] = {
"inline", /* no chunks */
"read list", /* some argument via rdma read */
@@ -118,10 +110,12 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
return size;
}
-void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia,
- struct rpcrdma_create_data_internal *cdata,
- unsigned int maxsegs)
+void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
+ struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ unsigned int maxsegs = ia->ri_max_segs;
+
ia->ri_max_inline_write = cdata->inline_wsize -
rpcrdma_max_call_header_size(maxsegs);
ia->ri_max_inline_read = cdata->inline_rsize -
@@ -155,42 +149,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
}
-static int
-rpcrdma_tail_pullup(struct xdr_buf *buf)
-{
- size_t tlen = buf->tail[0].iov_len;
- size_t skip = tlen & 3;
-
- /* Do not include the tail if it is only an XDR pad */
- if (tlen < 4)
- return 0;
-
- /* xdr_write_pages() adds a pad at the beginning of the tail
- * if the content in "buf->pages" is unaligned. Force the
- * tail's actual content to land at the next XDR position
- * after the head instead.
- */
- if (skip) {
- unsigned char *src, *dst;
- unsigned int count;
-
- src = buf->tail[0].iov_base;
- dst = buf->head[0].iov_base;
- dst += buf->head[0].iov_len;
-
- src += skip;
- tlen -= skip;
-
- dprintk("RPC: %s: skip=%zu, memmove(%p, %p, %zu)\n",
- __func__, skip, dst, src, tlen);
-
- for (count = tlen; count; count--)
- *dst++ = *src++;
- }
-
- return tlen;
-}
-
/* Split "vec" on page boundaries into segments. FMR registers pages,
* not a byte range. Other modes coalesce these segments into a single
* MR when they can.
@@ -229,7 +187,8 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
static int
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
- enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg)
+ enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
+ bool reminv_expected)
{
int len, n, p, page_base;
struct page **ppages;
@@ -271,6 +230,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
if (type == rpcrdma_readch)
return n;
+ /* When encoding the Write list, some servers need to see an extra
+ * segment for odd-length Write chunks. The upper layer provides
+ * space in the tail iovec for this purpose.
+ */
+ if (type == rpcrdma_writech && reminv_expected)
+ return n;
+
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
@@ -327,7 +293,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
if (rtype == rpcrdma_areadch)
pos = 0;
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg);
+ nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -391,7 +357,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
- wtype, seg);
+ wtype, seg,
+ r_xprt->rx_ia.ri_reminv_expected);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -456,7 +423,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
}
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg);
+ nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
+ r_xprt->rx_ia.ri_reminv_expected);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -491,74 +459,184 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
return iptr;
}
-/*
- * Copy write data inline.
- * This function is used for "small" requests. Data which is passed
- * to RPC via iovecs (or page list) is copied directly into the
- * pre-registered memory buffer for this request. For small amounts
- * of data, this is efficient. The cutoff value is tunable.
+/* Prepare the RPC-over-RDMA header SGE.
*/
-static void rpcrdma_inline_pullup(struct rpc_rqst *rqst)
+static bool
+rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
+ u32 len)
{
- int i, npages, curlen;
- int copy_len;
- unsigned char *srcp, *destp;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- int page_base;
- struct page **ppages;
+ struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
+ struct ib_sge *sge = &req->rl_send_sge[0];
+
+ if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) {
+ if (!__rpcrdma_dma_map_regbuf(ia, rb))
+ return false;
+ sge->addr = rdmab_addr(rb);
+ sge->lkey = rdmab_lkey(rb);
+ }
+ sge->length = len;
+
+ ib_dma_sync_single_for_device(ia->ri_device, sge->addr,
+ sge->length, DMA_TO_DEVICE);
+ req->rl_send_wr.num_sge++;
+ return true;
+}
- destp = rqst->rq_svec[0].iov_base;
- curlen = rqst->rq_svec[0].iov_len;
- destp += curlen;
+/* Prepare the Send SGEs. The head and tail iovec, and each entry
+ * in the page list, gets its own SGE.
+ */
+static bool
+rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
+ struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
+{
+ unsigned int sge_no, page_base, len, remaining;
+ struct rpcrdma_regbuf *rb = req->rl_sendbuf;
+ struct ib_device *device = ia->ri_device;
+ struct ib_sge *sge = req->rl_send_sge;
+ u32 lkey = ia->ri_pd->local_dma_lkey;
+ struct page *page, **ppages;
+
+ /* The head iovec is straightforward, as it is already
+ * DMA-mapped. Sync the content that has changed.
+ */
+ if (!rpcrdma_dma_map_regbuf(ia, rb))
+ return false;
+ sge_no = 1;
+ sge[sge_no].addr = rdmab_addr(rb);
+ sge[sge_no].length = xdr->head[0].iov_len;
+ sge[sge_no].lkey = rdmab_lkey(rb);
+ ib_dma_sync_single_for_device(device, sge[sge_no].addr,
+ sge[sge_no].length, DMA_TO_DEVICE);
+
+ /* If there is a Read chunk, the page list is being handled
+ * via explicit RDMA, and thus is skipped here. However, the
+ * tail iovec may include an XDR pad for the page list, as
+ * well as additional content, and may not reside in the
+ * same page as the head iovec.
+ */
+ if (rtype == rpcrdma_readch) {
+ len = xdr->tail[0].iov_len;
- dprintk("RPC: %s: destp 0x%p len %d hdrlen %d\n",
- __func__, destp, rqst->rq_slen, curlen);
+ /* Do not include the tail if it is only an XDR pad */
+ if (len < 4)
+ goto out;
- copy_len = rqst->rq_snd_buf.page_len;
+ page = virt_to_page(xdr->tail[0].iov_base);
+ page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
- if (rqst->rq_snd_buf.tail[0].iov_len) {
- curlen = rqst->rq_snd_buf.tail[0].iov_len;
- if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
- memmove(destp + copy_len,
- rqst->rq_snd_buf.tail[0].iov_base, curlen);
- r_xprt->rx_stats.pullup_copy_count += curlen;
+ /* If the content in the page list is an odd length,
+ * xdr_write_pages() has added a pad at the beginning
+ * of the tail iovec. Force the tail's non-pad content
+ * to land at the next XDR position in the Send message.
+ */
+ page_base += len & 3;
+ len -= len & 3;
+ goto map_tail;
+ }
+
+ /* If there is a page list present, temporarily DMA map
+ * and prepare an SGE for each page to be sent.
+ */
+ if (xdr->page_len) {
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ page_base = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ sge_no++;
+ if (sge_no > RPCRDMA_MAX_SEND_SGES - 2)
+ goto out_mapping_overflow;
+
+ len = min_t(u32, PAGE_SIZE - page_base, remaining);
+ sge[sge_no].addr = ib_dma_map_page(device, *ppages,
+ page_base, len,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device, sge[sge_no].addr))
+ goto out_mapping_err;
+ sge[sge_no].length = len;
+ sge[sge_no].lkey = lkey;
+
+ req->rl_mapped_sges++;
+ ppages++;
+ remaining -= len;
+ page_base = 0;
}
- dprintk("RPC: %s: tail destp 0x%p len %d\n",
- __func__, destp + copy_len, curlen);
- rqst->rq_svec[0].iov_len += curlen;
}
- r_xprt->rx_stats.pullup_copy_count += copy_len;
- page_base = rqst->rq_snd_buf.page_base;
- ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
- page_base &= ~PAGE_MASK;
- npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
- for (i = 0; copy_len && i < npages; i++) {
- curlen = PAGE_SIZE - page_base;
- if (curlen > copy_len)
- curlen = copy_len;
- dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
- __func__, i, destp, copy_len, curlen);
- srcp = kmap_atomic(ppages[i]);
- memcpy(destp, srcp+page_base, curlen);
- kunmap_atomic(srcp);
- rqst->rq_svec[0].iov_len += curlen;
- destp += curlen;
- copy_len -= curlen;
- page_base = 0;
+ /* The tail iovec is not always constructed in the same
+ * page where the head iovec resides (see, for example,
+ * gss_wrap_req_priv). To neatly accommodate that case,
+ * DMA map it separately.
+ */
+ if (xdr->tail[0].iov_len) {
+ page = virt_to_page(xdr->tail[0].iov_base);
+ page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+ len = xdr->tail[0].iov_len;
+
+map_tail:
+ sge_no++;
+ sge[sge_no].addr = ib_dma_map_page(device, page,
+ page_base, len,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device, sge[sge_no].addr))
+ goto out_mapping_err;
+ sge[sge_no].length = len;
+ sge[sge_no].lkey = lkey;
+ req->rl_mapped_sges++;
}
- /* header now contains entire send message */
+
+out:
+ req->rl_send_wr.num_sge = sge_no + 1;
+ return true;
+
+out_mapping_overflow:
+ pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
+ return false;
+
+out_mapping_err:
+ pr_err("rpcrdma: Send mapping error\n");
+ return false;
+}
+
+bool
+rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
+ u32 hdrlen, struct xdr_buf *xdr,
+ enum rpcrdma_chunktype rtype)
+{
+ req->rl_send_wr.num_sge = 0;
+ req->rl_mapped_sges = 0;
+
+ if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen))
+ goto out_map;
+
+ if (rtype != rpcrdma_areadch)
+ if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype))
+ goto out_map;
+
+ return true;
+
+out_map:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
+}
+
+void
+rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_device *device = ia->ri_device;
+ struct ib_sge *sge;
+ int count;
+
+ sge = &req->rl_send_sge[2];
+ for (count = req->rl_mapped_sges; count--; sge++)
+ ib_dma_unmap_page(device, sge->addr, sge->length,
+ DMA_TO_DEVICE);
+ req->rl_mapped_sges = 0;
}
/*
* Marshal a request: the primary job of this routine is to choose
* the transfer modes. See comments below.
*
- * Prepares up to two IOVs per Call message:
- *
- * [0] -- RPC RDMA header
- * [1] -- the RPC header/data
- *
* Returns zero on success, otherwise a negative errno.
*/
@@ -626,12 +704,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
*/
if (rpcrdma_args_inline(r_xprt, rqst)) {
rtype = rpcrdma_noch;
- rpcrdma_inline_pullup(rqst);
- rpclen = rqst->rq_svec[0].iov_len;
+ rpclen = rqst->rq_snd_buf.len;
} else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
rtype = rpcrdma_readch;
- rpclen = rqst->rq_svec[0].iov_len;
- rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf);
+ rpclen = rqst->rq_snd_buf.head[0].iov_len +
+ rqst->rq_snd_buf.tail[0].iov_len;
} else {
r_xprt->rx_stats.nomsg_call_count++;
headerp->rm_type = htonl(RDMA_NOMSG);
@@ -673,34 +750,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
goto out_unmap;
hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
- if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
- goto out_overflow;
-
dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
rqst->rq_task->tk_pid, __func__,
transfertypes[rtype], transfertypes[wtype],
hdrlen, rpclen);
- req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
- req->rl_send_iov[0].length = hdrlen;
- req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
-
- req->rl_niovs = 1;
- if (rtype == rpcrdma_areadch)
- return 0;
-
- req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
- req->rl_send_iov[1].length = rpclen;
- req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
-
- req->rl_niovs = 2;
+ if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
+ &rqst->rq_snd_buf, rtype)) {
+ iptr = ERR_PTR(-EIO);
+ goto out_unmap;
+ }
return 0;
-out_overflow:
- pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n",
- hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]);
- iptr = ERR_PTR(-EIO);
-
out_unmap:
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
return PTR_ERR(iptr);
@@ -916,8 +977,10 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
* allowed to timeout, to discover the errors at that time.
*/
void
-rpcrdma_reply_handler(struct rpcrdma_rep *rep)
+rpcrdma_reply_handler(struct work_struct *work)
{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
@@ -1132,6 +1195,6 @@ out_duplicate:
repost:
r_xprt->rx_stats.bad_reply_count++;
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
rpcrdma_recv_buffer_put(rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a2a7519b0f23..20027f8de129 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
ret = -EIO;
goto out_unmap;
}
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
memset(&send_wr, 0, sizeof(send_wr));
ctxt->cqe.done = svc_rdma_wc_send;
@@ -159,35 +159,44 @@ out_unmap:
/* Server-side transport endpoint wants a whole page for its send
* buffer. The client RPC code constructs the RPC header in this
* buffer before it invokes ->send_request.
- *
- * Returns NULL if there was a temporary allocation failure.
*/
-static void *
-xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+static int
+xprt_rdma_bc_allocate(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ size_t size = rqst->rq_callsize;
struct svcxprt_rdma *rdma;
struct page *page;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- /* Prevent an infinite loop: try to make this case work */
- if (size > PAGE_SIZE)
+ if (size > PAGE_SIZE) {
WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
size);
+ return -EINVAL;
+ }
+ /* svc_rdma_sendto releases this page */
page = alloc_page(RPCRDMA_DEF_GFP);
if (!page)
- return NULL;
+ return -ENOMEM;
+ rqst->rq_buffer = page_address(page);
- return page_address(page);
+ rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP);
+ if (!rqst->rq_rbuffer) {
+ put_page(page);
+ return -ENOMEM;
+ }
+ return 0;
}
static void
-xprt_rdma_bc_free(void *buffer)
+xprt_rdma_bc_free(struct rpc_task *task)
{
- /* No-op: ctxt and page have already been freed. */
+ struct rpc_rqst *rqst = task->tk_rqstp;
+
+ kfree(rqst->rq_rbuffer);
}
static int
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 2c25606f2561..ad1df979b3f0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
ctxt->sge[pno].addr);
if (ret)
goto err;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[pno].length = len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 54d533300620..f5a91edcd233 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
return rp_ary;
}
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one rkey to invalidate.
+ *
+ * Find a candidate rkey to invalidate when sending a reply. Picks the
+ * first rkey it finds in the chunks lists.
+ *
+ * Returns zero if RPC's chunk lists are empty.
+ */
+static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
+ struct rpcrdma_write_array *wr_ary,
+ struct rpcrdma_write_array *rp_ary)
+{
+ struct rpcrdma_read_chunk *rd_ary;
+ struct rpcrdma_segment *arg_ch;
+ u32 inv_rkey;
+
+ inv_rkey = 0;
+
+ rd_ary = svc_rdma_get_read_chunk(rdma_argp);
+ if (rd_ary) {
+ inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
+ goto out;
+ }
+
+ if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
+ arg_ch = &wr_ary->wc_array[0].wc_target;
+ inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+ goto out;
+ }
+
+ if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
+ arg_ch = &rp_ary->wc_array[0].wc_target;
+ inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+ goto out;
+ }
+
+out:
+ dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
+ return inv_rkey;
+}
+
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
@@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_req_map *vec,
- int byte_count)
+ int byte_count,
+ u32 inv_rkey)
{
struct svc_rdma_op_ctxt *ctxt;
struct ib_send_wr send_wr;
@@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
ctxt->direction = DMA_TO_DEVICE;
@@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
@@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
- /*
- * If there are more pages than SGE, terminate SGE
- * list so that svc_rdma_unmap_dma doesn't attempt to
- * unmap garbage.
- */
- if (page_no+1 >= sge_no)
- ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* The loop above bumps sc_dma_used for each sge. The
- * xdr_buf.tail gets a separate sge, but resides in the
- * same page as xdr_buf.head. Don't count it twice.
- */
- if (sge_no > ctxt->count)
- atomic_dec(&rdma->sc_dma_used);
-
if (sge_no > rdma->sc_max_sge) {
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err;
@@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.wr_cqe = &ctxt->cqe;
send_wr.sg_list = ctxt->sge;
send_wr.num_sge = sge_no;
- send_wr.opcode = IB_WR_SEND;
+ if (inv_rkey) {
+ send_wr.opcode = IB_WR_SEND_WITH_INV;
+ send_wr.ex.invalidate_rkey = inv_rkey;
+ } else
+ send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
ret = svc_rdma_send(rdma, &send_wr);
@@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
int inline_bytes;
struct page *res_page;
struct svc_rdma_req_map *vec;
+ u32 inv_rkey;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
@@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
wr_ary = svc_rdma_get_write_array(rdma_argp);
rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
+ inv_rkey = 0;
+ if (rdma->sc_snd_w_inv)
+ inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+
/* Build an req vec for the XDR */
vec = svc_rdma_get_req_map(rdma);
ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
@@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
goto err1;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
- inline_bytes);
+ inline_bytes, inv_rkey);
if (ret < 0)
- goto err1;
+ goto err0;
svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
@@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
svc_rdma_put_context(ctxt, 1);
return;
}
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
/* Prepare SEND WR */
memset(&err_wr, 0, sizeof(err_wr));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index dd9440137834..6864fb967038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
out:
ctxt->count = 0;
+ ctxt->mapped_sges = 0;
ctxt->frmr = NULL;
return ctxt;
@@ -221,22 +222,27 @@ out_empty:
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
{
struct svcxprt_rdma *xprt = ctxt->xprt;
- int i;
- for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+ struct ib_device *device = xprt->sc_cm_id->device;
+ u32 lkey = xprt->sc_pd->local_dma_lkey;
+ unsigned int i, count;
+
+ for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
* the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
- atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_page(xprt->sc_cm_id->device,
+ if (ctxt->sge[i].lkey == lkey) {
+ count++;
+ ib_dma_unmap_page(device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
}
}
+ ctxt->mapped_sges = 0;
+ atomic_sub(count, &xprt->sc_dma_used);
}
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
goto err_put_ctxt;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
@@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
return ret;
}
+static void
+svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
+ struct rdma_conn_param *param)
+{
+ const struct rpcrdma_connect_private *pmsg = param->private_data;
+
+ if (pmsg &&
+ pmsg->cp_magic == rpcrdma_cmp_magic &&
+ pmsg->cp_version == RPCRDMA_CMP_VERSION) {
+ newxprt->sc_snd_w_inv = pmsg->cp_flags &
+ RPCRDMA_CMP_F_SND_W_INV_OK;
+
+ dprintk("svcrdma: client send_size %u, recv_size %u "
+ "remote inv %ssupported\n",
+ rpcrdma_decode_buffer_size(pmsg->cp_send_size),
+ rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
+ newxprt->sc_snd_w_inv ? "" : "un");
+ }
+}
+
/*
* This function handles the CONNECT_REQUEST event on a listening
* endpoint. It is passed the cma_id for the _new_ connection. The context in
@@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
* will call the recvfrom method on the listen xprt which will accept the new
* connection.
*/
-static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
+static void handle_connect_req(struct rdma_cm_id *new_cma_id,
+ struct rdma_conn_param *param)
{
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
struct svcxprt_rdma *newxprt;
@@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
new_cma_id->context = newxprt;
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
newxprt, newxprt->sc_cm_id, listen_xprt);
+ svc_rdma_parse_connect_private(newxprt, param);
/* Save client advertised inbound read limit for use later in accept. */
- newxprt->sc_ord = client_ird;
+ newxprt->sc_ord = param->initiator_depth;
/* Set the local and remote addresses in the transport */
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
@@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
"event = %s (%d)\n", cma_id, cma_id->context,
rdma_event_msg(event->event), event->event);
- handle_connect_req(cma_id,
- event->param.conn.initiator_depth);
+ handle_connect_req(cma_id, &event->param.conn);
break;
case RDMA_CM_EVENT_ESTABLISHED:
@@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct svcxprt_rdma *listen_rdma;
struct svcxprt_rdma *newxprt = NULL;
struct rdma_conn_param conn_param;
+ struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
unsigned int i;
@@ -993,7 +1021,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
- newxprt->sc_pd = ib_alloc_pd(dev);
+ newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
@@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dev->attrs.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
newxprt->sc_reader = rdma_read_chunk_frmr;
- }
+ } else
+ newxprt->sc_snd_w_inv = false;
/*
* Determine if a DMA MR is required and if so, what privs are required
@@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Swap out the handler */
newxprt->sc_cm_id->event_handler = rdma_cma_handler;
+ /* Construct RDMA-CM private message */
+ pmsg.cp_magic = rpcrdma_cmp_magic;
+ pmsg.cp_version = RPCRDMA_CMP_VERSION;
+ pmsg.cp_flags = 0;
+ pmsg.cp_send_size = pmsg.cp_recv_size =
+ rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);
+
/* Accept Connection */
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 0;
conn_param.initiator_depth = newxprt->sc_ord;
+ conn_param.private_data = &pmsg;
+ conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
if (ret) {
dprintk("svcrdma: failed to accept new connection, ret=%d\n",
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 81f0e879f019..ed5e285fd2ea 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -97,7 +97,7 @@ static struct ctl_table xr_tunables_table[] = {
.data = &xprt_rdma_max_inline_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_inline_size,
.extra2 = &max_inline_size,
},
@@ -106,7 +106,7 @@ static struct ctl_table xr_tunables_table[] = {
.data = &xprt_rdma_max_inline_write,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_inline_size,
.extra2 = &max_inline_size,
},
@@ -477,115 +477,152 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-/*
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
- * sequence.
+/* Allocate a fixed-size buffer in which to construct and send the
+ * RPC-over-RDMA header for this request.
+ */
+static bool
+rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ gfp_t flags)
+{
+ size_t size = RPCRDMA_HDRBUF_SIZE;
+ struct rpcrdma_regbuf *rb;
+
+ if (req->rl_rdmabuf)
+ return true;
+
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
+ if (IS_ERR(rb))
+ return false;
+
+ r_xprt->rx_stats.hardway_register_count += size;
+ req->rl_rdmabuf = rb;
+ return true;
+}
+
+static bool
+rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ size_t size, gfp_t flags)
+{
+ struct rpcrdma_regbuf *rb;
+
+ if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
+ return true;
+
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
+ if (IS_ERR(rb))
+ return false;
+
+ rpcrdma_free_regbuf(req->rl_sendbuf);
+ r_xprt->rx_stats.hardway_register_count += size;
+ req->rl_sendbuf = rb;
+ return true;
+}
+
+/* The rq_rcv_buf is used only if a Reply chunk is necessary.
+ * The decision to use a Reply chunk is made later in
+ * rpcrdma_marshal_req. This buffer is registered at that time.
*
- * The RPC layer allocates both send and receive buffers in the same call
- * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
- * We may register rq_rcv_buf when using reply chunks.
+ * Otherwise, the associated RPC Reply arrives in a separate
+ * Receive buffer, arbitrarily chosen by the HCA. The buffer
+ * allocated here for the RPC Reply is not utilized in that
+ * case. See rpcrdma_inline_fixup.
+ *
+ * A regbuf is used here to remember the buffer size.
*/
-static void *
-xprt_rdma_allocate(struct rpc_task *task, size_t size)
+static bool
+rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ size_t size, gfp_t flags)
{
- struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_regbuf *rb;
+
+ if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
+ return true;
+
+ rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
+ if (IS_ERR(rb))
+ return false;
+
+ rpcrdma_free_regbuf(req->rl_recvbuf);
+ r_xprt->rx_stats.hardway_register_count += size;
+ req->rl_recvbuf = rb;
+ return true;
+}
+
+/**
+ * xprt_rdma_allocate - allocate transport resources for an RPC
+ * @task: RPC task
+ *
+ * Return values:
+ * 0: Success; rq_buffer points to RPC buffer to use
+ * ENOMEM: Out of memory, call again later
+ * EIO: A permanent error occurred, do not retry
+ *
+ * The RDMA allocate/free functions need the task structure as a place
+ * to hide the struct rpcrdma_req, which is necessary for the actual
+ * send/recv sequence.
+ *
+ * xprt_rdma_allocate provides buffers that are already mapped for
+ * DMA, and a local DMA lkey is provided for each.
+ */
+static int
+xprt_rdma_allocate(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req;
- size_t min_size;
gfp_t flags;
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL)
- return NULL;
+ return -ENOMEM;
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
- if (req->rl_rdmabuf == NULL)
- goto out_rdmabuf;
- if (req->rl_sendbuf == NULL)
- goto out_sendbuf;
- if (size > req->rl_sendbuf->rg_size)
- goto out_sendbuf;
-
-out:
- dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
- req->rl_connect_cookie = 0; /* our reserved value */
- req->rl_task = task;
- return req->rl_sendbuf->rg_base;
-
-out_rdmabuf:
- min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
- rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
- if (IS_ERR(rb))
+ if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
goto out_fail;
- req->rl_rdmabuf = rb;
-
-out_sendbuf:
- /* XDR encoding and RPC/RDMA marshaling of this request has not
- * yet occurred. Thus a lower bound is needed to prevent buffer
- * overrun during marshaling.
- *
- * RPC/RDMA marshaling may choose to send payload bearing ops
- * inline, if the result is smaller than the inline threshold.
- * The value of the "size" argument accounts for header
- * requirements but not for the payload in these cases.
- *
- * Likewise, allocate enough space to receive a reply up to the
- * size of the inline threshold.
- *
- * It's unlikely that both the send header and the received
- * reply will be large, but slush is provided here to allow
- * flexibility when marshaling.
- */
- min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
- min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
- if (size < min_size)
- size = min_size;
-
- rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
- if (IS_ERR(rb))
+ if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
+ goto out_fail;
+ if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- rb->rg_owner = req;
- r_xprt->rx_stats.hardway_register_count += size;
- rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
- req->rl_sendbuf = rb;
- goto out;
+ dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
+ task->tk_pid, __func__, rqst->rq_callsize,
+ rqst->rq_rcvsize, req);
+
+ req->rl_connect_cookie = 0; /* our reserved value */
+ rpcrdma_set_xprtdata(rqst, req);
+ rqst->rq_buffer = req->rl_sendbuf->rg_base;
+ rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+ return 0;
out_fail:
rpcrdma_buffer_put(req);
- return NULL;
+ return -ENOMEM;
}
-/*
- * This function returns all RDMA resources to the pool.
+/**
+ * xprt_rdma_free - release resources allocated by xprt_rdma_allocate
+ * @task: RPC task
+ *
+ * Caller guarantees rqst->rq_buffer is non-NULL.
*/
static void
-xprt_rdma_free(void *buffer)
+xprt_rdma_free(struct rpc_task *task)
{
- struct rpcrdma_req *req;
- struct rpcrdma_xprt *r_xprt;
- struct rpcrdma_regbuf *rb;
-
- if (buffer == NULL)
- return;
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
- req = rb->rg_owner;
if (req->rl_backchannel)
return;
- r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
-
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req,
- !RPC_IS_ASYNC(req->rl_task));
-
+ ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
+ rpcrdma_unmap_sges(ia, req);
rpcrdma_buffer_put(req);
}
@@ -685,10 +722,11 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
- seq_printf(seq, "%lu %lu %lu\n",
+ seq_printf(seq, "%lu %lu %lu %lu\n",
r_xprt->rx_stats.mrs_recovered,
r_xprt->rx_stats.mrs_orphaned,
- r_xprt->rx_stats.mrs_allocated);
+ r_xprt->rx_stats.mrs_allocated,
+ r_xprt->rx_stats.local_inv_needed);
}
static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 799cce6cbe45..ec74289af7ec 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -129,15 +129,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
wc->status, wc->vendor_err);
}
-static void
-rpcrdma_receive_worker(struct work_struct *work)
-{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
-
- rpcrdma_reply_handler(rep);
-}
-
/* Perform basic sanity checking to avoid using garbage
* to update the credit grant value.
*/
@@ -161,13 +152,13 @@ rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
}
/**
- * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC
+ * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
*/
static void
-rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
+rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
@@ -185,6 +176,9 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
__func__, rep, wc->byte_len);
rep->rr_len = wc->byte_len;
+ rep->rr_wc_flags = wc->wc_flags;
+ rep->rr_inv_rkey = wc->ex.invalidate_rkey;
+
ib_dma_sync_single_for_cpu(rep->rr_device,
rdmab_addr(rep->rr_rdmabuf),
rep->rr_len, DMA_FROM_DEVICE);
@@ -204,6 +198,36 @@ out_fail:
goto out_schedule;
}
+static void
+rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
+ struct rdma_conn_param *param)
+{
+ struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ const struct rpcrdma_connect_private *pmsg = param->private_data;
+ unsigned int rsize, wsize;
+
+ /* Default settings for RPC-over-RDMA Version One */
+ r_xprt->rx_ia.ri_reminv_expected = false;
+ rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
+ wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
+
+ if (pmsg &&
+ pmsg->cp_magic == rpcrdma_cmp_magic &&
+ pmsg->cp_version == RPCRDMA_CMP_VERSION) {
+ r_xprt->rx_ia.ri_reminv_expected = true;
+ rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
+ wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
+ }
+
+ if (rsize < cdata->inline_rsize)
+ cdata->inline_rsize = rsize;
+ if (wsize < cdata->inline_wsize)
+ cdata->inline_wsize = wsize;
+ pr_info("rpcrdma: max send %u, max recv %u\n",
+ cdata->inline_wsize, cdata->inline_rsize);
+ rpcrdma_set_max_header_sizes(r_xprt);
+}
+
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
@@ -244,6 +268,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
" (%d initiator)\n",
__func__, attr->max_dest_rd_atomic,
attr->max_rd_atomic);
+ rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
case RDMA_CM_EVENT_CONNECT_ERROR:
connstate = -ENOTCONN;
@@ -387,7 +412,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
ia->ri_device = ia->ri_id->device;
- ia->ri_pd = ib_alloc_pd(ia->ri_device);
+ ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@ -454,11 +479,12 @@ int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
+ struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
struct ib_cq *sendcq, *recvcq;
unsigned int max_qp_wr;
int rc;
- if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
+ if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
dprintk("RPC: %s: insufficient sge's available\n",
__func__);
return -ENOMEM;
@@ -487,7 +513,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
- ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
+ ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -536,9 +562,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Initialize cma parameters */
memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));
- /* RPC/RDMA does not use private data */
- ep->rep_remote_cma.private_data = NULL;
- ep->rep_remote_cma.private_data_len = 0;
+ /* Prepare RDMA-CM private message */
+ pmsg->cp_magic = rpcrdma_cmp_magic;
+ pmsg->cp_version = RPCRDMA_CMP_VERSION;
+ pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
+ pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
+ pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
+ ep->rep_remote_cma.private_data = pmsg;
+ ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
@@ -849,6 +880,10 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
req->rl_cqe.done = rpcrdma_wc_send;
req->rl_buffer = &r_xprt->rx_buf;
INIT_LIST_HEAD(&req->rl_registered);
+ req->rl_send_wr.next = NULL;
+ req->rl_send_wr.wr_cqe = &req->rl_cqe;
+ req->rl_send_wr.sg_list = req->rl_send_sge;
+ req->rl_send_wr.opcode = IB_WR_SEND;
return req;
}
@@ -865,17 +900,21 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
if (rep == NULL)
goto out;
- rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
- GFP_KERNEL);
+ rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
+ DMA_FROM_DEVICE, GFP_KERNEL);
if (IS_ERR(rep->rr_rdmabuf)) {
rc = PTR_ERR(rep->rr_rdmabuf);
goto out_free;
}
rep->rr_device = ia->ri_device;
- rep->rr_cqe.done = rpcrdma_receive_wc;
+ rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
+ INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
+ rep->rr_recv_wr.next = NULL;
+ rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
+ rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
+ rep->rr_recv_wr.num_sge = 1;
return rep;
out_free:
@@ -966,17 +1005,18 @@ rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
}
static void
-rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
+rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
- rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
+ rpcrdma_free_regbuf(rep->rr_rdmabuf);
kfree(rep);
}
void
-rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+rpcrdma_destroy_req(struct rpcrdma_req *req)
{
- rpcrdma_free_regbuf(ia, req->rl_sendbuf);
- rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
+ rpcrdma_free_regbuf(req->rl_recvbuf);
+ rpcrdma_free_regbuf(req->rl_sendbuf);
+ rpcrdma_free_regbuf(req->rl_rdmabuf);
kfree(req);
}
@@ -1009,15 +1049,13 @@ rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- struct rpcrdma_ia *ia = rdmab_to_ia(buf);
-
cancel_delayed_work_sync(&buf->rb_recovery_worker);
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
rep = rpcrdma_buffer_get_rep_locked(buf);
- rpcrdma_destroy_rep(ia, rep);
+ rpcrdma_destroy_rep(rep);
}
buf->rb_send_count = 0;
@@ -1030,7 +1068,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
list_del(&req->rl_all);
spin_unlock(&buf->rb_reqslock);
- rpcrdma_destroy_req(ia, req);
+ rpcrdma_destroy_req(req);
spin_lock(&buf->rb_reqslock);
}
spin_unlock(&buf->rb_reqslock);
@@ -1129,7 +1167,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
- req->rl_niovs = 0;
+ req->rl_send_wr.num_sge = 0;
req->rl_reply = NULL;
spin_lock(&buffers->rb_lock);
@@ -1171,70 +1209,81 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
spin_unlock(&buffers->rb_lock);
}
-/*
- * Wrappers for internal-use kmalloc memory registration, used by buffer code.
- */
-
/**
- * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
- * @ia: controlling rpcrdma_ia
+ * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
* @size: size of buffer to be allocated, in bytes
+ * @direction: direction of data movement
* @flags: GFP flags
*
- * Returns pointer to private header of an area of internally
- * registered memory, or an ERR_PTR. The registered buffer follows
- * the end of the private header.
+ * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
+ * can be persistently DMA-mapped for I/O.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
- * receiving the payload of RDMA RECV operations. regbufs are not
- * used for RDMA READ/WRITE operations, thus are registered only for
- * LOCAL access.
+ * receiving the payload of RDMA RECV operations. During Long Calls
+ * or Replies they may be registered externally via ro_map.
*/
struct rpcrdma_regbuf *
-rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
+rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
+ gfp_t flags)
{
struct rpcrdma_regbuf *rb;
- struct ib_sge *iov;
rb = kmalloc(sizeof(*rb) + size, flags);
if (rb == NULL)
- goto out;
+ return ERR_PTR(-ENOMEM);
- iov = &rb->rg_iov;
- iov->addr = ib_dma_map_single(ia->ri_device,
- (void *)rb->rg_base, size,
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(ia->ri_device, iov->addr))
- goto out_free;
+ rb->rg_device = NULL;
+ rb->rg_direction = direction;
+ rb->rg_iov.length = size;
- iov->length = size;
- iov->lkey = ia->ri_pd->local_dma_lkey;
- rb->rg_size = size;
- rb->rg_owner = NULL;
return rb;
+}
-out_free:
- kfree(rb);
-out:
- return ERR_PTR(-ENOMEM);
+/**
+ * __rpcrdma_map_regbuf - DMA-map a regbuf
+ * @ia: controlling rpcrdma_ia
+ * @rb: regbuf to be mapped
+ */
+bool
+__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+{
+ if (rb->rg_direction == DMA_NONE)
+ return false;
+
+ rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
+ (void *)rb->rg_base,
+ rdmab_length(rb),
+ rb->rg_direction);
+ if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
+ return false;
+
+ rb->rg_device = ia->ri_device;
+ rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
+ return true;
+}
+
+static void
+rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
+{
+ if (!rpcrdma_regbuf_is_mapped(rb))
+ return;
+
+ ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
+ rdmab_length(rb), rb->rg_direction);
+ rb->rg_device = NULL;
}
/**
* rpcrdma_free_regbuf - deregister and free registered buffer
- * @ia: controlling rpcrdma_ia
* @rb: regbuf to be deregistered and freed
*/
void
-rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
- struct ib_sge *iov;
-
if (!rb)
return;
- iov = &rb->rg_iov;
- ib_dma_unmap_single(ia->ri_device,
- iov->addr, iov->length, DMA_BIDIRECTIONAL);
+ rpcrdma_dma_unmap_regbuf(rb);
kfree(rb);
}
@@ -1248,39 +1297,28 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep,
struct rpcrdma_req *req)
{
- struct ib_device *device = ia->ri_device;
- struct ib_send_wr send_wr, *send_wr_fail;
- struct rpcrdma_rep *rep = req->rl_reply;
- struct ib_sge *iov = req->rl_send_iov;
- int i, rc;
+ struct ib_send_wr *send_wr = &req->rl_send_wr;
+ struct ib_send_wr *send_wr_fail;
+ int rc;
- if (rep) {
- rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ if (req->rl_reply) {
+ rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
if (rc)
return rc;
req->rl_reply = NULL;
}
- send_wr.next = NULL;
- send_wr.wr_cqe = &req->rl_cqe;
- send_wr.sg_list = iov;
- send_wr.num_sge = req->rl_niovs;
- send_wr.opcode = IB_WR_SEND;
-
- for (i = 0; i < send_wr.num_sge; i++)
- ib_dma_sync_single_for_device(device, iov[i].addr,
- iov[i].length, DMA_TO_DEVICE);
dprintk("RPC: %s: posting %d s/g entries\n",
- __func__, send_wr.num_sge);
+ __func__, send_wr->num_sge);
if (DECR_CQCOUNT(ep) > 0)
- send_wr.send_flags = 0;
+ send_wr->send_flags = 0;
else { /* Provider must take a send completion every now and then */
INIT_CQCOUNT(ep);
- send_wr.send_flags = IB_SEND_SIGNALED;
+ send_wr->send_flags = IB_SEND_SIGNALED;
}
- rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+ rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc)
goto out_postsend_err;
return 0;
@@ -1290,32 +1328,24 @@ out_postsend_err:
return -ENOTCONN;
}
-/*
- * (Re)post a receive buffer.
- */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
- struct rpcrdma_ep *ep,
struct rpcrdma_rep *rep)
{
- struct ib_recv_wr recv_wr, *recv_wr_fail;
+ struct ib_recv_wr *recv_wr_fail;
int rc;
- recv_wr.next = NULL;
- recv_wr.wr_cqe = &rep->rr_cqe;
- recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
- recv_wr.num_sge = 1;
-
- ib_dma_sync_single_for_cpu(ia->ri_device,
- rdmab_addr(rep->rr_rdmabuf),
- rdmab_length(rep->rr_rdmabuf),
- DMA_BIDIRECTIONAL);
-
- rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
+ if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
+ goto out_map;
+ rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
if (rc)
goto out_postrecv;
return 0;
+out_map:
+ pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
+ return -EIO;
+
out_postrecv:
pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
return -ENOTCONN;
@@ -1333,7 +1363,6 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
{
struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_rep *rep;
int rc;
@@ -1344,7 +1373,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
rep = rpcrdma_buffer_get_rep_locked(buffers);
spin_unlock(&buffers->rb_lock);
- rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ rc = rpcrdma_ep_post_recv(ia, rep);
if (rc)
goto out_rc;
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a71b0f5897d8..0d35b761c883 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -70,9 +70,11 @@ struct rpcrdma_ia {
struct ib_pd *ri_pd;
struct completion ri_done;
int ri_async_rc;
+ unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
+ bool ri_reminv_expected;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -87,6 +89,7 @@ struct rpcrdma_ep {
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
+ struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
@@ -112,9 +115,9 @@ struct rpcrdma_ep {
*/
struct rpcrdma_regbuf {
- size_t rg_size;
- struct rpcrdma_req *rg_owner;
struct ib_sge rg_iov;
+ struct ib_device *rg_device;
+ enum dma_data_direction rg_direction;
__be32 rg_base[0] __attribute__ ((aligned(256)));
};
@@ -162,7 +165,10 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
* The smallest inline threshold is 1024 bytes, ensuring that
* at least 750 bytes are available for RPC messages.
*/
-#define RPCRDMA_MAX_HDR_SEGS (8)
+enum {
+ RPCRDMA_MAX_HDR_SEGS = 8,
+ RPCRDMA_HDRBUF_SIZE = 256,
+};
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
@@ -182,10 +188,13 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
unsigned int rr_len;
+ int rr_wc_flags;
+ u32 rr_inv_rkey;
struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
struct list_head rr_list;
+ struct ib_recv_wr rr_recv_wr;
struct rpcrdma_regbuf *rr_rdmabuf;
};
@@ -276,19 +285,30 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
char *mr_offset; /* kva if no page, else offset */
};
-#define RPCRDMA_MAX_IOVS (2)
+/* Reserve enough Send SGEs to send a maximum size inline request:
+ * - RPC-over-RDMA header
+ * - xdr_buf head iovec
+ * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages
+ * - xdr_buf tail iovec
+ */
+enum {
+ RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
+ RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
+ RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
+};
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_free;
- unsigned int rl_niovs;
+ unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
- struct rpc_task *rl_task;
struct rpcrdma_buffer *rl_buffer;
- struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
- struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS];
- struct rpcrdma_regbuf *rl_rdmabuf;
- struct rpcrdma_regbuf *rl_sendbuf;
+ struct rpcrdma_rep *rl_reply;
+ struct ib_send_wr rl_send_wr;
+ struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
+ struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
+ struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
+ struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
struct ib_cqe rl_cqe;
struct list_head rl_all;
@@ -298,14 +318,16 @@ struct rpcrdma_req {
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
+static inline void
+rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
+{
+ rqst->rq_xprtdata = req;
+}
+
static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst)
{
- void *buffer = rqst->rq_buffer;
- struct rpcrdma_regbuf *rb;
-
- rb = container_of(buffer, struct rpcrdma_regbuf, rg_base);
- return rb->rg_owner;
+ return rqst->rq_xprtdata;
}
/*
@@ -356,15 +378,6 @@ struct rpcrdma_create_data_internal {
unsigned int padding; /* non-rdma write header padding */
};
-#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
- (rpcx_to_rdmad(rq->rq_xprt).inline_rsize)
-
-#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
- (rpcx_to_rdmad(rq->rq_xprt).inline_wsize)
-
-#define RPCRDMA_INLINE_PAD_VALUE(rq)\
- rpcx_to_rdmad(rq->rq_xprt).padding
-
/*
* Statistics for RPCRDMA
*/
@@ -386,6 +399,7 @@ struct rpcrdma_stats {
unsigned long mrs_recovered;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
+ unsigned long local_inv_needed;
};
/*
@@ -409,6 +423,7 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mw *);
void (*ro_release_mr)(struct rpcrdma_mw *);
const char *ro_displayname;
+ const int ro_send_w_inv_ok;
};
extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
@@ -461,15 +476,14 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
-int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
- struct rpcrdma_rep *);
+int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
/*
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
-void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
+void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
@@ -482,10 +496,24 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
-struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
- size_t, gfp_t);
-void rpcrdma_free_regbuf(struct rpcrdma_ia *,
- struct rpcrdma_regbuf *);
+struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
+ gfp_t);
+bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
+void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
+
+static inline bool
+rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
+{
+ return rb->rg_device != NULL;
+}
+
+static inline bool
+rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+{
+ if (likely(rpcrdma_regbuf_is_mapped(rb)))
+ return true;
+ return __rpcrdma_dma_map_regbuf(ia, rb);
+}
int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
@@ -507,15 +535,25 @@ rpcrdma_data_dir(bool writing)
*/
void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
-void rpcrdma_reply_handler(struct rpcrdma_rep *);
+void rpcrdma_reply_handler(struct work_struct *);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
+
+enum rpcrdma_chunktype {
+ rpcrdma_noch = 0,
+ rpcrdma_readch,
+ rpcrdma_areadch,
+ rpcrdma_writech,
+ rpcrdma_replych
+};
+
+bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
+ u32, struct xdr_buf *, enum rpcrdma_chunktype);
+void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_marshal_req(struct rpc_rqst *);
-void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *,
- struct rpcrdma_create_data_internal *,
- unsigned int);
+void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bf168838a029..e01c825bc683 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -473,7 +473,16 @@ static int xs_nospace(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
/* Race breaker in case memory is freed before above code is called */
- sk->sk_write_space(sk);
+ if (ret == -EAGAIN) {
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+ rcu_read_unlock();
+
+ sk->sk_write_space(sk);
+ }
return ret;
}
@@ -2533,35 +2542,39 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
* we allocate pages instead doing a kmalloc like rpc_malloc is because we want
* to use the server side send routines.
*/
-static void *bc_malloc(struct rpc_task *task, size_t size)
+static int bc_malloc(struct rpc_task *task)
{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ size_t size = rqst->rq_callsize;
struct page *page;
struct rpc_buffer *buf;
- WARN_ON_ONCE(size > PAGE_SIZE - sizeof(struct rpc_buffer));
- if (size > PAGE_SIZE - sizeof(struct rpc_buffer))
- return NULL;
+ if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) {
+ WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n",
+ size);
+ return -EINVAL;
+ }
page = alloc_page(GFP_KERNEL);
if (!page)
- return NULL;
+ return -ENOMEM;
buf = page_address(page);
buf->len = PAGE_SIZE;
- return buf->data;
+ rqst->rq_buffer = buf->data;
+ rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
+ return 0;
}
/*
* Free the space allocated in the bc_alloc routine
*/
-static void bc_free(void *buffer)
+static void bc_free(struct rpc_task *task)
{
+ void *buffer = task->tk_rqstp->rq_buffer;
struct rpc_buffer *buf;
- if (!buffer)
- return;
-
buf = container_of(buffer, struct rpc_buffer, data);
free_page((unsigned long)buf);
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 02beb35f577f..3b95fe980fa2 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -771,6 +771,9 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
int err;
+ if (!netif_is_bridge_port(dev))
+ return -EOPNOTSUPP;
+
err = switchdev_port_attr_get(dev, &attr);
if (err && err != -EOPNOTSUPP)
return err;
@@ -926,6 +929,9 @@ int switchdev_port_bridge_setlink(struct net_device *dev,
struct nlattr *afspec;
int err = 0;
+ if (!netif_is_bridge_port(dev))
+ return -EOPNOTSUPP;
+
protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
IFLA_PROTINFO);
if (protinfo) {
@@ -959,6 +965,9 @@ int switchdev_port_bridge_dellink(struct net_device *dev,
{
struct nlattr *afspec;
+ if (!netif_is_bridge_port(dev))
+ return -EOPNOTSUPP;
+
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
IFLA_AF_SPEC);
if (afspec)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 753f774cb46f..aa1babbea385 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -247,11 +247,17 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb)
*
* RCU is locked, no other locks set
*/
-void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked)
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr)
{
struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ u16 acked = msg_bcast_ack(hdr);
struct sk_buff_head xmitq;
+ /* Ignore bc acks sent by peer before bcast synch point was received */
+ if (msg_bc_ack_invalid(hdr))
+ return;
+
__skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
@@ -279,11 +285,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
__skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
- if (msg_type(hdr) == STATE_MSG) {
+ if (msg_type(hdr) != STATE_MSG) {
+ tipc_link_bc_init_rcv(l, hdr);
+ } else if (!msg_bc_ack_invalid(hdr)) {
tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
- } else {
- tipc_link_bc_init_rcv(l, hdr);
}
tipc_bcast_unlock(net);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 5ffe34472ccd..855d53c64ab3 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -55,7 +55,8 @@ void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
int tipc_bcast_get_mtu(struct net *net);
int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
-void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
+ struct tipc_msg *hdr);
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
struct tipc_msg *hdr);
int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b36e16cdc945..1055164c6232 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1312,6 +1312,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_next_sent(hdr, l->snd_nxt);
msg_set_ack(hdr, l->rcv_nxt - 1);
msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+ msg_set_bc_ack_invalid(hdr, !node_up);
msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
msg_set_link_tolerance(hdr, tolerance);
msg_set_linkprio(hdr, priority);
@@ -1574,6 +1575,7 @@ static void tipc_link_build_bc_init_msg(struct tipc_link *l,
__skb_queue_head_init(&list);
if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list))
return;
+ msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true);
tipc_link_xmit(l, &list, xmitq);
}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c3832cdf2278..50a739860d37 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -714,6 +714,23 @@ static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s)
msg_set_bits(m, 5, 13, 0x1, s);
}
+static inline bool msg_bc_ack_invalid(struct tipc_msg *m)
+{
+ switch (msg_user(m)) {
+ case BCAST_PROTOCOL:
+ case NAME_DISTRIBUTOR:
+ case LINK_PROTOCOL:
+ return msg_bits(m, 5, 14, 0x1);
+ default:
+ return false;
+ }
+}
+
+static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid)
+{
+ msg_set_bits(m, 5, 14, 0x1, invalid);
+}
+
static inline char *msg_media_addr(struct tipc_msg *m)
{
return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index a04fe9be1c60..c1cfd92de17a 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -156,6 +156,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
pr_warn("Bulk publication failure\n");
return;
}
+ msg_set_bc_ack_invalid(buf_msg(skb), true);
item = (struct distr_item *)msg_data(buf_msg(skb));
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7ef14e2d2356..9d2f4c2b08ab 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1535,7 +1535,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
if (unlikely(usr == LINK_PROTOCOL))
tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
- tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
+ tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
/* Receive packet directly if conditions permit */
tipc_node_read_lock(n);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d80cd3f7503f..78cab9c5a445 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -407,6 +407,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
if (ntohs(addr->proto) == ETH_P_IP) {
struct sockaddr_in ip4;
+ memset(&ip4, 0, sizeof(ip4));
ip4.sin_family = AF_INET;
ip4.sin_port = addr->port;
ip4.sin_addr.s_addr = addr->ipv4.s_addr;
@@ -417,6 +418,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
} else if (ntohs(addr->proto) == ETH_P_IPV6) {
struct sockaddr_in6 ip6;
+ memset(&ip6, 0, sizeof(ip6));
ip6.sin6_family = AF_INET6;
ip6.sin6_port = addr->port;
memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 8309687a56b0..145082e2ba36 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
return unix_stream_read_generic(&state);
}
-static ssize_t skb_unix_socket_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
- struct unix_sock *u = unix_sk(sk);
-
- mutex_unlock(&u->iolock);
- ret = splice_to_pipe(pipe, spd);
- mutex_lock(&u->iolock);
-
- return ret;
-}
-
static int unix_stream_splice_actor(struct sk_buff *skb,
int skip, int chunk,
struct unix_stream_read_state *state)
{
return skb_splice_bits(skb, state->socket->sk,
UNIXCB(skb).consumed + skip,
- state->pipe, chunk, state->splice_flags,
- skb_unix_socket_splice);
+ state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 0082f4b01795..14b3f007826d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -104,13 +104,16 @@ static int wiphy_suspend(struct device *dev)
rtnl_lock();
if (rdev->wiphy.registered) {
- if (!rdev->wiphy.wowlan_config)
+ if (!rdev->wiphy.wowlan_config) {
cfg80211_leave_all(rdev);
+ cfg80211_process_rdev_events(rdev);
+ }
if (rdev->ops->suspend)
ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
if (ret == 1) {
/* Driver refuse to configure wowlan */
cfg80211_leave_all(rdev);
+ cfg80211_process_rdev_events(rdev);
ret = rdev_suspend(rdev, NULL);
}
}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 8edce22d1b93..5ea12afc7706 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -420,8 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
}
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
-static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
- const u8 *addr, enum nl80211_iftype iftype)
+int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+ const u8 *addr, enum nl80211_iftype iftype)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -525,13 +525,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
return 0;
}
-
-int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype)
-{
- return __ieee80211_data_to_8023(skb, NULL, addr, iftype);
-}
-EXPORT_SYMBOL(ieee80211_data_to_8023);
+EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
enum nl80211_iftype iftype,
@@ -746,24 +740,18 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
const u8 *addr, enum nl80211_iftype iftype,
const unsigned int extra_headroom,
- bool has_80211_header)
+ const u8 *check_da, const u8 *check_sa)
{
unsigned int hlen = ALIGN(extra_headroom, 4);
struct sk_buff *frame = NULL;
u16 ethertype;
u8 *payload;
- int offset = 0, remaining, err;
+ int offset = 0, remaining;
struct ethhdr eth;
bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
bool reuse_skb = false;
bool last = false;
- if (has_80211_header) {
- err = __ieee80211_data_to_8023(skb, &eth, addr, iftype);
- if (err)
- goto out;
- }
-
while (!last) {
unsigned int subframe_len;
int len;
@@ -780,8 +768,17 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
goto purge;
offset += sizeof(struct ethhdr);
- /* reuse skb for the last subframe */
last = remaining <= subframe_len + padding;
+
+ /* FIXME: should we really accept multicast DA? */
+ if ((check_da && !is_multicast_ether_addr(eth.h_dest) &&
+ !ether_addr_equal(check_da, eth.h_dest)) ||
+ (check_sa && !ether_addr_equal(check_sa, eth.h_source))) {
+ offset += len + padding;
+ continue;
+ }
+
+ /* reuse skb for the last subframe */
if (!skb_is_nonlinear(skb) && !reuse_frag && last) {
skb_pull(skb, offset);
frame = skb;
@@ -819,7 +816,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
purge:
__skb_queue_purge(list);
- out:
dev_kfree_skb(skb);
}
EXPORT_SYMBOL(ieee80211_amsdu_to_8023s);
OpenPOWER on IntegriCloud