Diffstat (limited to 'net')
-rw-r--r--  net/appletalk/ddp.c              |    1
-rw-r--r--  net/atm/lec.c                    |    4
-rw-r--r--  net/bluetooth/hci_core.c         |    5
-rw-r--r--  net/bluetooth/hci_event.c        |   12
-rw-r--r--  net/bluetooth/hci_request.c      |    5
-rw-r--r--  net/bluetooth/hci_request.h      |    1
-rw-r--r--  net/core/dev.c                   |    6
-rw-r--r--  net/core/devlink.c               |   97
-rw-r--r--  net/core/pktgen.c                |    2
-rw-r--r--  net/dsa/Kconfig                  |    9
-rw-r--r--  net/dsa/Makefile                 |    1
-rw-r--r--  net/dsa/dsa.c                    |    5
-rw-r--r--  net/dsa/dsa_priv.h               |   14
-rw-r--r--  net/dsa/legacy.c                 |  747
-rw-r--r--  net/dsa/port.c                   |   85
-rw-r--r--  net/dsa/slave.c                  |   24
-rw-r--r--  net/dsa/switch.c                 |   31
-rw-r--r--  net/ipv4/Kconfig                 |   29
-rw-r--r--  net/ipv4/Makefile                |    3
-rw-r--r--  net/ipv4/esp4.c                  |   20
-rw-r--r--  net/ipv4/esp4_offload.c          |   58
-rw-r--r--  net/ipv4/fib_semantics.c         |   48
-rw-r--r--  net/ipv4/ip_input.c              |   11
-rw-r--r--  net/ipv4/ip_output.c             |    1
-rw-r--r--  net/ipv4/ip_vti.c                |   75
-rw-r--r--  net/ipv4/route.c                 |   75
-rw-r--r--  net/ipv4/tcp.c                   |   12
-rw-r--r--  net/ipv4/tcp_input.c             |   99
-rw-r--r--  net/ipv4/tcp_ipv4.c              |   13
-rw-r--r--  net/ipv4/tcp_metrics.c           |   10
-rw-r--r--  net/ipv4/tcp_minisocks.c         |    5
-rw-r--r--  net/ipv4/tcp_output.c            |    4
-rw-r--r--  net/ipv4/tcp_timer.c             |    3
-rw-r--r--  net/ipv4/udp_offload.c           |   16
-rw-r--r--  net/ipv4/xfrm4_mode_beet.c       |  155
-rw-r--r--  net/ipv4/xfrm4_mode_transport.c  |  114
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c     |  152
-rw-r--r--  net/ipv4/xfrm4_output.c          |   27
-rw-r--r--  net/ipv4/xfrm4_policy.c          |  127
-rw-r--r--  net/ipv4/xfrm4_protocol.c        |    3
-rw-r--r--  net/ipv6/Kconfig                 |   35
-rw-r--r--  net/ipv6/Makefile                |    4
-rw-r--r--  net/ipv6/esp6_offload.c          |   48
-rw-r--r--  net/ipv6/ip6_fib.c               |    4
-rw-r--r--  net/ipv6/ip6_flowlabel.c         |   22
-rw-r--r--  net/ipv6/ip6_input.c             |   12
-rw-r--r--  net/ipv6/ip6_vti.c               |    6
-rw-r--r--  net/ipv6/route.c                 |   70
-rw-r--r--  net/ipv6/tcp_ipv6.c              |    5
-rw-r--r--  net/ipv6/udp.c                   |    5
-rw-r--r--  net/ipv6/xfrm6_mode_beet.c       |  131
-rw-r--r--  net/ipv6/xfrm6_mode_ro.c         |   85
-rw-r--r--  net/ipv6/xfrm6_mode_transport.c  |  121
-rw-r--r--  net/ipv6/xfrm6_mode_tunnel.c     |  151
-rw-r--r--  net/ipv6/xfrm6_output.c          |   36
-rw-r--r--  net/ipv6/xfrm6_policy.c          |  126
-rw-r--r--  net/ipv6/xfrm6_protocol.c        |    3
-rw-r--r--  net/ipv6/xfrm6_tunnel.c          |    6
-rw-r--r--  net/key/af_key.c                 |    4
-rw-r--r--  net/l2tp/l2tp_core.c             |   10
-rw-r--r--  net/mac80211/debugfs_netdev.c    |    2
-rw-r--r--  net/mac80211/ht.c                |    5
-rw-r--r--  net/mac80211/iface.c             |    3
-rw-r--r--  net/netlink/genetlink.c          |   28
-rw-r--r--  net/openvswitch/conntrack.c      |    4
-rw-r--r--  net/openvswitch/datapath.c       |    7
-rw-r--r--  net/packet/af_packet.c           |   37
-rw-r--r--  net/rds/ib_recv.c                |    8
-rw-r--r--  net/rxrpc/call_object.c          |   32
-rw-r--r--  net/sched/cls_u32.c              |    2
-rw-r--r--  net/sched/sch_generic.c          |    2
-rw-r--r--  net/sched/sch_taprio.c           |  605
-rw-r--r--  net/sctp/sm_sideeffect.c         |   29
-rw-r--r--  net/sctp/sm_statefuns.c          |   35
-rw-r--r--  net/socket.c                     |   20
-rw-r--r--  net/tipc/link.c                  |   35
-rw-r--r--  net/tipc/link.h                  |    2
-rw-r--r--  net/tipc/node.c                  |   54
-rw-r--r--  net/tls/tls_device.c             |   39
-rw-r--r--  net/tls/tls_device_fallback.c    |    3
-rw-r--r--  net/wireless/reg.c               |    5
-rw-r--r--  net/xfrm/Kconfig                 |    8
-rw-r--r--  net/xfrm/xfrm_device.c           |   61
-rw-r--r--  net/xfrm/xfrm_inout.h            |   38
-rw-r--r--  net/xfrm/xfrm_input.c            |  299
-rw-r--r--  net/xfrm/xfrm_interface.c        |   23
-rw-r--r--  net/xfrm/xfrm_output.c           |  381
-rw-r--r--  net/xfrm/xfrm_policy.c           |  282
-rw-r--r--  net/xfrm/xfrm_state.c            |  188
-rw-r--r--  net/xfrm/xfrm_user.c             |   16
90 files changed, 2388 insertions(+), 2858 deletions(-)
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index e2511027d19b..a2555023c654 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1915,6 +1915,7 @@ static int __init atalk_init(void)
ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
if (!ddp_dl) {
pr_crit("Unable to register DDP with SNAP.\n");
+ rc = -ENOMEM;
goto out_sock;
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ad4f829193f0..a0311493b01b 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -726,9 +726,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
struct lec_priv *priv;
if (arg < 0)
- i = 0;
- else
- i = arg;
+ arg = 0;
if (arg >= MAX_LEC_ITF)
return -EINVAL;
i = array_index_nospec(arg, MAX_LEC_ITF);
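
[Note: the lec.c hunk clamps the user-supplied interface index with array_index_nospec() after the bounds check, so a mispredicted `arg >= MAX_LEC_ITF` branch cannot speculatively index past the table. A minimal userspace sketch of the idea follows; the branch-free mask mirrors the generic fallback in include/linux/nospec.h and relies on arithmetic right shift, but treat it as an illustration rather than the exact kernel implementation.]

    #include <stdio.h>

    /* Branch-free mask: all ones when idx < size, zero otherwise, so a
     * speculated out-of-bounds index is forced to 0 instead of leaking. */
    static unsigned long index_mask_nospec(unsigned long idx, unsigned long size)
    {
        return ~(long)(idx | (size - 1UL - idx)) >> (sizeof(long) * 8 - 1);
    }

    int main(void)
    {
        const unsigned long max_itf = 48;   /* stand-in for MAX_LEC_ITF */
        long arg = 7;                       /* user-controlled index */

        if (arg < 0)
            arg = 0;
        if ((unsigned long)arg >= max_itf)
            return 1;                       /* architectural bounds check */
        /* speculative clamp, as in i = array_index_nospec(arg, MAX_LEC_ITF) */
        unsigned long i = arg & index_mask_nospec(arg, max_itf);
        printf("safe index: %lu\n", i);
        return 0;
    }
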
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3d9175f130b3..b81bf53c5ac4 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4381,6 +4381,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
return;
}
+ /* If we reach this point this event matches the last command sent */
+ hci_dev_clear_flag(hdev, HCI_CMD_PENDING);
+
/* If the command succeeded and there's still more commands in
* this request the request is not yet complete.
*/
@@ -4491,6 +4494,8 @@ static void hci_cmd_work(struct work_struct *work)
hdev->sent_cmd = skb_clone(skb, GFP_KERNEL);
if (hdev->sent_cmd) {
+ if (hci_req_status_pend(hdev))
+ hci_dev_set_flag(hdev, HCI_CMD_PENDING);
atomic_dec(&hdev->cmd_cnt);
hci_send_frame(hdev, skb);
if (test_bit(HCI_RESET, &hdev->flags))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 66b631ab0d35..9e4fcf406d9c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3404,6 +3404,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
req_complete_skb);
+ if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
+ bt_dev_err(hdev,
+ "unexpected event for opcode 0x%4.4x", *opcode);
+ return;
+ }
+
if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
queue_work(hdev->workqueue, &hdev->cmd_work);
}
@@ -3511,6 +3517,12 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete,
req_complete_skb);
+ if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) {
+ bt_dev_err(hdev,
+ "unexpected event for opcode 0x%4.4x", *opcode);
+ return;
+ }
+
if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q))
queue_work(hdev->workqueue, &hdev->cmd_work);
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index ca73d36cc149..e9a95ed65491 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -46,6 +46,11 @@ void hci_req_purge(struct hci_request *req)
skb_queue_purge(&req->cmd_q);
}
+bool hci_req_status_pend(struct hci_dev *hdev)
+{
+ return hdev->req_status == HCI_REQ_PEND;
+}
+
static int req_run(struct hci_request *req, hci_req_complete_t complete,
hci_req_complete_skb_t complete_skb)
{
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 692cc8b13368..55b2050cc9ff 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -37,6 +37,7 @@ struct hci_request {
void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
void hci_req_purge(struct hci_request *req);
+bool hci_req_status_pend(struct hci_dev *hdev);
int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
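
[Note: the three Bluetooth hunks work as a unit: hci_cmd_work() sets HCI_CMD_PENDING when it transmits a command for a pending request, hci_req_cmd_complete() clears the flag once the incoming event matches the last command sent, and the two event handlers bail out, without kicking the command queue, on events that arrive while the flag is still set. Below is a condensed toy model of that handshake with the locking and skb plumbing stripped out; opcodes and control flow are simplified assumptions.]

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model of the HCI_CMD_PENDING handshake added by this patch. */
    static bool cmd_pending;            /* HCI_CMD_PENDING flag */
    static unsigned int last_opcode;

    static void send_cmd(unsigned int opcode)
    {
        last_opcode = opcode;
        cmd_pending = true;             /* set in hci_cmd_work() */
    }

    static void handle_event(unsigned int opcode)
    {
        if (opcode == last_opcode)
            cmd_pending = false;        /* cleared in hci_req_cmd_complete() */

        if (cmd_pending) {              /* stray event: reject, don't run queue */
            printf("unexpected event for opcode 0x%4.4x\n", opcode);
            return;
        }
        printf("event 0x%4.4x completes the command\n", opcode);
    }

    int main(void)
    {
        send_cmd(0x0c03);               /* some command in flight */
        handle_event(0x0c14);           /* mismatched event is dropped */
        handle_event(0x0c03);           /* the real completion gets through */
        return 0;
    }
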
diff --git a/net/core/dev.c b/net/core/dev.c
index 22f2640f559a..108ac8137b9b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4987,7 +4987,8 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
if (pt_prev)
- ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
+ skb->dev, pt_prev, orig_dev);
return ret;
}
@@ -5033,7 +5034,8 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
else
list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);
- pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
+ skb->dev, pt_prev, orig_dev);
}
}
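
[Note: INDIRECT_CALL_INET() lets the receive hot path compare the handler pointer against ipv6_rcv and ip_rcv and call whichever matches directly, falling back to an indirect call only on a miss; with retpolines enabled that saves a mispredicted indirect branch per packet. The same wrapper family appears again in the ip_input.c hunks further down. A simplified userspace rendering of the two-target macro follows; the real one lives in include/linux/indirect_call_wrapper.h and is only active under CONFIG_RETPOLINE, so this expansion is illustrative.]

    #include <stdio.h>

    /* Simplified INDIRECT_CALL_2(): try the likely targets directly,
     * fall back to the indirect call only if neither matches. */
    #define INDIRECT_CALL_2(f, f2, f1, ...)         \
        ((f) == (f2) ? (f2)(__VA_ARGS__) :          \
         (f) == (f1) ? (f1)(__VA_ARGS__) : (f)(__VA_ARGS__))

    static int ip_rcv_demo(int len)   { printf("ipv4 rcv %d\n", len); return 0; }
    static int ipv6_rcv_demo(int len) { printf("ipv6 rcv %d\n", len); return 0; }

    int main(void)
    {
        int (*handler)(int) = ip_rcv_demo;    /* plays pt_prev->func */
        return INDIRECT_CALL_2(handler, ipv6_rcv_demo, ip_rcv_demo, 64);
    }
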
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4e28d04c0165..d43bc52b8840 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -20,6 +20,7 @@
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
+#include <linux/refcount.h>
#include <rdma/ib_verbs.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -4432,6 +4433,7 @@ struct devlink_health_reporter {
u64 error_count;
u64 recovery_count;
u64 last_recovery_ts;
+ refcount_t refcount;
};
void *
@@ -4447,6 +4449,7 @@ devlink_health_reporter_find_by_name(struct devlink *devlink,
{
struct devlink_health_reporter *reporter;
+ lockdep_assert_held(&devlink->reporters_lock);
list_for_each_entry(reporter, &devlink->reporter_list, list)
if (!strcmp(reporter->ops->name, reporter_name))
return reporter;
@@ -4470,7 +4473,7 @@ devlink_health_reporter_create(struct devlink *devlink,
{
struct devlink_health_reporter *reporter;
- mutex_lock(&devlink->lock);
+ mutex_lock(&devlink->reporters_lock);
if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
reporter = ERR_PTR(-EEXIST);
goto unlock;
@@ -4494,9 +4497,10 @@ devlink_health_reporter_create(struct devlink *devlink,
reporter->graceful_period = graceful_period;
reporter->auto_recover = auto_recover;
mutex_init(&reporter->dump_lock);
+ refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list);
unlock:
- mutex_unlock(&devlink->lock);
+ mutex_unlock(&devlink->reporters_lock);
return reporter;
}
EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
@@ -4509,10 +4513,12 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
{
- mutex_lock(&reporter->devlink->lock);
+ mutex_lock(&reporter->devlink->reporters_lock);
list_del(&reporter->list);
+ mutex_unlock(&reporter->devlink->reporters_lock);
+ while (refcount_read(&reporter->refcount) > 1)
+ msleep(100);
mutex_destroy(&reporter->dump_lock);
- mutex_unlock(&reporter->devlink->lock);
if (reporter->dump_fmsg)
devlink_fmsg_free(reporter->dump_fmsg);
kfree(reporter);
@@ -4648,6 +4654,7 @@ static struct devlink_health_reporter *
devlink_health_reporter_get_from_info(struct devlink *devlink,
struct genl_info *info)
{
+ struct devlink_health_reporter *reporter;
char *reporter_name;
if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
@@ -4655,7 +4662,18 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
reporter_name =
nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
- return devlink_health_reporter_find_by_name(devlink, reporter_name);
+ mutex_lock(&devlink->reporters_lock);
+ reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
+ if (reporter)
+ refcount_inc(&reporter->refcount);
+ mutex_unlock(&devlink->reporters_lock);
+ return reporter;
+}
+
+static void
+devlink_health_reporter_put(struct devlink_health_reporter *reporter)
+{
+ refcount_dec(&reporter->refcount);
}
static int
@@ -4730,8 +4748,10 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
return -EINVAL;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
+ if (!msg) {
+ err = -ENOMEM;
+ goto out;
+ }
err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
DEVLINK_CMD_HEALTH_REPORTER_GET,
@@ -4739,10 +4759,13 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
0);
if (err) {
nlmsg_free(msg);
- return err;
+ goto out;
}
- return genlmsg_reply(msg, info);
+ err = genlmsg_reply(msg, info);
+out:
+ devlink_health_reporter_put(reporter);
+ return err;
}
static int
@@ -4759,7 +4782,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
- mutex_lock(&devlink->lock);
+ mutex_lock(&devlink->reporters_lock);
list_for_each_entry(reporter, &devlink->reporter_list,
list) {
if (idx < start) {
@@ -4773,12 +4796,12 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
cb->nlh->nlmsg_seq,
NLM_F_MULTI);
if (err) {
- mutex_unlock(&devlink->lock);
+ mutex_unlock(&devlink->reporters_lock);
goto out;
}
idx++;
}
- mutex_unlock(&devlink->lock);
+ mutex_unlock(&devlink->reporters_lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -4793,6 +4816,7 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
+ int err;
reporter = devlink_health_reporter_get_from_info(devlink, info);
if (!reporter)
@@ -4800,8 +4824,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
if (!reporter->ops->recover &&
(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
- return -EOPNOTSUPP;
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
reporter->graceful_period =
@@ -4811,7 +4837,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
reporter->auto_recover =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+ devlink_health_reporter_put(reporter);
return 0;
+out:
+ devlink_health_reporter_put(reporter);
+ return err;
}
static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
@@ -4819,12 +4849,16 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_health_reporter *reporter;
+ int err;
reporter = devlink_health_reporter_get_from_info(devlink, info);
if (!reporter)
return -EINVAL;
- return devlink_health_reporter_recover(reporter, NULL);
+ err = devlink_health_reporter_recover(reporter, NULL);
+
+ devlink_health_reporter_put(reporter);
+ return err;
}
static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
@@ -4839,12 +4873,16 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- if (!reporter->ops->diagnose)
+ if (!reporter->ops->diagnose) {
+ devlink_health_reporter_put(reporter);
return -EOPNOTSUPP;
+ }
fmsg = devlink_fmsg_alloc();
- if (!fmsg)
+ if (!fmsg) {
+ devlink_health_reporter_put(reporter);
return -ENOMEM;
+ }
err = devlink_fmsg_obj_nest_start(fmsg);
if (err)
@@ -4863,6 +4901,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
out:
devlink_fmsg_free(fmsg);
+ devlink_health_reporter_put(reporter);
return err;
}
@@ -4877,8 +4916,10 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- if (!reporter->ops->dump)
+ if (!reporter->ops->dump) {
+ devlink_health_reporter_put(reporter);
return -EOPNOTSUPP;
+ }
mutex_lock(&reporter->dump_lock);
err = devlink_health_do_dump(reporter, NULL);
@@ -4890,6 +4931,7 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
out:
mutex_unlock(&reporter->dump_lock);
+ devlink_health_reporter_put(reporter);
return err;
}
@@ -4904,12 +4946,15 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
if (!reporter)
return -EINVAL;
- if (!reporter->ops->dump)
+ if (!reporter->ops->dump) {
+ devlink_health_reporter_put(reporter);
return -EOPNOTSUPP;
+ }
mutex_lock(&reporter->dump_lock);
devlink_health_dump_clear(reporter);
mutex_unlock(&reporter->dump_lock);
+ devlink_health_reporter_put(reporter);
return 0;
}
@@ -5191,7 +5236,8 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
/* can be retrieved by unprivileged users */
},
{
@@ -5199,21 +5245,24 @@ static const struct genl_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_set_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_recover_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
.flags = GENL_ADMIN_PERM,
- .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+ DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
@@ -5284,6 +5333,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->region_list);
INIT_LIST_HEAD(&devlink->reporter_list);
mutex_init(&devlink->lock);
+ mutex_init(&devlink->reporters_lock);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -5326,6 +5376,7 @@ EXPORT_SYMBOL_GPL(devlink_unregister);
*/
void devlink_free(struct devlink *devlink)
{
+ mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
WARN_ON(!list_empty(&devlink->reporter_list));
WARN_ON(!list_empty(&devlink->region_list));
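
[Note: taken together, the devlink hunks move health reporters from devlink->lock to a dedicated reporters_lock, hand out a reference from devlink_health_reporter_get_from_info(), and make destroy poll with msleep() until only its own reference remains. Every doit handler therefore has to pair the implicit get with a put on all exit paths, which is what the goto-out rewrites above do. The sketch below outlines that contract with userspace atomics standing in for refcount_t; names and error codes are stand-ins, and the locking is elided.]

    #include <stdatomic.h>
    #include <stdio.h>

    /* Sketch of the reporter lifetime rules introduced by the patch. */
    struct reporter { atomic_int refcount; };     /* starts at 1 on create */

    static void reporter_get(struct reporter *r)
    {
        atomic_fetch_add(&r->refcount, 1);        /* under reporters_lock */
    }

    static void reporter_put(struct reporter *r)
    {
        atomic_fetch_sub(&r->refcount, 1);
    }

    static int handler_doit(struct reporter *r)
    {
        int err = 0;

        reporter_get(r);         /* devlink_health_reporter_get_from_info() */
        /* ... work; every early return must become "goto out" ... */
        reporter_put(r);         /* one put per exit path */
        return err;
    }

    int main(void)
    {
        struct reporter r = { .refcount = 1 };
        handler_doit(&r);
        /* destroy: wait until the count drops back to 1, then free */
        printf("refcount at destroy: %d\n", atomic_load(&r.refcount));
        return 0;
    }
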
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f3f5a78cd062..319ad5490fb3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2521,7 +2521,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF;
rcu_read_lock_bh();
- err = x->outer_mode->output(x, skb);
+ err = pktgen_xfrm_outer_mode_output(x, skb);
rcu_read_unlock_bh();
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 1f48642089ea..c0734028c7dc 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -17,15 +17,6 @@ menuconfig NET_DSA
if NET_DSA
-config NET_DSA_LEGACY
- bool "Support for older platform device and Device Tree registration"
- default y
- ---help---
- Say Y if you want to enable support for the older platform device and
- deprecated Device Tree binding registration.
-
- This feature is scheduled for removal in 4.17.
-
config NET_DSA_TAG_BRCM_COMMON
tristate
default n
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 717ac1618100..8a737b6ee94c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -2,7 +2,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
-dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ba04c78633be..9e1fc0b08290 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -346,10 +346,6 @@ static int __init dsa_init_module(void)
if (rc)
return rc;
- rc = dsa_legacy_register();
- if (rc)
- return rc;
-
dev_add_pack(&dsa_pack_type);
dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops),
@@ -365,7 +361,6 @@ static void __exit dsa_cleanup_module(void)
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
- dsa_legacy_unregister();
destroy_workqueue(dsa_owq);
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index e860512d673a..b434f5ff55ab 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -90,18 +90,6 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops);
bool dsa_schedule_work(struct work_struct *work);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
-/* legacy.c */
-#if IS_ENABLED(CONFIG_NET_DSA_LEGACY)
-int dsa_legacy_register(void);
-void dsa_legacy_unregister(void);
-#else
-static inline int dsa_legacy_register(void)
-{
- return 0;
-}
-
-static inline void dsa_legacy_unregister(void) { }
-#endif
int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid,
@@ -171,6 +159,8 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
+int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
deleted file mode 100644
index 219f4fa7ff4b..000000000000
--- a/net/dsa/legacy.c
+++ /dev/null
@@ -1,747 +0,0 @@
-/*
- * net/dsa/legacy.c - Hardware switch handling
- * Copyright (c) 2008-2009 Marvell Semiconductor
- * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_mdio.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
-#include <linux/netdevice.h>
-#include <linux/sysfs.h>
-#include <linux/phy_fixed.h>
-#include <linux/etherdevice.h>
-
-#include "dsa_priv.h"
-
-/* switch driver registration ***********************************************/
-static DEFINE_MUTEX(dsa_switch_drivers_mutex);
-static LIST_HEAD(dsa_switch_drivers);
-
-void register_switch_driver(struct dsa_switch_driver *drv)
-{
- mutex_lock(&dsa_switch_drivers_mutex);
- list_add_tail(&drv->list, &dsa_switch_drivers);
- mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(register_switch_driver);
-
-void unregister_switch_driver(struct dsa_switch_driver *drv)
-{
- mutex_lock(&dsa_switch_drivers_mutex);
- list_del_init(&drv->list);
- mutex_unlock(&dsa_switch_drivers_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_switch_driver);
-
-static const struct dsa_switch_ops *
-dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
- const char **_name, void **priv)
-{
- const struct dsa_switch_ops *ret;
- struct list_head *list;
- const char *name;
-
- ret = NULL;
- name = NULL;
-
- mutex_lock(&dsa_switch_drivers_mutex);
- list_for_each(list, &dsa_switch_drivers) {
- const struct dsa_switch_ops *ops;
- struct dsa_switch_driver *drv;
-
- drv = list_entry(list, struct dsa_switch_driver, list);
- ops = drv->ops;
-
- name = ops->probe(parent, host_dev, sw_addr, priv);
- if (name != NULL) {
- ret = ops;
- break;
- }
- }
- mutex_unlock(&dsa_switch_drivers_mutex);
-
- *_name = name;
-
- return ret;
-}
-
-/* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
-{
- int ret, port;
-
- for (port = 0; port < ds->num_ports; port++) {
- if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
- continue;
-
- ret = dsa_port_link_register_of(&ds->ports[port]);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-static int dsa_switch_setup_one(struct dsa_switch *ds,
- struct net_device *master)
-{
- const struct dsa_switch_ops *ops = ds->ops;
- struct dsa_switch_tree *dst = ds->dst;
- struct dsa_chip_data *cd = ds->cd;
- bool valid_name_found = false;
- int index = ds->index;
- struct dsa_port *dp;
- int i, ret;
-
- /*
- * Validate supplied switch configuration.
- */
- for (i = 0; i < ds->num_ports; i++) {
- char *name;
-
- dp = &ds->ports[i];
-
- name = cd->port_names[i];
- if (name == NULL)
- continue;
- dp->name = name;
-
- if (!strcmp(name, "cpu")) {
- if (dst->cpu_dp) {
- netdev_err(master,
- "multiple cpu ports?!\n");
- return -EINVAL;
- }
- dst->cpu_dp = &ds->ports[i];
- dst->cpu_dp->master = master;
- dp->type = DSA_PORT_TYPE_CPU;
- } else if (!strcmp(name, "dsa")) {
- dp->type = DSA_PORT_TYPE_DSA;
- } else {
- dp->type = DSA_PORT_TYPE_USER;
- }
- valid_name_found = true;
- }
-
- if (!valid_name_found && i == ds->num_ports)
- return -EINVAL;
-
- /* Make the built-in MII bus mask match the number of ports,
- * switch drivers can override this later
- */
- ds->phys_mii_mask |= dsa_user_ports(ds);
-
- /*
- * If the CPU connects to this switch, set the switch tree
- * tagging protocol to the preferred tagging format of this
- * switch.
- */
- if (dst->cpu_dp->ds == ds) {
- const struct dsa_device_ops *tag_ops;
- enum dsa_tag_protocol tag_protocol;
-
- tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index);
- tag_ops = dsa_tag_driver_get(tag_protocol);
- if (IS_ERR(tag_ops))
- return PTR_ERR(tag_ops);
-
- dst->cpu_dp->tag_ops = tag_ops;
-
- /* Few copies for faster access in master receive hot path */
- dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
- dst->cpu_dp->dst = dst;
- }
-
- dsa_tag_driver_put(dst->cpu_dp->tag_ops);
-
- memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
-
- /*
- * Do basic register setup.
- */
- ret = ops->setup(ds);
- if (ret < 0)
- return ret;
-
- ret = dsa_switch_register_notifier(ds);
- if (ret)
- return ret;
-
- if (!ds->slave_mii_bus && ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
- if (!ds->slave_mii_bus)
- return -ENOMEM;
- dsa_slave_mii_bus_init(ds);
-
- ret = mdiobus_register(ds->slave_mii_bus);
- if (ret < 0)
- return ret;
- }
-
- /*
- * Create network devices for physical switch ports.
- */
- for (i = 0; i < ds->num_ports; i++) {
- ds->ports[i].dn = cd->port_dn[i];
- ds->ports[i].cpu_dp = dst->cpu_dp;
-
- if (!dsa_is_user_port(ds, i))
- continue;
-
- ret = dsa_slave_create(&ds->ports[i]);
- if (ret < 0)
- netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
- index, i, cd->port_names[i], ret);
- }
-
- /* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setups(ds);
- if (ret < 0)
- netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
- index);
-
- return 0;
-}
-
-static struct dsa_switch *
-dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master,
- int index, struct device *parent, struct device *host_dev)
-{
- struct dsa_chip_data *cd = dst->pd->chip + index;
- const struct dsa_switch_ops *ops;
- struct dsa_switch *ds;
- int ret;
- const char *name;
- void *priv;
-
- /*
- * Probe for switch model.
- */
- ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
- if (!ops) {
- netdev_err(master, "[%d]: could not detect attached switch\n",
- index);
- return ERR_PTR(-EINVAL);
- }
- netdev_info(master, "[%d]: detected a %s switch\n",
- index, name);
-
-
- /*
- * Allocate and initialise switch state.
- */
- ds = dsa_switch_alloc(parent, DSA_MAX_PORTS);
- if (!ds)
- return ERR_PTR(-ENOMEM);
-
- ds->dst = dst;
- ds->index = index;
- ds->cd = cd;
- ds->ops = ops;
- ds->priv = priv;
-
- ret = dsa_switch_setup_one(ds, master);
- if (ret)
- return ERR_PTR(ret);
-
- return ds;
-}
-
-static void dsa_switch_destroy(struct dsa_switch *ds)
-{
- int port;
-
- /* Destroy network devices for physical switch ports. */
- for (port = 0; port < ds->num_ports; port++) {
- if (!dsa_is_user_port(ds, port))
- continue;
-
- if (!ds->ports[port].slave)
- continue;
-
- dsa_slave_destroy(ds->ports[port].slave);
- }
-
- /* Disable configuration of the CPU and DSA ports */
- for (port = 0; port < ds->num_ports; port++) {
- if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
- continue;
- dsa_port_link_unregister_of(&ds->ports[port]);
- }
-
- if (ds->slave_mii_bus && ds->ops->phy_read)
- mdiobus_unregister(ds->slave_mii_bus);
-
- dsa_switch_unregister_notifier(ds);
-}
-
-/* platform driver init and cleanup *****************************************/
-static int dev_is_class(struct device *dev, void *class)
-{
- if (dev->class != NULL && !strcmp(dev->class->name, class))
- return 1;
-
- return 0;
-}
-
-static struct device *dev_find_class(struct device *parent, char *class)
-{
- if (dev_is_class(parent, class)) {
- get_device(parent);
- return parent;
- }
-
- return device_find_child(parent, class, dev_is_class);
-}
-
-struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev)
-{
- struct device *d;
-
- d = dev_find_class(dev, "mdio_bus");
- if (d != NULL) {
- struct mii_bus *bus;
-
- bus = to_mii_bus(d);
- put_device(d);
-
- return bus;
- }
-
- return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus);
-
-#ifdef CONFIG_OF
-static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
- struct dsa_chip_data *cd,
- int chip_index, int port_index,
- struct device_node *link)
-{
- const __be32 *reg;
- int link_sw_addr;
- struct device_node *parent_sw;
- int len;
-
- parent_sw = of_get_parent(link);
- if (!parent_sw)
- return -EINVAL;
-
- reg = of_get_property(parent_sw, "reg", &len);
- if (!reg || (len != sizeof(*reg) * 2))
- return -EINVAL;
-
- /*
- * Get the destination switch number from the second field of its 'reg'
- * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
- */
- link_sw_addr = be32_to_cpup(reg + 1);
-
- if (link_sw_addr >= pd->nr_chips)
- return -EINVAL;
-
- cd->rtable[link_sw_addr] = port_index;
-
- return 0;
-}
-
-static int dsa_of_probe_links(struct dsa_platform_data *pd,
- struct dsa_chip_data *cd,
- int chip_index, int port_index,
- struct device_node *port,
- const char *port_name)
-{
- struct device_node *link;
- int link_index;
- int ret;
-
- for (link_index = 0;; link_index++) {
- link = of_parse_phandle(port, "link", link_index);
- if (!link)
- break;
-
- if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
- ret = dsa_of_setup_routing_table(pd, cd, chip_index,
- port_index, link);
- if (ret)
- return ret;
- }
- }
- return 0;
-}
-
-static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
-{
- int i;
- int port_index;
-
- for (i = 0; i < pd->nr_chips; i++) {
- port_index = 0;
- while (port_index < DSA_MAX_PORTS) {
- kfree(pd->chip[i].port_names[port_index]);
- port_index++;
- }
-
- /* Drop our reference to the MDIO bus device */
- put_device(pd->chip[i].host_dev);
- }
- kfree(pd->chip);
-}
-
-static int dsa_of_probe(struct device *dev)
-{
- struct device_node *np = dev->of_node;
- struct device_node *child, *mdio, *ethernet, *port;
- struct mii_bus *mdio_bus, *mdio_bus_switch;
- struct net_device *ethernet_dev;
- struct dsa_platform_data *pd;
- struct dsa_chip_data *cd;
- const char *port_name;
- int chip_index, port_index;
- const unsigned int *sw_addr, *port_reg;
- u32 eeprom_len;
- int ret;
-
- mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
- if (!mdio)
- return -EINVAL;
-
- mdio_bus = of_mdio_find_bus(mdio);
- if (!mdio_bus)
- return -EPROBE_DEFER;
-
- ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
- if (!ethernet) {
- ret = -EINVAL;
- goto out_put_mdio;
- }
-
- ethernet_dev = of_find_net_device_by_node(ethernet);
- if (!ethernet_dev) {
- ret = -EPROBE_DEFER;
- goto out_put_mdio;
- }
-
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd) {
- ret = -ENOMEM;
- goto out_put_ethernet;
- }
-
- dev->platform_data = pd;
- pd->of_netdev = ethernet_dev;
- pd->nr_chips = of_get_available_child_count(np);
- if (pd->nr_chips > DSA_MAX_SWITCHES)
- pd->nr_chips = DSA_MAX_SWITCHES;
-
- pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data),
- GFP_KERNEL);
- if (!pd->chip) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- chip_index = -1;
- for_each_available_child_of_node(np, child) {
- int i;
-
- chip_index++;
- cd = &pd->chip[chip_index];
-
- cd->of_node = child;
-
- /* Initialize the routing table */
- for (i = 0; i < DSA_MAX_SWITCHES; ++i)
- cd->rtable[i] = DSA_RTABLE_NONE;
-
- /* When assigning the host device, increment its refcount */
- cd->host_dev = get_device(&mdio_bus->dev);
-
- sw_addr = of_get_property(child, "reg", NULL);
- if (!sw_addr)
- continue;
-
- cd->sw_addr = be32_to_cpup(sw_addr);
- if (cd->sw_addr >= PHY_MAX_ADDR)
- continue;
-
- if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
- cd->eeprom_len = eeprom_len;
-
- mdio = of_parse_phandle(child, "mii-bus", 0);
- if (mdio) {
- mdio_bus_switch = of_mdio_find_bus(mdio);
- if (!mdio_bus_switch) {
- ret = -EPROBE_DEFER;
- goto out_free_chip;
- }
-
- /* Drop the mdio_bus device ref, replacing the host
- * device with the mdio_bus_switch device, keeping
- * the refcount from of_mdio_find_bus() above.
- */
- put_device(cd->host_dev);
- cd->host_dev = &mdio_bus_switch->dev;
- }
-
- for_each_available_child_of_node(child, port) {
- port_reg = of_get_property(port, "reg", NULL);
- if (!port_reg)
- continue;
-
- port_index = be32_to_cpup(port_reg);
- if (port_index >= DSA_MAX_PORTS)
- break;
-
- port_name = of_get_property(port, "label", NULL);
- if (!port_name)
- continue;
-
- cd->port_dn[port_index] = port;
-
- cd->port_names[port_index] = kstrdup(port_name,
- GFP_KERNEL);
- if (!cd->port_names[port_index]) {
- ret = -ENOMEM;
- goto out_free_chip;
- }
-
- ret = dsa_of_probe_links(pd, cd, chip_index,
- port_index, port, port_name);
- if (ret)
- goto out_free_chip;
-
- }
- }
-
- /* The individual chips hold their own refcount on the mdio bus,
- * so drop ours */
- put_device(&mdio_bus->dev);
-
- return 0;
-
-out_free_chip:
- dsa_of_free_platform_data(pd);
-out_free:
- kfree(pd);
- dev->platform_data = NULL;
-out_put_ethernet:
- put_device(&ethernet_dev->dev);
-out_put_mdio:
- put_device(&mdio_bus->dev);
- return ret;
-}
-
-static void dsa_of_remove(struct device *dev)
-{
- struct dsa_platform_data *pd = dev->platform_data;
-
- if (!dev->of_node)
- return;
-
- dsa_of_free_platform_data(pd);
- put_device(&pd->of_netdev->dev);
- kfree(pd);
-}
-#else
-static inline int dsa_of_probe(struct device *dev)
-{
- return 0;
-}
-
-static inline void dsa_of_remove(struct device *dev)
-{
-}
-#endif
-
-static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
- struct device *parent, struct dsa_platform_data *pd)
-{
- int i;
- unsigned configured = 0;
-
- dst->pd = pd;
-
- for (i = 0; i < pd->nr_chips; i++) {
- struct dsa_switch *ds;
-
- ds = dsa_switch_setup(dst, dev, i, parent, pd->chip[i].host_dev);
- if (IS_ERR(ds)) {
- netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n",
- i, PTR_ERR(ds));
- continue;
- }
-
- dst->ds[i] = ds;
-
- ++configured;
- }
-
- /*
- * If no switch was found, exit cleanly
- */
- if (!configured)
- return -EPROBE_DEFER;
-
- return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp);
-}
-
-static int dsa_probe(struct platform_device *pdev)
-{
- struct dsa_platform_data *pd = pdev->dev.platform_data;
- struct net_device *dev;
- struct dsa_switch_tree *dst;
- int ret;
-
- if (pdev->dev.of_node) {
- ret = dsa_of_probe(&pdev->dev);
- if (ret)
- return ret;
-
- pd = pdev->dev.platform_data;
- }
-
- if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL))
- return -EINVAL;
-
- if (pd->of_netdev) {
- dev = pd->of_netdev;
- dev_hold(dev);
- } else {
- dev = dsa_dev_to_net_device(pd->netdev);
- }
- if (dev == NULL) {
- ret = -EPROBE_DEFER;
- goto out;
- }
-
- if (dev->dsa_ptr != NULL) {
- dev_put(dev);
- ret = -EEXIST;
- goto out;
- }
-
- dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL);
- if (dst == NULL) {
- dev_put(dev);
- ret = -ENOMEM;
- goto out;
- }
-
- platform_set_drvdata(pdev, dst);
-
- ret = dsa_setup_dst(dst, dev, &pdev->dev, pd);
- if (ret) {
- dev_put(dev);
- goto out;
- }
-
- return 0;
-
-out:
- dsa_of_remove(&pdev->dev);
-
- return ret;
-}
-
-static void dsa_remove_dst(struct dsa_switch_tree *dst)
-{
- int i;
-
- dsa_master_teardown(dst->cpu_dp->master);
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
-
- if (ds)
- dsa_switch_destroy(ds);
- }
-
- dev_put(dst->cpu_dp->master);
-}
-
-static int dsa_remove(struct platform_device *pdev)
-{
- struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
-
- dsa_remove_dst(dst);
- dsa_of_remove(&pdev->dev);
-
- return 0;
-}
-
-static void dsa_shutdown(struct platform_device *pdev)
-{
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int dsa_suspend(struct device *d)
-{
- struct dsa_switch_tree *dst = dev_get_drvdata(d);
- int i, ret = 0;
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
-
- if (ds != NULL)
- ret = dsa_switch_suspend(ds);
- }
-
- return ret;
-}
-
-static int dsa_resume(struct device *d)
-{
- struct dsa_switch_tree *dst = dev_get_drvdata(d);
- int i, ret = 0;
-
- for (i = 0; i < dst->pd->nr_chips; i++) {
- struct dsa_switch *ds = dst->ds[i];
-
- if (ds != NULL)
- ret = dsa_switch_resume(ds);
- }
-
- return ret;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
-
-static const struct of_device_id dsa_of_match_table[] = {
- { .compatible = "marvell,dsa", },
- {}
-};
-MODULE_DEVICE_TABLE(of, dsa_of_match_table);
-
-static struct platform_driver dsa_driver = {
- .probe = dsa_probe,
- .remove = dsa_remove,
- .shutdown = dsa_shutdown,
- .driver = {
- .name = "dsa",
- .of_match_table = dsa_of_match_table,
- .pm = &dsa_pm_ops,
- },
-};
-
-int dsa_legacy_register(void)
-{
- return platform_driver_register(&dsa_driver);
-}
-
-void dsa_legacy_unregister(void)
-{
- platform_driver_unregister(&dsa_driver);
-}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index caeef4c99dc0..1ed287b2badd 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -154,19 +154,67 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
}
+static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
+ bool vlan_filtering)
+{
+ struct dsa_switch *ds = dp->ds;
+ int i;
+
+ if (!ds->vlan_filtering_is_global)
+ return true;
+
+ /* For cases where enabling/disabling VLAN awareness is global to the
+ * switch, we need to handle the case where multiple bridges span
+ * different ports of the same switch device and one of them has a
+ * different setting than what is being requested.
+ */
+ for (i = 0; i < ds->num_ports; i++) {
+ struct net_device *other_bridge;
+
+ other_bridge = dsa_to_port(ds, i)->bridge_dev;
+ if (!other_bridge)
+ continue;
+ /* If it's the same bridge, it also has same
+ * vlan_filtering setting => no need to check
+ */
+ if (other_bridge == dp->bridge_dev)
+ continue;
+ if (br_vlan_enabled(other_bridge) != vlan_filtering) {
+ dev_err(ds->dev, "VLAN filtering is a global setting\n");
+ return false;
+ }
+ }
+ return true;
+}
+
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct switchdev_trans *trans)
{
struct dsa_switch *ds = dp->ds;
+ int err;
/* bridge skips -EOPNOTSUPP, so skip the prepare phase */
if (switchdev_trans_ph_prepare(trans))
return 0;
- if (ds->ops->port_vlan_filtering)
- return ds->ops->port_vlan_filtering(ds, dp->index,
- vlan_filtering);
+ if (!ds->ops->port_vlan_filtering)
+ return 0;
+
+ if (!dsa_port_can_apply_vlan_filtering(dp, vlan_filtering))
+ return -EINVAL;
+
+ if (dsa_port_is_vlan_filtering(dp) == vlan_filtering)
+ return 0;
+
+ err = ds->ops->port_vlan_filtering(ds, dp->index,
+ vlan_filtering);
+ if (err)
+ return err;
+ if (ds->vlan_filtering_is_global)
+ ds->vlan_filtering = vlan_filtering;
+ else
+ dp->vlan_filtering = vlan_filtering;
return 0;
}
@@ -322,6 +370,37 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return 0;
}
+int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
+{
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = flags,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
+ struct switchdev_trans trans;
+ int err;
+
+ trans.ph_prepare = true;
+ err = dsa_port_vlan_add(dp, &vlan, &trans);
+ if (err == -EOPNOTSUPP)
+ return 0;
+
+ trans.ph_prepare = false;
+ return dsa_port_vlan_add(dp, &vlan, &trans);
+}
+
+int dsa_port_vid_del(struct dsa_port *dp, u16 vid)
+{
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
+
+ return dsa_port_vlan_del(dp, &vlan);
+}
+
static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
{
struct device_node *phy_dn;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ce26dddc8270..8ad9bf957da1 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1001,13 +1001,6 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct switchdev_obj_port_vlan vlan = {
- .vid_begin = vid,
- .vid_end = vid,
- /* This API only allows programming tagged, non-PVID VIDs */
- .flags = 0,
- };
- struct switchdev_trans trans;
struct bridge_vlan_info info;
int ret;
@@ -1024,25 +1017,14 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
return -EBUSY;
}
- trans.ph_prepare = true;
- ret = dsa_port_vlan_add(dp, &vlan, &trans);
- if (ret == -EOPNOTSUPP)
- return 0;
-
- trans.ph_prepare = false;
- return dsa_port_vlan_add(dp, &vlan, &trans);
+ /* This API only allows programming tagged, non-PVID VIDs */
+ return dsa_port_vid_add(dp, vid, 0);
}
static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct switchdev_obj_port_vlan vlan = {
- .vid_begin = vid,
- .vid_end = vid,
- /* This API only allows programming tagged, non-PVID VIDs */
- .flags = 0,
- };
struct bridge_vlan_info info;
int ret;
@@ -1059,7 +1041,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
return -EBUSY;
}
- ret = dsa_port_vlan_del(dp, &vlan);
+ ret = dsa_port_vid_del(dp, vid);
if (ret == -EOPNOTSUPP)
ret = 0;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index e1fae969aa73..7d8cd9bc0ecc 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -10,6 +10,7 @@
* (at your option) any later version.
*/
+#include <linux/if_bridge.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/if_vlan.h>
@@ -71,6 +72,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
+ bool unset_vlan_filtering = br_vlan_enabled(info->br);
+ int err, i;
+
if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
ds->ops->port_bridge_leave(ds, info->port, info->br);
@@ -78,6 +82,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
info->br);
+ /* If the bridge was vlan_filtering, the bridge core doesn't trigger an
+ * event for changing vlan_filtering setting upon slave ports leaving
+ * it. That is a good thing, because that lets us handle it and also
+ * handle the case where the switch's vlan_filtering setting is global
+ * (not per port). When that happens, the correct moment to trigger the
+ * vlan_filtering callback is only when the last port left this bridge.
+ */
+ if (unset_vlan_filtering && ds->vlan_filtering_is_global) {
+ for (i = 0; i < ds->num_ports; i++) {
+ if (i == info->port)
+ continue;
+ if (dsa_to_port(ds, i)->bridge_dev == info->br) {
+ unset_vlan_filtering = false;
+ break;
+ }
+ }
+ }
+ if (unset_vlan_filtering) {
+ struct switchdev_trans trans = {0};
+
+ err = dsa_port_vlan_filtering(&ds->ports[info->port],
+ false, &trans);
+ if (err && err != EOPNOTSUPP)
+ return err;
+ }
return 0;
}
@@ -196,7 +225,7 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port,
if (!dp->bridge_dev)
return err;
- /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare pharse and
+ /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare phase and
* already checks whether there is an overlapping bridge VLAN entry
* with the same VID, so here we only need to check that if we are
* adding a bridge VLAN entry there is not an overlapping VLAN device
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 32cae39cdff6..8108e97d4285 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -304,7 +304,7 @@ config NET_IPVTI
tristate "Virtual (secure) IP: tunneling"
select INET_TUNNEL
select NET_IP_TUNNEL
- depends on INET_XFRM_MODE_TUNNEL
+ select XFRM
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
@@ -396,33 +396,6 @@ config INET_TUNNEL
tristate
default n
-config INET_XFRM_MODE_TRANSPORT
- tristate "IP: IPsec transport mode"
- default y
- select XFRM
- ---help---
- Support for IPsec transport mode.
-
- If unsure, say Y.
-
-config INET_XFRM_MODE_TUNNEL
- tristate "IP: IPsec tunnel mode"
- default y
- select XFRM
- ---help---
- Support for IPsec tunnel mode.
-
- If unsure, say Y.
-
-config INET_XFRM_MODE_BEET
- tristate "IP: IPsec BEET mode"
- default y
- select XFRM
- ---help---
- Support for IPsec BEET mode.
-
- If unsure, say Y.
-
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 58629314eae9..000a61994c8f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -37,10 +37,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o
obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
-obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
-obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
-obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
obj-$(CONFIG_IP_PNP) += ipconfig.o
obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 10e809b296ec..fb065a8937ea 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -226,7 +226,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
int encap_type;
struct udphdr *uh;
@@ -234,6 +234,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
__be16 sport, dport;
struct xfrm_encap_tmpl *encap = x->encap;
struct ip_esp_hdr *esph = esp->esph;
+ unsigned int len;
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
@@ -241,11 +242,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
encap_type = encap->encap_type;
spin_unlock_bh(&x->lock);
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
+ return -EMSGSIZE;
+
uh = (struct udphdr *)esph;
uh->source = sport;
uh->dest = dport;
- uh->len = htons(skb->len + esp->tailen
- - skb_transport_offset(skb));
+ uh->len = htons(len);
uh->check = 0;
switch (encap_type) {
@@ -262,6 +266,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
*skb_mac_header(skb) = IPPROTO_UDP;
esp->esph = esph;
+
+ return 0;
}
int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
@@ -275,8 +281,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
int tailen = esp->tailen;
/* this is non-NULL only with UDP Encapsulation */
- if (x->encap)
- esp_output_udp_encap(x, skb, esp);
+ if (x->encap) {
+ int err = esp_output_udp_encap(x, skb, esp);
+
+ if (err < 0)
+ return err;
+ }
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
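
[Note: the new length check in esp_output_udp_encap() exists because uh->len is a 16-bit big-endian field: once the ESP payload plus trailer plus outer IPv4 header reaches IP_MAX_MTU (0xFFFF), htons() would silently truncate the length, so the function now fails with -EMSGSIZE first. A tiny userspace demonstration of the truncation being guarded against, with an assumed oversized GSO payload:]

    #include <arpa/inet.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int len = 70000;                          /* payload + trailer */
        unsigned short wire = htons((unsigned short)len);  /* 16-bit UDP length */
        printf("requested %u, on the wire %u\n", len, (unsigned)ntohs(wire));
        /* prints "requested 70000, on the wire 4464": the patch returns
         * -EMSGSIZE before this corruption can reach the wire */
        return 0;
    }
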
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 8756e0e790d2..8edcfa66d1e5 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -52,13 +52,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
goto out;
if (sp->len == XFRM_MAX_DEPTH)
- goto out;
+ goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ip_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET);
if (!x)
- goto out;
+ goto out_reset;
sp->xvec[sp->len++] = x;
sp->olen++;
@@ -66,7 +66,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo) {
xfrm_state_put(x);
- goto out;
+ goto out_reset;
}
}
@@ -82,6 +82,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS);
+out_reset:
+ secpath_reset(skb);
out:
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
@@ -107,6 +109,44 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
xo->proto = proto;
}
+static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+}
+
+static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_TUNNEL:
+ return xfrm4_tunnel_gso_segment(x, skb, features);
+ case XFRM_MODE_TRANSPORT:
+ return xfrm4_transport_gso_segment(x, skb, features);
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -138,14 +178,16 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
+ if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) &&
+ !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev)
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
- else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM) &&
+ !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM))
esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
- return x->outer_mode->gso_segment(x, skb, esp_features);
+ return xfrm4_outer_mode_gso_segment(x, skb, esp_features);
}
static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -181,7 +223,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
if (!xo)
return -EINVAL;
- if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
+ if ((!(features & NETIF_F_HW_ESP) &&
+ !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) ||
+ x->xso.dev != skb->dev) {
xo->flags |= CRYPTO_FALLBACK;
hw_offload = false;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 71c2165a2ce3..d3da6a10f86f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp)
dst_release_immediate(&rt->dst);
}
-static void free_nh_exceptions(struct fib_nh *nh)
+static void free_nh_exceptions(struct fib_nh_common *nhc)
{
struct fnhe_hash_bucket *hash;
int i;
- hash = rcu_dereference_protected(nh->nh_exceptions, 1);
+ hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
if (!hash)
return;
for (i = 0; i < FNHE_HASH_SIZE; i++) {
@@ -212,6 +212,9 @@ void fib_nh_common_release(struct fib_nh_common *nhc)
dev_put(nhc->nhc_dev);
lwtstate_put(nhc->nhc_lwtstate);
+ rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
+ rt_fibinfo_free(&nhc->nhc_rth_input);
+ free_nh_exceptions(nhc);
}
EXPORT_SYMBOL_GPL(fib_nh_common_release);
@@ -222,9 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
net->ipv4.fib_num_tclassid_users--;
#endif
fib_nh_common_release(&fib_nh->nh_common);
- free_nh_exceptions(fib_nh);
- rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output);
- rt_fibinfo_free(&fib_nh->nh_rth_input);
}
/* Release a nexthop info record */
@@ -491,23 +491,35 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
u16 encap_type, void *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
+ int err;
+
+ nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
+ gfp_flags);
+ if (!nhc->nhc_pcpu_rth_output)
+ return -ENOMEM;
+
if (encap) {
struct lwtunnel_state *lwtstate;
- int err;
if (encap_type == LWTUNNEL_ENCAP_NONE) {
NL_SET_ERR_MSG(extack, "LWT encap type not specified");
- return -EINVAL;
+ err = -EINVAL;
+ goto lwt_failure;
}
err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
cfg, &lwtstate, extack);
if (err)
- return err;
+ goto lwt_failure;
nhc->nhc_lwtstate = lwtstate_get(lwtstate);
}
return 0;
+
+lwt_failure:
+ rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
+ nhc->nhc_pcpu_rth_output = NULL;
+ return err;
}
EXPORT_SYMBOL_GPL(fib_nh_common_init);
@@ -515,18 +527,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
struct fib_config *cfg, int nh_weight,
struct netlink_ext_ack *extack)
{
- int err = -ENOMEM;
+ int err;
nh->fib_nh_family = AF_INET;
- nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
- if (!nh->nh_pcpu_rth_output)
- goto err_out;
-
err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
if (err)
- goto init_failure;
+ return err;
nh->fib_nh_oif = cfg->fc_oif;
nh->fib_nh_gw_family = cfg->fc_gw_family;
@@ -546,12 +554,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
nh->fib_nh_weight = nh_weight;
#endif
return 0;
-
-init_failure:
- rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output);
- nh->nh_pcpu_rth_output = NULL;
-err_out:
- return err;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1711,12 +1713,12 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
* - if the new MTU is greater than the PMTU, don't make any change
* - otherwise, unlock and set PMTU
*/
-static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
struct fnhe_hash_bucket *bucket;
int i;
- bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
if (!bucket)
return;
@@ -1747,7 +1749,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev)
- nh_update_mtu(nh, dev->mtu, orig_mtu);
+ nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}
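
[Note: fib_nh_common_init() now owns the percpu route cache allocation (moved out of fib_nh_init()), so its lwtunnel error path has to unwind it; the hunk uses the standard kernel goto ladder, releasing in reverse order of acquisition. The sketch below reduces that shape to a generic userspace analogue; the calloc() calls are stand-ins for alloc_percpu_gfp() and lwtunnel_build_state(), not the actual fib code.]

    #include <stdlib.h>

    /* Generic goto-unwind shape used by fib_nh_common_init() above:
     * acquire in order, release in reverse on failure. */
    static int nh_common_init(void **pcpu, void **lwtstate, int want_encap)
    {
        int err;

        *pcpu = calloc(1, 64);            /* alloc_percpu_gfp() stand-in */
        if (!*pcpu)
            return -1;                    /* -ENOMEM */

        if (want_encap) {
            *lwtstate = calloc(1, 32);    /* lwtunnel_build_state() stand-in */
            if (!*lwtstate) {
                err = -1;
                goto lwt_failure;
            }
        }
        return 0;

    lwt_failure:
        free(*pcpu);                      /* rt_fibinfo_free_cpus() analogue */
        *pcpu = NULL;
        return err;
    }

    int main(void)
    {
        void *pcpu = NULL, *lwt = NULL;
        return nh_common_init(&pcpu, &lwt, 1) ? 1 : 0;
    }
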
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1132d6d1796a..ed97724c5e33 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -130,6 +130,7 @@
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/snmp.h>
#include <net/ip.h>
@@ -188,6 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb)
return false;
}
+INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
{
const struct net_protocol *ipprot;
@@ -205,7 +208,8 @@ resubmit:
}
nf_reset(skb);
}
- ret = ipprot->handler(skb);
+ ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
+ skb);
if (ret < 0) {
protocol = -ret;
goto resubmit;
@@ -305,6 +309,8 @@ drop:
return true;
}
+INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev)
{
@@ -322,7 +328,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = edemux(skb);
+ err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
+ udp_v4_early_demux, skb);
if (unlikely(err))
goto drop_error;
/* must reload iph, skb->head might have changed */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4e42c1974ba2..ac880beda8a7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -516,6 +516,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->pkt_type = from->pkt_type;
to->priority = from->priority;
to->protocol = from->protocol;
+ to->skb_iif = from->skb_iif;
skb_dst_drop(to);
skb_dst_copy(to, from);
to->dev = from->dev;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 68a21bf75dd0..254a42e83ff9 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -50,7 +50,7 @@ static unsigned int vti_net_id __read_mostly;
static int vti_tunnel_init(struct net_device *dev);
static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type)
+ int encap_type, bool update_skb_dev)
{
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
@@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ if (update_skb_dev)
+ skb->dev = tunnel->dev;
+
return xfrm_input(skb, nexthdr, spi, encap_type);
}
@@ -74,47 +77,28 @@ drop:
return 0;
}
-static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
- int encap_type)
+static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
{
- struct ip_tunnel *tunnel;
- const struct iphdr *iph = ip_hdr(skb);
- struct net *net = dev_net(skb->dev);
- struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
-
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->saddr, iph->daddr, 0);
- if (tunnel) {
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto drop;
-
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
-
- skb->dev = tunnel->dev;
-
- return xfrm_input(skb, nexthdr, spi, encap_type);
- }
-
- return -EINVAL;
-drop:
- kfree_skb(skb);
- return 0;
+ return vti_input(skb, nexthdr, spi, encap_type, false);
}
-static int vti_rcv(struct sk_buff *skb)
+static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev)
{
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
- return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+ return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev);
}
-static int vti_rcv_ipip(struct sk_buff *skb)
+static int vti_rcv_proto(struct sk_buff *skb)
{
- XFRM_SPI_SKB_CB(skb)->family = AF_INET;
- XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ return vti_rcv(skb, 0, false);
+}
- return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0);
+static int vti_rcv_tunnel(struct sk_buff *skb)
+{
+ return vti_rcv(skb, ip_hdr(skb)->saddr, true);
}
static int vti_rcv_cb(struct sk_buff *skb, int err)
@@ -123,7 +107,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
- struct xfrm_mode *inner_mode;
+ const struct xfrm_mode *inner_mode;
struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
u32 orig_mark = skb->mark;
int ret;
@@ -142,7 +126,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
x = xfrm_input_state(skb);
- inner_mode = x->inner_mode;
+ inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -153,7 +137,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
}
}
- family = inner_mode->afinfo->family;
+ family = inner_mode->family;
skb->mark = be32_to_cpu(tunnel->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@ -447,31 +431,31 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
}
static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
- .handler = vti_rcv,
- .input_handler = vti_input,
+ .handler = vti_rcv_proto,
+ .input_handler = vti_input_proto,
.cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 100,
};
static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
- .handler = vti_rcv,
- .input_handler = vti_input,
+ .handler = vti_rcv_proto,
+ .input_handler = vti_input_proto,
.cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 100,
};
static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
- .handler = vti_rcv,
- .input_handler = vti_input,
+ .handler = vti_rcv_proto,
+ .input_handler = vti_input_proto,
.cb_handler = vti_rcv_cb,
.err_handler = vti4_err,
.priority = 100,
};
static struct xfrm_tunnel ipip_handler __read_mostly = {
- .handler = vti_rcv_ipip,
+ .handler = vti_rcv_tunnel,
.err_handler = vti4_err,
.priority = 0,
};
@@ -646,10 +630,8 @@ static int __init vti_init(void)
msg = "ipip tunnel";
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
- if (err < 0) {
- pr_info("%s: cant't register tunnel\n",__func__);
+ if (err < 0)
goto xfrm_tunnel_failed;
- }
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
@@ -659,9 +641,9 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
- xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
-xfrm_tunnel_failed:
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel_failed:
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
@@ -676,6 +658,7 @@ pernet_dev_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
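
Besides the handler renames, the vti hunks fix the error unwind: the two labels in vti_init() were ordered so that a failed tunnel registration deregistered the never-registered ipip handler while leaking the IPCOMP protocol, and vti_fini() never deregistered the ipip handler at all. The invariant for goto-style init is that each label undoes exactly the steps that succeeded, in reverse registration order; a compilable sketch with made-up step names:

#include <stdio.h>

static int register_a(void)    { return 0; }
static int register_b(void)    { return 0; }
static int register_c(void)    { return -1; }	/* force the unwind */
static void unregister_a(void) { puts("undo a"); }
static void unregister_b(void) { puts("undo b"); }

static int demo_init(void)
{
	int err;

	err = register_a();
	if (err)
		goto out;
	err = register_b();
	if (err)
		goto unreg_a;	/* only A succeeded so far */
	err = register_c();
	if (err)
		goto unreg_b;	/* B and A succeeded */
	return 0;

unreg_b:
	unregister_b();		/* reverse order: B first, then A */
unreg_a:
	unregister_a();
out:
	return err;
}

int main(void)
{
	printf("init: %d\n", demo_init());	/* undo b, undo a, init: -1 */
	return 0;
}
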
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 795aed6e4720..11ddc276776e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -643,8 +643,9 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
}
}
-static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
- u32 pmtu, bool lock, unsigned long expires)
+static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
+ __be32 gw, u32 pmtu, bool lock,
+ unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
@@ -653,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
unsigned int i;
int depth;
- genid = fnhe_genid(dev_net(nh->fib_nh_dev));
+ genid = fnhe_genid(dev_net(nhc->nhc_dev));
hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
- hash = rcu_dereference(nh->nh_exceptions);
+ hash = rcu_dereference(nhc->nhc_exceptions);
if (!hash) {
hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
if (!hash)
goto out_unlock;
- rcu_assign_pointer(nh->nh_exceptions, hash);
+ rcu_assign_pointer(nhc->nhc_exceptions, hash);
}
hash += hval;
@@ -715,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
* stale, so anyone caching it rechecks if this exception
* applies to them.
*/
- rt = rcu_dereference(nh->nh_rth_input);
+ rt = rcu_dereference(nhc->nhc_rth_input);
if (rt)
rt->dst.obsolete = DST_OBSOLETE_KILL;
for_each_possible_cpu(i) {
struct rtable __rcu **prt;
- prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+ prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
rt = rcu_dereference(*prt);
if (rt)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -788,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
} else {
if (fib_lookup(net, fl4, &res, 0) == 0) {
struct fib_nh_common *nhc = FIB_RES_NHC(res);
- struct fib_nh *nh;
- nh = container_of(nhc, struct fib_nh, nh_common);
- update_or_create_fnhe(nh, fl4->daddr, new_gw,
+ update_or_create_fnhe(nhc, fl4->daddr, new_gw,
0, false,
jiffies + ip_rt_gc_timeout);
}
@@ -1039,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
rcu_read_lock();
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh_common *nhc = FIB_RES_NHC(res);
- struct fib_nh *nh;
- nh = container_of(nhc, struct fib_nh, nh_common);
- update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
@@ -1328,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe, __rcu **fnhe_p;
@@ -1336,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_lock_bh(&fnhe_lock);
- hash = rcu_dereference_protected(nh->nh_exceptions,
+ hash = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&fnhe_lock));
hash += hval;
@@ -1362,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
-static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
+static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
+ __be32 daddr)
{
- struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
+ struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
struct fib_nh_exception *fnhe;
u32 hval;
@@ -1378,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
if (fnhe->fnhe_daddr == daddr) {
if (fnhe->fnhe_expires &&
time_after(jiffies, fnhe->fnhe_expires)) {
- ip_del_fnhe(nh, daddr);
+ ip_del_fnhe(nhc, daddr);
break;
}
return fnhe;
@@ -1405,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
mtu = fi->fib_mtu;
if (likely(!mtu)) {
- struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
struct fib_nh_exception *fnhe;
- fnhe = find_exception(nh, daddr);
+ fnhe = find_exception(nhc, daddr);
if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
mtu = fnhe->fnhe_pmtu;
}
@@ -1469,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
return ret;
}
-static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
{
struct rtable *orig, *prev, **p;
bool ret = true;
if (rt_is_input_route(rt)) {
- p = (struct rtable **)&nh->nh_rth_input;
+ p = (struct rtable **)&nhc->nhc_rth_input;
} else {
- p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
+ p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
}
orig = *p;
@@ -1574,7 +1571,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- struct fib_nh *nh;
if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
rt->rt_gw_family = nhc->nhc_gw_family;
@@ -1587,15 +1583,19 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
- nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
- rt->dst.tclassid = nh->nh_tclassid;
+ {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ rt->dst.tclassid = nh->nh_tclassid;
+ }
#endif
- rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws);
+ rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
else if (do_cache)
- cached = rt_cache_route(nh, rt);
+ cached = rt_cache_route(nhc, rt);
if (unlikely(!cached)) {
/* Routes we intend to cache in nexthop exception or
* FIB nexthop have the DST_NOCACHE bit clear.
@@ -1756,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb,
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
struct rtable *rth;
- struct fib_nh *nh;
int err;
struct in_device *out_dev;
bool do_cache;
@@ -1804,13 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- nh = container_of(nhc, struct fib_nh, nh_common);
- fnhe = find_exception(nh, daddr);
+ fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
rth = rcu_dereference(fnhe->fnhe_rth_input);
else
- rth = rcu_dereference(nh->nh_rth_input);
+ rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@@ -2105,10 +2103,8 @@ local_input:
if (res->fi) {
if (!itag) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- struct fib_nh *nh;
- nh = container_of(nhc, struct fib_nh, nh_common);
- rth = rcu_dereference(nh->nh_rth_input);
+ rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
@@ -2139,7 +2135,6 @@ local_input:
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- struct fib_nh *nh;
rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2148,8 +2143,7 @@ local_input:
rth->dst.input = lwtunnel_input;
}
- nh = container_of(nhc, struct fib_nh, nh_common);
- if (unlikely(!rt_cache_route(nh, rth)))
+ if (unlikely(!rt_cache_route(nhc, rth)))
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
@@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
do_cache &= fi != NULL;
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
- struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
struct rtable __rcu **prth;
- fnhe = find_exception(nh, fl4->daddr);
+ fnhe = find_exception(nhc, fl4->daddr);
if (!do_cache)
goto add;
if (fnhe) {
@@ -2337,7 +2330,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
do_cache = false;
goto add;
}
- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
+ prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
}
rth = rcu_dereference(*prth);
if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f7567a3698eb..1fa15beb8380 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,18 +457,6 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);
-void tcp_init_transfer(struct sock *sk, int bpf_op)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- tcp_mtup_init(sk);
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_init_metrics(sk);
- tcp_call_bpf(sk, bpf_op, 0, NULL);
- tcp_init_congestion_control(sk);
- tcp_init_buffer_space(sk);
-}
-
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
{
struct sk_buff *skb = tcp_write_queue_tail(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 97671bff597a..077d9abdfcf5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2252,7 +2252,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
*/
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
- return !tp->retrans_stamp ||
+ return tp->retrans_stamp &&
tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
}
@@ -3521,7 +3521,7 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (rexmit == REXMIT_NONE)
+ if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
return;
if (unlikely(rexmit == 2)) {
@@ -5647,6 +5647,32 @@ discard:
}
EXPORT_SYMBOL(tcp_rcv_established);
+void tcp_init_transfer(struct sock *sk, int bpf_op)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tcp_mtup_init(sk);
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_metrics(sk);
+
+ /* Initialize the congestion window to start the transfer.
+ * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
+ * retransmitted. In light of RFC6298 more aggressive 1sec
+ * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
+ * retransmission has occurred.
+ */
+ if (tp->total_retrans > 1 && tp->undo_marker)
+ tp->snd_cwnd = 1;
+ else
+ tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+
+ tcp_call_bpf(sk, bpf_op, 0, NULL);
+ tcp_init_congestion_control(sk);
+ tcp_init_buffer_space(sk);
+}
+
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -5748,6 +5774,21 @@ static void smc_check_reset_syn(struct tcp_sock *tp)
#endif
}
+static void tcp_try_undo_spurious_syn(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 syn_stamp;
+
+ /* undo_marker is set when SYN or SYNACK times out. The timeout is
+ * spurious if the ACK's timestamp option echo value matches the
+ * original SYN timestamp.
+ */
+ syn_stamp = tp->retrans_stamp;
+ if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
+ syn_stamp == tp->rx_opt.rcv_tsecr)
+ tp->undo_marker = 0;
+}
+
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
@@ -5815,6 +5856,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_ecn_rcv_synack(tp, th);
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+ tcp_try_undo_spurious_syn(sk);
tcp_ack(sk, skb, FLAG_SLOWPATH);
/* Ok.. it's good. Set up sequence numbers and
@@ -5973,6 +6015,27 @@ reset_and_undo:
return 1;
}
+static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
+{
+ tcp_try_undo_loss(sk, false);
+ inet_csk(sk)->icsk_retransmits = 0;
+
+ /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
+ * we no longer need req so release it.
+ */
+ reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false);
+
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmitting any data sooner based on when they
+ * are sent out.
+ */
+ tcp_rearm_rto(sk);
+}
+
/*
* This function implements the receiving procedure of RFC 793 for
* all states except ESTABLISHED and TIME_WAIT.
@@ -6069,22 +6132,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
- /* Once we leave TCP_SYN_RECV, we no longer need req
- * so release it.
- */
if (req) {
- inet_csk(sk)->icsk_retransmits = 0;
- reqsk_fastopen_remove(sk, req, false);
- /* Re-arm the timer because data may have been sent out.
- * This is similar to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more aggressive and
- * retransmitting any data sooner based on when they
- * are sent out.
- */
- tcp_rearm_rto(sk);
+ tcp_rcv_synrecv_state_fastopen(sk);
} else {
+ tcp_try_undo_spurious_syn(sk);
+ tp->retrans_stamp = 0;
tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->copied_seq = tp->rcv_nxt;
}
@@ -6119,16 +6171,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_FIN_WAIT1: {
int tmo;
- /* If we enter the TCP_FIN_WAIT1 state and we are a
- * Fast Open socket and this is the first acceptable
- * ACK we have received, this would have acknowledged
- * our SYNACK so stop the SYNACK timer.
- */
- if (req) {
- /* We no longer need the request sock. */
- reqsk_fastopen_remove(sk, req, false);
- tcp_rearm_rto(sk);
- }
+ if (req)
+ tcp_rcv_synrecv_state_fastopen(sk);
+
if (tp->snd_una != tp->write_seq)
break;
@@ -6303,7 +6348,7 @@ static void tcp_openreq_init(struct request_sock *req,
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tcp_rsk(req)->snt_synack = tcp_clock_us();
+ tcp_rsk(req)->snt_synack = 0;
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
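
The undo logic added above leans entirely on TCP timestamps: tp->retrans_stamp records when the original SYN (or SYNACK) was sent, undo_marker is set once it times out, and if the eventual ACK echoes the original stamp rather than the retransmission's, the timeout was spurious and the conservative cwnd handling can be undone. The predicate, restated as a self-contained check with made-up sample values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* restatement of tcp_try_undo_spurious_syn()'s test */
static bool syn_timeout_was_spurious(uint32_t undo_marker,
				     uint32_t syn_stamp,  /* ts of first SYN */
				     bool saw_tstamp,
				     uint32_t rcv_tsecr)  /* ts echoed by peer */
{
	return undo_marker && syn_stamp && saw_tstamp &&
	       syn_stamp == rcv_tsecr;
}

int main(void)
{
	/* the ACK echoes the original SYN's stamp: the peer had seen
	 * the first SYN all along, so the retransmit was spurious
	 */
	printf("%d\n", syn_timeout_was_spurious(1, 1000, true, 1000)); /* 1 */
	/* the echo matches a later stamp instead: genuine loss */
	printf("%d\n", syn_timeout_was_spurious(1, 1000, true, 2000)); /* 0 */
	return 0;
}
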
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index faa6fa619f59..af81e4a6a8d8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
((TCP_SKB_CB(tail)->tcp_flags |
- TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
+ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
+ !((TCP_SKB_CB(tail)->tcp_flags &
+ TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
#ifdef CONFIG_TLS_DEVICE
@@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+ /* We have to update both TCP_SKB_CB(tail)->tcp_flags and
+ * thtail->fin, so that the fast path in tcp_rcv_established()
+ * is not entered if we append a packet with a FIN.
+ * SYN, RST, URG are not present.
+ * ACK is set on both packets.
+ * PSH : we do not really care in TCP stack,
+ * at least for 'GRO' packets.
+ */
+ thtail->fin |= th->fin;
TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
if (TCP_SKB_CB(skb)->has_rxtstamp) {
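
The widened guard in tcp_add_backlog() refuses to coalesce any segment carrying SYN, RST or URG and insists on ACK being set in both, because the coalesced skb must still look like the plain ACK stream the tcp_rcv_established() fast path expects; FIN alone may be merged, provided thtail->fin is propagated as the new comment spells out. The flag arithmetic in isolation:

#include <stdbool.h>
#include <stdio.h>

#define TCPHDR_FIN	0x01
#define TCPHDR_SYN	0x02
#define TCPHDR_RST	0x04
#define TCPHDR_ACK	0x10
#define TCPHDR_URG	0x20

/* the two conditions added above, in isolation */
static bool flags_block_coalescing(unsigned int tail, unsigned int skb)
{
	return ((tail | skb) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
	       !((tail & skb) & TCPHDR_ACK);
}

int main(void)
{
	/* ACK + ACK|FIN: may merge, with thtail->fin propagated */
	printf("%d\n", flags_block_coalescing(TCPHDR_ACK,
					      TCPHDR_ACK | TCPHDR_FIN)); /* 0 */
	/* an RST in either segment forbids it */
	printf("%d\n", flags_block_coalescing(TCPHDR_ACK,
					      TCPHDR_ACK | TCPHDR_RST)); /* 1 */
	return 0;
}
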
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f262f2cace29..c4848e7a0aad 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -512,16 +512,6 @@ reset:
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
}
- /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
- * retransmitted. In light of RFC6298 more aggressive 1sec
- * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
- * retransmission has occurred.
- */
- if (tp->total_retrans > 1)
- tp->snd_cwnd = 1;
- else
- tp->snd_cwnd = tcp_init_cwnd(tp, dst);
- tp->snd_cwnd_stamp = tcp_jiffies32;
}
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 79900f783e0d..9c2a0d36fb20 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -522,6 +522,11 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rx_opt.ts_recent_stamp = 0;
newtp->tcp_header_len = sizeof(struct tcphdr);
}
+ if (req->num_timeout) {
+ newtp->undo_marker = treq->snt_isn;
+ newtp->retrans_stamp = div_u64(treq->snt_synack,
+ USEC_PER_SEC / TCP_TS_HZ);
+ }
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
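
treq->snt_synack is kept in microseconds (see the tcp_skb_timestamp_us() hunk in tcp_output.c immediately below), while retrans_stamp wants TCP timestamp ticks, hence the div_u64(snt_synack, USEC_PER_SEC / TCP_TS_HZ). With the in-tree constants (TCP_TS_HZ being 1000, quoted from memory, so treat it as an assumption) that is just a microseconds-to-milliseconds division:

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000ULL
#define TCP_TS_HZ	1000ULL		/* assumed: 1 tick = 1 ms */

int main(void)
{
	uint64_t snt_synack_us = 1557000123456ULL;	/* made-up sample */

	/* div_u64(treq->snt_synack, USEC_PER_SEC / TCP_TS_HZ) */
	uint64_t retrans_stamp = snt_synack_us / (USEC_PER_SEC / TCP_TS_HZ);

	printf("%llu\n", (unsigned long long)retrans_stamp); /* 1557000123 */
	return 0;
}
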
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 32061928b054..0c4ed66dc1bf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3247,7 +3247,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
skb->skb_mstamp_ns = cookie_init_timestamp(req);
else
#endif
+ {
skb->skb_mstamp_ns = tcp_clock_ns();
+ if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
+ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
+ }
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index f0c86398e6a7..2ac23da42dd2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -393,6 +393,9 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
tcp_write_err(sk);
return;
}
+ /* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
+ if (icsk->icsk_retransmits == 1)
+ tcp_enter_loss(sk);
/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
* returned from rtx_syn_ack() to make it more persistent like
* regular retransmit because if the child socket has been accepted
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 64f9715173ac..065334b41d57 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *pp = NULL;
struct udphdr *uh2;
struct sk_buff *p;
+ unsigned int ulen;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
+ /* Do not deal with padded or malicious packets, sorry ! */
+ ulen = ntohs(uh->len);
+ if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
/* pull encapsulating udp header */
skb_gro_pull(skb, sizeof(struct udphdr));
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
@@ -377,13 +384,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
- * leading to execessive truesize values
+ * leading to excessive truesize values.
+ * On len mismatch merge the first packet shorter than gso_size,
+ * otherwise complete the GRO packet.
*/
- if (!skb_gro_receive(p, skb) &&
+ if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
+ ulen != ntohs(uh2->len) ||
NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
pp = p;
- else if (uh->len != uh2->len)
- pp = p;
return pp;
}
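
The rewritten condition completes the held GRO packet (pp = p) when the incoming segment is longer than those already merged, when skb_gro_receive() fails, when a shorter-than-gso_size tail has just been appended, or when the aggregate reaches UDP_GRO_CNT_MAX; together with the new up-front ulen check this keeps padded or lying length fields from building a malformed superpacket. The decision, flattened into a standalone function (merged stands in for skb_gro_receive() succeeding, which the real code short-circuits past for oversized segments):

#include <stdbool.h>
#include <stdio.h>

#define UDP_GRO_CNT_MAX	64

static bool complete_gro_packet(unsigned int ulen,	/* new segment */
				unsigned int held_len,	/* uh2->len so far */
				bool merged, unsigned int count)
{
	return ulen > held_len || !merged || ulen != held_len ||
	       count >= UDP_GRO_CNT_MAX;
}

int main(void)
{
	/* equal-sized middle segment merged fine: keep aggregating */
	printf("%d\n", complete_gro_packet(1472, 1472, true, 3)); /* 0 */
	/* shorter tail segment: append it, then close the packet */
	printf("%d\n", complete_gro_packet(900, 1472, true, 3));  /* 1 */
	return 0;
}
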
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
deleted file mode 100644
index 856d2dfdb44b..000000000000
--- a/net/ipv4/xfrm4_mode_beet.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- * Miika Komu <miika@iki.fi>
- * Herbert Xu <herbert@gondor.apana.org.au>
- * Abhinav Pathak <abhinav.pathak@hiit.fi>
- * Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-static void xfrm4_beet_make_header(struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- iph->ihl = 5;
- iph->version = 4;
-
- iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
- iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
-
- iph->id = XFRM_MODE_SKB_CB(skb)->id;
- iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
- iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
- */
-static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ip_beet_phdr *ph;
- struct iphdr *top_iph;
- int hdrlen, optlen;
-
- hdrlen = 0;
- optlen = XFRM_MODE_SKB_CB(skb)->optlen;
- if (unlikely(optlen))
- hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
-
- skb_set_network_header(skb, -x->props.header_len -
- hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
- if (x->sel.family != AF_INET6)
- skb->network_header += IPV4_BEET_PHMAXLEN;
- skb->mac_header = skb->network_header +
- offsetof(struct iphdr, protocol);
- skb->transport_header = skb->network_header + sizeof(*top_iph);
-
- xfrm4_beet_make_header(skb);
-
- ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
-
- top_iph = ip_hdr(skb);
-
- if (unlikely(optlen)) {
- BUG_ON(optlen < 0);
-
- ph->padlen = 4 - (optlen & 4);
- ph->hdrlen = optlen / 8;
- ph->nexthdr = top_iph->protocol;
- if (ph->padlen)
- memset(ph + 1, IPOPT_NOP, ph->padlen);
-
- top_iph->protocol = IPPROTO_BEETPH;
- top_iph->ihl = sizeof(struct iphdr) / 4;
- }
-
- top_iph->saddr = x->props.saddr.a4;
- top_iph->daddr = x->id.daddr.a4;
-
- return 0;
-}
-
-static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct iphdr *iph;
- int optlen = 0;
- int err = -EINVAL;
-
- if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
- struct ip_beet_phdr *ph;
- int phlen;
-
- if (!pskb_may_pull(skb, sizeof(*ph)))
- goto out;
-
- ph = (struct ip_beet_phdr *)skb->data;
-
- phlen = sizeof(*ph) + ph->padlen;
- optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
- if (optlen < 0 || optlen & 3 || optlen > 250)
- goto out;
-
- XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
-
- if (!pskb_may_pull(skb, phlen))
- goto out;
- __skb_pull(skb, phlen);
- }
-
- skb_push(skb, sizeof(*iph));
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
-
- xfrm4_beet_make_header(skb);
-
- iph = ip_hdr(skb);
-
- iph->ihl += optlen / 4;
- iph->tot_len = htons(skb->len);
- iph->daddr = x->sel.daddr.a4;
- iph->saddr = x->sel.saddr.a4;
- iph->check = 0;
- iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
- err = 0;
-out:
- return err;
-}
-
-static struct xfrm_mode xfrm4_beet_mode = {
- .input2 = xfrm4_beet_input,
- .input = xfrm_prepare_input,
- .output2 = xfrm4_beet_output,
- .output = xfrm4_prepare_output,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_BEET,
- .flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm4_beet_init(void)
-{
- return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
-}
-
-static void __exit xfrm4_beet_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
- BUG_ON(err);
-}
-
-module_init(xfrm4_beet_init);
-module_exit(xfrm4_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
deleted file mode 100644
index 1ad2c2c4e250..000000000000
--- a/net/ipv4/xfrm4_mode_transport.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * xfrm4_mode_transport.c - Transport mode encapsulation for IPv4.
- *
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-/* Add encapsulation header.
- *
- * The IP header will be moved forward to make space for the encapsulation
- * header.
- */
-static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
- int ihl = iph->ihl * 4;
-
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
- skb_set_network_header(skb, -x->props.header_len);
- skb->mac_header = skb->network_header +
- offsetof(struct iphdr, protocol);
- skb->transport_header = skb->network_header + ihl;
- __skb_pull(skb, ihl);
- memmove(skb_network_header(skb), iph, ihl);
- return 0;
-}
-
-/* Remove encapsulation header.
- *
- * The IP header will be moved over the top of the encapsulation header.
- *
- * On entry, skb->h shall point to where the IP header should be and skb->nh
- * shall be set to where the IP header currently is. skb->data shall point
- * to the start of the payload.
- */
-static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- int ihl = skb->data - skb_transport_header(skb);
-
- if (skb->transport_header != skb->network_header) {
- memmove(skb_transport_header(skb),
- skb_network_header(skb), ihl);
- skb->network_header = skb->transport_header;
- }
- ip_hdr(skb)->tot_len = htons(skb->len + ihl);
- skb_reset_transport_header(skb);
- return 0;
-}
-
-static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
- struct sk_buff *skb,
- netdev_features_t features)
-{
- const struct net_offload *ops;
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- skb->transport_header += x->props.header_len;
- ops = rcu_dereference(inet_offloads[xo->proto]);
- if (likely(ops && ops->callbacks.gso_segment))
- segs = ops->callbacks.gso_segment(skb, features);
-
- return segs;
-}
-
-static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len);
-
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb_reset_transport_header(skb);
- skb->transport_header -= x->props.header_len;
- }
-}
-
-static struct xfrm_mode xfrm4_transport_mode = {
- .input = xfrm4_transport_input,
- .output = xfrm4_transport_output,
- .gso_segment = xfrm4_transport_gso_segment,
- .xmit = xfrm4_transport_xmit,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_TRANSPORT,
-};
-
-static int __init xfrm4_transport_init(void)
-{
- return xfrm_register_mode(&xfrm4_transport_mode, AF_INET);
-}
-
-static void __exit xfrm4_transport_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm4_transport_mode, AF_INET);
- BUG_ON(err);
-}
-
-module_init(xfrm4_transport_init);
-module_exit(xfrm4_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
deleted file mode 100644
index 2a9764bd1719..000000000000
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4.
- *
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip.h>
-#include <net/xfrm.h>
-
-static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
-{
- struct iphdr *inner_iph = ipip_hdr(skb);
-
- if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
- IP_ECN_set_ce(inner_iph);
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per RFC 2401.
- */
-static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct iphdr *top_iph;
- int flags;
-
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
- skb_set_network_header(skb, -x->props.header_len);
- skb->mac_header = skb->network_header +
- offsetof(struct iphdr, protocol);
- skb->transport_header = skb->network_header + sizeof(*top_iph);
- top_iph = ip_hdr(skb);
-
- top_iph->ihl = 5;
- top_iph->version = 4;
-
- top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
-
- /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
- if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
- top_iph->tos = 0;
- else
- top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
- top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
- XFRM_MODE_SKB_CB(skb)->tos);
-
- flags = x->props.flags;
- if (flags & XFRM_STATE_NOECN)
- IP_ECN_clear(top_iph);
-
- top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
- 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-
- top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
-
- top_iph->saddr = x->props.saddr.a4;
- top_iph->daddr = x->id.daddr.a4;
- ip_select_ident(dev_net(dst->dev), skb, NULL);
-
- return 0;
-}
-
-static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err = -EINVAL;
-
- if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
- goto out;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto out;
-
- err = skb_unclone(skb, GFP_ATOMIC);
- if (err)
- goto out;
-
- if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
- if (!(x->props.flags & XFRM_STATE_NOECN))
- ipip_ecn_decapsulate(skb);
-
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
- if (skb->mac_len)
- eth_hdr(skb)->h_proto = skb->protocol;
-
- err = 0;
-
-out:
- return err;
-}
-
-static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x,
- struct sk_buff *skb,
- netdev_features_t features)
-{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
-}
-
-static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- if (xo->flags & XFRM_GSO_SEGMENT)
- skb->transport_header = skb->network_header +
- sizeof(struct iphdr);
-
- skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + x->props.header_len);
-}
-
-static struct xfrm_mode xfrm4_tunnel_mode = {
- .input2 = xfrm4_mode_tunnel_input,
- .input = xfrm_prepare_input,
- .output2 = xfrm4_mode_tunnel_output,
- .output = xfrm4_prepare_output,
- .gso_segment = xfrm4_mode_tunnel_gso_segment,
- .xmit = xfrm4_mode_tunnel_xmit,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_TUNNEL,
- .flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm4_mode_tunnel_init(void)
-{
- return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET);
-}
-
-static void __exit xfrm4_mode_tunnel_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET);
- BUG_ON(err);
-}
-
-module_init(xfrm4_mode_tunnel_init);
-module_exit(xfrm4_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL);
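
The three deleted IPv4 mode modules (and their IPv6 twins further down) are casualties of flattening the mode layer: instead of a registered struct xfrm_mode full of function pointers living in a loadable module, the state now embeds a small constant descriptor and the per-mode code moves into the xfrm core. That is why the diffstat shows xfrm_input.c and xfrm_output.c growing while every xfrm*_mode_*.c file disappears, and why the vti hunks read x->outer_mode.family instead of chasing mode->afinfo->family. A sketch of the shape change; the new field set is inferred from the hunks in this diff, so treat it as an approximation:

struct xfrm_state;
struct sk_buff;

/* before: a module-owned descriptor, every operation an indirect call */
struct xfrm_mode_old {
	int (*input2)(struct xfrm_state *x, struct sk_buff *skb);
	int (*output2)(struct xfrm_state *x, struct sk_buff *skb);
	/* ...input, output, gso_segment, xmit, owner, afinfo... */
	unsigned char encap;
	unsigned char flags;
};

/* after: plain data embedded in struct xfrm_state; behavior is
 * selected by switch (mode->encap) inside the xfrm core, as the
 * esp6_offload.c hunk below already demonstrates for GSO
 */
struct xfrm_mode_new {
	unsigned char encap;	/* XFRM_MODE_TUNNEL, _TRANSPORT, _BEET, ... */
	unsigned char family;	/* replaces mode->afinfo->family */
	unsigned char flags;	/* e.g. XFRM_MODE_FLAG_TUNNEL */
};
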
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index be980c195fc5..9bb8905088c7 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -58,21 +58,6 @@ int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
return xfrm4_extract_header(skb);
}
-int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err;
-
- err = xfrm_inner_extract_output(x, skb);
- if (err)
- return err;
-
- IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
- skb->protocol = htons(ETH_P_IP);
-
- return x->outer_mode->output2(x, skb);
-}
-EXPORT_SYMBOL(xfrm4_prepare_output);
-
int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
@@ -87,6 +72,8 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
+ const struct xfrm_state_afinfo *afinfo;
+ int ret = -EAFNOSUPPORT;
#ifdef CONFIG_NETFILTER
if (!x) {
@@ -95,7 +82,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
#endif
- return x->outer_mode->afinfo->output_finish(sk, skb);
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+ if (likely(afinfo))
+ ret = afinfo->output_finish(sk, skb);
+ else
+ kfree_skb(skb);
+ rcu_read_unlock();
+
+ return ret;
}
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 72d19b1838ed..cdef8f9a3b01 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -12,7 +12,6 @@
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/inetdevice.h>
-#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
@@ -69,17 +68,6 @@ static int xfrm4_get_saddr(struct net *net, int oif,
return 0;
}
-static int xfrm4_get_tos(const struct flowi *fl)
-{
- return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
-}
-
-static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
- int nfheader_len)
-{
- return 0;
-}
-
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
@@ -110,118 +98,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return 0;
}
-static void
-_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
-{
- const struct iphdr *iph = ip_hdr(skb);
- u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
- struct flowi4 *fl4 = &fl->u.ip4;
- int oif = 0;
-
- if (skb_dst(skb))
- oif = skb_dst(skb)->dev->ifindex;
-
- memset(fl4, 0, sizeof(struct flowi4));
- fl4->flowi4_mark = skb->mark;
- fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
-
- if (!ip_is_fragment(iph)) {
- switch (iph->protocol) {
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (xprth + 4 < skb->data ||
- pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be16 *ports;
-
- xprth = skb_network_header(skb) + iph->ihl * 4;
- ports = (__be16 *)xprth;
-
- fl4->fl4_sport = ports[!!reverse];
- fl4->fl4_dport = ports[!reverse];
- }
- break;
-
- case IPPROTO_ICMP:
- if (xprth + 2 < skb->data ||
- pskb_may_pull(skb, xprth + 2 - skb->data)) {
- u8 *icmp;
-
- xprth = skb_network_header(skb) + iph->ihl * 4;
- icmp = xprth;
-
- fl4->fl4_icmp_type = icmp[0];
- fl4->fl4_icmp_code = icmp[1];
- }
- break;
-
- case IPPROTO_ESP:
- if (xprth + 4 < skb->data ||
- pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be32 *ehdr;
-
- xprth = skb_network_header(skb) + iph->ihl * 4;
- ehdr = (__be32 *)xprth;
-
- fl4->fl4_ipsec_spi = ehdr[0];
- }
- break;
-
- case IPPROTO_AH:
- if (xprth + 8 < skb->data ||
- pskb_may_pull(skb, xprth + 8 - skb->data)) {
- __be32 *ah_hdr;
-
- xprth = skb_network_header(skb) + iph->ihl * 4;
- ah_hdr = (__be32 *)xprth;
-
- fl4->fl4_ipsec_spi = ah_hdr[1];
- }
- break;
-
- case IPPROTO_COMP:
- if (xprth + 4 < skb->data ||
- pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be16 *ipcomp_hdr;
-
- xprth = skb_network_header(skb) + iph->ihl * 4;
- ipcomp_hdr = (__be16 *)xprth;
-
- fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
- }
- break;
-
- case IPPROTO_GRE:
- if (xprth + 12 < skb->data ||
- pskb_may_pull(skb, xprth + 12 - skb->data)) {
- __be16 *greflags;
- __be32 *gre_hdr;
-
- xprth = skb_network_header(skb) + iph->ihl * 4;
- greflags = (__be16 *)xprth;
- gre_hdr = (__be32 *)xprth;
-
- if (greflags[0] & GRE_KEY) {
- if (greflags[0] & GRE_CSUM)
- gre_hdr++;
- fl4->fl4_gre_key = gre_hdr[1];
- }
- }
- break;
-
- default:
- fl4->fl4_ipsec_spi = 0;
- break;
- }
- }
- fl4->flowi4_proto = iph->protocol;
- fl4->daddr = reverse ? iph->saddr : iph->daddr;
- fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
-}
-
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -274,9 +150,6 @@ static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
- .decode_session = _decode_session4,
- .get_tos = xfrm4_get_tos,
- .init_path = xfrm4_init_path,
.fill_dst = xfrm4_fill_dst,
.blackhole_route = ipv4_blackhole_route,
};
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 35c54865dc42..bcab48944c15 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -46,7 +46,7 @@ static inline struct xfrm4_protocol __rcu **proto_handlers(u8 protocol)
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+static int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
int ret;
struct xfrm4_protocol *handler;
@@ -61,7 +61,6 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
return 0;
}
-EXPORT_SYMBOL(xfrm4_rcv_cb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type)
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 613282c65a10..cd915e332c98 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -135,44 +135,11 @@ config INET6_TUNNEL
tristate
default n
-config INET6_XFRM_MODE_TRANSPORT
- tristate "IPv6: IPsec transport mode"
- default IPV6
- select XFRM
- ---help---
- Support for IPsec transport mode.
-
- If unsure, say Y.
-
-config INET6_XFRM_MODE_TUNNEL
- tristate "IPv6: IPsec tunnel mode"
- default IPV6
- select XFRM
- ---help---
- Support for IPsec tunnel mode.
-
- If unsure, say Y.
-
-config INET6_XFRM_MODE_BEET
- tristate "IPv6: IPsec BEET mode"
- default IPV6
- select XFRM
- ---help---
- Support for IPsec BEET mode.
-
- If unsure, say Y.
-
-config INET6_XFRM_MODE_ROUTEOPTIMIZATION
- tristate "IPv6: MIPv6 route optimization mode"
- select XFRM
- ---help---
- Support for MIPv6 route optimization mode.
-
config IPV6_VTI
tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
select NET_IP_TUNNEL
- depends on INET6_XFRM_MODE_TUNNEL
+ select XFRM
---help---
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index e0026fa1261b..8ccf35514015 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -35,10 +35,6 @@ obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o
obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
-obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
-obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
-obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
-obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
obj-$(CONFIG_IPV6_ILA) += ila/
obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d46b4eb645c2..d453cf417b03 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
goto out;
if (sp->len == XFRM_MAX_DEPTH)
- goto out;
+ goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
if (!x)
- goto out;
+ goto out_reset;
sp->xvec[sp->len++] = x;
sp->olen++;
@@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo) {
xfrm_state_put(x);
- goto out;
+ goto out_reset;
}
}
@@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS);
+out_reset:
+ secpath_reset(skb);
out:
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
@@ -134,6 +136,44 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
xo->proto = proto;
}
+static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ __skb_push(skb, skb->mac_len);
+ return skb_mac_gso_segment(skb, features);
+}
+
+static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb->transport_header += x->props.header_len;
+ ops = rcu_dereference(inet6_offloads[xo->proto]);
+ if (likely(ops && ops->callbacks.gso_segment))
+ segs = ops->callbacks.gso_segment(skb, features);
+
+ return segs;
+}
+
+static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x,
+ struct sk_buff *skb,
+ netdev_features_t features)
+{
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_TUNNEL:
+ return xfrm6_tunnel_gso_segment(x, skb, features);
+ case XFRM_MODE_TRANSPORT:
+ return xfrm6_transport_gso_segment(x, skb, features);
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -172,7 +212,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
xo->flags |= XFRM_GSO_SEGMENT;
- return x->outer_mode->gso_segment(x, skb, esp_features);
+ return xfrm6_outer_mode_gso_segment(x, skb, esp_features);
}
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index a8919c217cc2..08e0390e001c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -916,9 +916,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
if (pcpu_rt) {
struct fib6_info *from;
- from = rcu_dereference_protected(pcpu_rt->from,
- lockdep_is_held(&table->tb6_lock));
- rcu_assign_pointer(pcpu_rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
fib6_info_release(from);
}
}
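
The xchg() form replaces a rcu_dereference_protected() plus rcu_assign_pointer(..., NULL) pair: it atomically steals the pointer, so clearing ->from no longer depends on holding the table lock, and two racing callers cannot both obtain, and hence double-release, the same fib6_info. A userspace analogue with C11 atomics:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fib_info_demo { int refcnt; };

static _Atomic(struct fib_info_demo *) from;

/* sketch of: from = xchg(&rt->from, NULL); fib6_info_release(from);
 * (refcounting simplified; the kernel's release helper is atomic)
 */
static void release_from(void)
{
	struct fib_info_demo *f = atomic_exchange(&from, NULL);

	if (f && --f->refcnt == 0)	/* only one caller can see f */
		free(f);
}

int main(void)
{
	struct fib_info_demo *f = malloc(sizeof(*f));

	f->refcnt = 1;
	atomic_store(&from, f);
	release_from();		/* frees f */
	release_from();		/* harmless: sees NULL */
	return 0;
}
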
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index cb54a8a3c273..be5f3d7ceb96 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl;
}
+static void fl_free_rcu(struct rcu_head *head)
+{
+ struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
+ kfree(fl->opt);
+ kfree(fl);
+}
+
static void fl_free(struct ip6_flowlabel *fl)
{
- if (fl) {
- if (fl->share == IPV6_FL_S_PROCESS)
- put_pid(fl->owner.pid);
- kfree(fl->opt);
- kfree_rcu(fl, rcu);
- }
+ if (fl)
+ call_rcu(&fl->rcu, fl_free_rcu);
}
static void fl_release(struct ip6_flowlabel *fl)
@@ -633,9 +639,9 @@ recheck:
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid == fl->owner.pid)) ||
+ (fl1->owner.pid != fl->owner.pid)) ||
((fl1->share == IPV6_FL_S_USER) &&
- uid_eq(fl1->owner.uid, fl->owner.uid)))
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -ENOMEM;
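
Two distinct fixes in the flowlabel hunks. The ownership tests were inverted: a process-shared label must be refused when the pids differ (and a user-shared one when the uids differ), not when they match. And kfree_rcu(fl, rcu) was insufficient because destroying a flowlabel has side effects, dropping the pid reference and freeing fl->opt, which must also wait out the grace period; only an explicit call_rcu() callback can do that. The general shape, as a kernel-style sketch assuming the usual rcu and pid helpers:

/* kfree_rcu() can only kfree() the object itself; teardown with
 * side effects needs a real RCU callback (sketch, not the actual
 * net/ipv6 code)
 */
struct label {
	struct rcu_head rcu;
	struct pid *owner_pid;		/* counted reference */
	void *opt;			/* separately allocated */
};

static void label_free_rcu(struct rcu_head *head)
{
	struct label *l = container_of(head, struct label, rcu);

	put_pid(l->owner_pid);		/* the two things kfree_rcu() */
	kfree(l->opt);			/* could never have done */
	kfree(l);
}

static void label_free(struct label *l)
{
	if (l)
		call_rcu(&l->rcu, label_free_rcu);
}
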
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c7ed2b6d5a1d..b50b1af1f530 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -29,6 +29,7 @@
#include <linux/icmpv6.h>
#include <linux/mroute6.h>
#include <linux/slab.h>
+#include <linux/indirect_call_wrapper.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
@@ -47,6 +48,8 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
+INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -57,7 +60,8 @@ static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- edemux(skb);
+ INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
+ udp_v6_early_demux, skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
@@ -316,6 +320,9 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
+INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *));
+
/*
* Deliver the packet to the host
*/
@@ -391,7 +398,8 @@ resubmit_final:
!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
- ret = ipprot->handler(skb);
+ ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
+ skb);
if (ret > 0) {
if (ipprot->flags & INET6_PROTO_FINAL) {
/* Not an extension header, most likely UDP
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8b6eefff2f7e..218a0dedc8f4 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -342,7 +342,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
- struct xfrm_mode *inner_mode;
+ const struct xfrm_mode *inner_mode;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
u32 orig_mark = skb->mark;
int ret;
@@ -361,7 +361,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
x = xfrm_input_state(skb);
- inner_mode = x->inner_mode;
+ inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -372,7 +372,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
}
}
- family = inner_mode->afinfo->family;
+ family = inner_mode->family;
skb->mark = be32_to_cpu(t->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b18e85cd7587..23a20d62daac 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -380,11 +380,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- rcu_read_lock();
- from = rcu_dereference(rt->from);
- rcu_assign_pointer(rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt->from, NULL);
fib6_info_release(from);
- rcu_read_unlock();
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1323,9 +1320,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = rcu_dereference_protected(rt6_ex->rt6i->from,
- lockdep_is_held(&rt6_exception_lock));
- rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
@@ -3495,11 +3490,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
rcu_read_lock();
res.f6i = rcu_dereference(rt->from);
- /* This fib6_info_hold() is safe here because we hold reference to rt
- * and rt already holds reference to fib6_info.
- */
- fib6_info_hold(res.f6i);
- rcu_read_unlock();
+ if (!res.f6i)
+ goto out;
res.nh = &res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
@@ -3514,10 +3506,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- /* No need to remove rt from the exception table if rt is
- * a cached route because rt6_insert_exception() will
- * takes care of it
- */
+ /* rt6_insert_exception() will take care of duplicated exceptions */
if (rt6_insert_exception(nrt, &res)) {
dst_release_immediate(&nrt->dst);
goto out;
@@ -3530,7 +3519,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
- fib6_info_release(res.f6i);
+ rcu_read_unlock();
neigh_release(neigh);
}
@@ -3772,23 +3761,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
- int type;
struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(dst->dev);
+ struct inet6_dev *idev;
+ int type;
+
+ if (netif_is_l3_master(skb->dev) &&
+ dst->dev == net->loopback_dev)
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
+ else
+ idev = ip6_dst_idev(dst);
+
switch (ipstats_mib_noroutes) {
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
- IP6_INC_STATS(dev_net(dst->dev),
- __in6_dev_get_safely(skb->dev),
- IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
break;
}
/* FALLTHROUGH */
case IPSTATS_MIB_OUTNOROUTES:
- IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
- ipstats_mib_noroutes);
+ IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
break;
}
+
+ /* Start over by dropping the dst for l3mdev case */
+ if (netif_is_l3_master(skb->dev))
+ skb_dst_drop(skb);
+
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
kfree_skb(skb);
return 0;
@@ -5056,16 +5056,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
from = rcu_dereference(rt->from);
-
- if (fibmatch)
- err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
- else
- err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
- &fl6.saddr, iif, RTM_NEWROUTE,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- 0);
+ if (from) {
+ if (fibmatch)
+ err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
+ iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ else
+ err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+ &fl6.saddr, iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ } else {
+ err = -ENETUNREACH;
+ }
rcu_read_unlock();
if (err < 0) {
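
In rt6_do_redirect() the old code pinned the fib6_info with fib6_info_hold() so it could leave the RCU section early, which becomes unsafe once ->from can be snatched to NULL by the lockless xchg() conversions above. The rework keeps the whole exception insertion inside rcu_read_lock() and simply bails out on a NULL ->from; schematically:

/* old: pin the object so the RCU section can end early */
rcu_read_lock();
f6i = rcu_dereference(rt->from);
fib6_info_hold(f6i);		/* oops if ->from is already NULL */
rcu_read_unlock();
/* ... long use of f6i ... */
fib6_info_release(f6i);

/* new: stay inside the read section; a NULL ->from is just a miss */
rcu_read_lock();
f6i = rcu_dereference(rt->from);
if (!f6i)
	goto out;
/* ... build and insert the exception; sleeping is not allowed ... */
out:
	rcu_read_unlock();
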
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 82018bdce863..beaf28456301 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -43,6 +43,7 @@
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/tcp.h>
#include <net/ndisc.h>
@@ -1435,7 +1436,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
-static int tcp_v6_rcv(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
@@ -1654,7 +1655,7 @@ do_time_wait:
goto discard_it;
}
-static void tcp_v6_early_demux(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2464fba569b4..07fa579dfb96 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
@@ -980,7 +981,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-static void udp_v6_early_demux(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -1021,7 +1022,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
}
}
-static __inline__ int udpv6_rcv(struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
}
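
The annotations come in pairs: INDIRECT_CALLABLE_SCOPE replaces static on the handler definition, and INDIRECT_CALLABLE_DECLARE() supplies the matching declaration at the call site (as in the ip6_input.c hunks above), so that with retpolines enabled the symbol is visible across translation units and INDIRECT_CALL_2() can compare against its address; with retpolines off everything collapses back to static functions. From memory of the header, so treat as an approximation:

#ifdef CONFIG_RETPOLINE
#define INDIRECT_CALLABLE_DECLARE(f)	f
#define INDIRECT_CALLABLE_SCOPE		/* global, comparable by address */
#else
#define INDIRECT_CALLABLE_DECLARE(f)
#define INDIRECT_CALLABLE_SCOPE		static	/* keep the old linkage */
#endif
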
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
deleted file mode 100644
index 57fd314ec2b8..000000000000
--- a/net/ipv6/xfrm6_mode_beet.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
- *
- * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
- * Miika Komu <miika@iki.fi>
- * Herbert Xu <herbert@gondor.apana.org.au>
- * Abhinav Pathak <abhinav.pathak@hiit.fi>
- * Jeff Ahrenholz <ahrenholz@gmail.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static void xfrm6_beet_make_header(struct sk_buff *skb)
-{
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- iph->version = 6;
-
- memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
- sizeof(iph->flow_lbl));
- iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
-
- ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
- iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
- */
-static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *top_iph;
- struct ip_beet_phdr *ph;
- int optlen, hdr_len;
-
- hdr_len = 0;
- optlen = XFRM_MODE_SKB_CB(skb)->optlen;
- if (unlikely(optlen))
- hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
-
- skb_set_network_header(skb, -x->props.header_len - hdr_len);
- if (x->sel.family != AF_INET6)
- skb->network_header += IPV4_BEET_PHMAXLEN;
- skb->mac_header = skb->network_header +
- offsetof(struct ipv6hdr, nexthdr);
- skb->transport_header = skb->network_header + sizeof(*top_iph);
- ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);
-
- xfrm6_beet_make_header(skb);
-
- top_iph = ipv6_hdr(skb);
- if (unlikely(optlen)) {
-
- BUG_ON(optlen < 0);
-
- ph->padlen = 4 - (optlen & 4);
- ph->hdrlen = optlen / 8;
- ph->nexthdr = top_iph->nexthdr;
- if (ph->padlen)
- memset(ph + 1, IPOPT_NOP, ph->padlen);
-
- top_iph->nexthdr = IPPROTO_BEETPH;
- }
-
- top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
- top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
- return 0;
-}
-
-static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *ip6h;
- int size = sizeof(struct ipv6hdr);
- int err;
-
- err = skb_cow_head(skb, size + skb->mac_len);
- if (err)
- goto out;
-
- __skb_push(skb, size);
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
-
- xfrm6_beet_make_header(skb);
-
- ip6h = ipv6_hdr(skb);
- ip6h->payload_len = htons(skb->len - size);
- ip6h->daddr = x->sel.daddr.in6;
- ip6h->saddr = x->sel.saddr.in6;
- err = 0;
-out:
- return err;
-}
-
-static struct xfrm_mode xfrm6_beet_mode = {
- .input2 = xfrm6_beet_input,
- .input = xfrm_prepare_input,
- .output2 = xfrm6_beet_output,
- .output = xfrm6_prepare_output,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_BEET,
- .flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm6_beet_init(void)
-{
- return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
-}
-
-static void __exit xfrm6_beet_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_beet_init);
-module_exit(xfrm6_beet_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
deleted file mode 100644
index da28e4407b8f..000000000000
--- a/net/ipv6/xfrm6_mode_ro.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * xfrm6_mode_ro.c - Route optimization mode for IPv6.
- *
- * Copyright (C)2003-2006 Helsinki University of Technology
- * Copyright (C)2003-2006 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-/*
- * Authors:
- * Noriaki TAKAMIYA @USAGI
- * Masahide NAKAMURA @USAGI
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/stringify.h>
-#include <linux/time.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-/* Add route optimization header space.
- *
- * The IP header and mutable extension headers will be moved forward to make
- * space for the route optimization header.
- */
-static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *iph;
- u8 *prevhdr;
- int hdr_len;
-
- iph = ipv6_hdr(skb);
-
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- if (hdr_len < 0)
- return hdr_len;
- skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
- skb_set_network_header(skb, -x->props.header_len);
- skb->transport_header = skb->network_header + hdr_len;
- __skb_pull(skb, hdr_len);
- memmove(ipv6_hdr(skb), iph, hdr_len);
-
- x->lastused = ktime_get_real_seconds();
-
- return 0;
-}
-
-static struct xfrm_mode xfrm6_ro_mode = {
- .output = xfrm6_ro_output,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_ROUTEOPTIMIZATION,
-};
-
-static int __init xfrm6_ro_init(void)
-{
- return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6);
-}
-
-static void __exit xfrm6_ro_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_ro_init);
-module_exit(xfrm6_ro_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
deleted file mode 100644
index 3c29da5defe6..000000000000
--- a/net/ipv6/xfrm6_mode_transport.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dst.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-#include <net/protocol.h>
-
-/* Add encapsulation header.
- *
- * The IP header and mutable extension headers will be moved forward to make
- * space for the encapsulation header.
- */
-static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct ipv6hdr *iph;
- u8 *prevhdr;
- int hdr_len;
-
- iph = ipv6_hdr(skb);
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
- if (hdr_len < 0)
- return hdr_len;
- skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
- skb_set_network_header(skb, -x->props.header_len);
- skb->transport_header = skb->network_header + hdr_len;
- __skb_pull(skb, hdr_len);
- memmove(ipv6_hdr(skb), iph, hdr_len);
- return 0;
-}
-
-/* Remove encapsulation header.
- *
- * The IP header will be moved over the top of the encapsulation header.
- *
- * On entry, skb->h shall point to where the IP header should be and skb->nh
- * shall be set to where the IP header currently is. skb->data shall point
- * to the start of the payload.
- */
-static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- int ihl = skb->data - skb_transport_header(skb);
-
- if (skb->transport_header != skb->network_header) {
- memmove(skb_transport_header(skb),
- skb_network_header(skb), ihl);
- skb->network_header = skb->transport_header;
- }
- ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
- sizeof(struct ipv6hdr));
- skb_reset_transport_header(skb);
- return 0;
-}
-
-static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
- struct sk_buff *skb,
- netdev_features_t features)
-{
- const struct net_offload *ops;
- struct sk_buff *segs = ERR_PTR(-EINVAL);
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- skb->transport_header += x->props.header_len;
- ops = rcu_dereference(inet6_offloads[xo->proto]);
- if (likely(ops && ops->callbacks.gso_segment))
- segs = ops->callbacks.gso_segment(skb, features);
-
- return segs;
-}
-
-static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len);
-
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb_reset_transport_header(skb);
- skb->transport_header -= x->props.header_len;
- }
-}
-
-
-static struct xfrm_mode xfrm6_transport_mode = {
- .input = xfrm6_transport_input,
- .output = xfrm6_transport_output,
- .gso_segment = xfrm4_transport_gso_segment,
- .xmit = xfrm6_transport_xmit,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_TRANSPORT,
-};
-
-static int __init xfrm6_transport_init(void)
-{
- return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6);
-}
-
-static void __exit xfrm6_transport_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_transport_init);
-module_exit(xfrm6_transport_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
deleted file mode 100644
index de1b0b8c53b0..000000000000
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6.
- *
- * Copyright (C) 2002 USAGI/WIDE Project
- * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au>
- */
-
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/stringify.h>
-#include <net/dsfield.h>
-#include <net/dst.h>
-#include <net/inet_ecn.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-#include <net/xfrm.h>
-
-static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
-{
- struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
-
- if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
- IP6_ECN_set_ce(skb, inner_iph);
-}
-
-/* Add encapsulation header.
- *
- * The top IP header will be constructed per RFC 2401.
- */
-static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct ipv6hdr *top_iph;
- int dsfield;
-
- skb_set_inner_network_header(skb, skb_network_offset(skb));
- skb_set_inner_transport_header(skb, skb_transport_offset(skb));
-
- skb_set_network_header(skb, -x->props.header_len);
- skb->mac_header = skb->network_header +
- offsetof(struct ipv6hdr, nexthdr);
- skb->transport_header = skb->network_header + sizeof(*top_iph);
- top_iph = ipv6_hdr(skb);
-
- top_iph->version = 6;
-
- memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
- sizeof(top_iph->flow_lbl));
- top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
-
- if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
- dsfield = 0;
- else
- dsfield = XFRM_MODE_SKB_CB(skb)->tos;
- dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
- if (x->props.flags & XFRM_STATE_NOECN)
- dsfield &= ~INET_ECN_MASK;
- ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
- top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
- top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
- return 0;
-}
-
-#define for_each_input_rcu(head, handler) \
- for (handler = rcu_dereference(head); \
- handler != NULL; \
- handler = rcu_dereference(handler->next))
-
-
-static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err = -EINVAL;
-
- if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
- goto out;
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- goto out;
-
- err = skb_unclone(skb, GFP_ATOMIC);
- if (err)
- goto out;
-
- if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
- ipipv6_hdr(skb));
- if (!(x->props.flags & XFRM_STATE_NOECN))
- ipip6_ecn_decapsulate(skb);
-
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
- if (skb->mac_len)
- eth_hdr(skb)->h_proto = skb->protocol;
-
- err = 0;
-
-out:
- return err;
-}
-
-static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
- struct sk_buff *skb,
- netdev_features_t features)
-{
- __skb_push(skb, skb->mac_len);
- return skb_mac_gso_segment(skb, features);
-}
-
-static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
-{
- struct xfrm_offload *xo = xfrm_offload(skb);
-
- if (xo->flags & XFRM_GSO_SEGMENT)
- skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
-
- skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + x->props.header_len);
-}
-
-static struct xfrm_mode xfrm6_tunnel_mode = {
- .input2 = xfrm6_mode_tunnel_input,
- .input = xfrm_prepare_input,
- .output2 = xfrm6_mode_tunnel_output,
- .output = xfrm6_prepare_output,
- .gso_segment = xfrm6_mode_tunnel_gso_segment,
- .xmit = xfrm6_mode_tunnel_xmit,
- .owner = THIS_MODULE,
- .encap = XFRM_MODE_TUNNEL,
- .flags = XFRM_MODE_FLAG_TUNNEL,
-};
-
-static int __init xfrm6_mode_tunnel_init(void)
-{
- return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6);
-}
-
-static void __exit xfrm6_mode_tunnel_exit(void)
-{
- int err;
-
- err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6);
- BUG_ON(err);
-}
-
-module_init(xfrm6_mode_tunnel_init);
-module_exit(xfrm6_mode_tunnel_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL);
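
All four per-family mode modules (BEET, route optimization, transport, tunnel) are deleted here; together with the matching net/ipv4/xfrm4_mode_*.c removals in the diffstat, this folds the input2/output2/gso_segment/xmit hooks into common net/xfrm code that switches on the state's encap type and family instead of chasing struct xfrm_mode function pointers. A hedged sketch of the resulting dispatch shape (function names illustrative; the real code also covers BEET and route optimization):

static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
{
	/* One switch replaces the per-mode ops tables. */
	switch (x->outer_mode.encap) {
	case XFRM_MODE_TRANSPORT:
		return x->outer_mode.family == AF_INET ?
			xfrm4_transport_output(x, skb) :
			xfrm6_transport_output(x, skb);
	case XFRM_MODE_TUNNEL:
		return x->outer_mode.family == AF_INET ?
			xfrm4_tunnel_output(x, skb) :
			xfrm6_tunnel_output(x, skb);
	default:
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}
}
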
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6a74080005cf..8ad5e54eb8ca 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -111,21 +111,6 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
return xfrm6_extract_header(skb);
}
-int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err;
-
- err = xfrm_inner_extract_output(x, skb);
- if (err)
- return err;
-
- skb->ignore_df = 1;
- skb->protocol = htons(ETH_P_IPV6);
-
- return x->outer_mode->output2(x, skb);
-}
-EXPORT_SYMBOL(xfrm6_prepare_output);
-
int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
{
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
@@ -137,11 +122,28 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb);
}
+static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk,
+ struct sk_buff *skb)
+{
+ const struct xfrm_state_afinfo *afinfo;
+ int ret = -EAFNOSUPPORT;
+
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+ if (likely(afinfo))
+ ret = afinfo->output_finish(sk, skb);
+ else
+ kfree_skb(skb);
+ rcu_read_unlock();
+
+ return ret;
+}
+
static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
- return x->outer_mode->afinfo->output_finish(sk, skb);
+ return __xfrm6_output_state_finish(x, sk, skb);
}
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -183,7 +185,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
__xfrm6_output_finish);
skip_frag:
- return x->outer_mode->afinfo->output_finish(sk, skb);
+ return __xfrm6_output_state_finish(x, sk, skb);
}
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 769f8f78d3b8..699e0730ce8e 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -22,9 +22,6 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/l3mdev.h>
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-#include <net/mip6.h>
-#endif
static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
@@ -71,24 +68,6 @@ static int xfrm6_get_saddr(struct net *net, int oif,
return 0;
}
-static int xfrm6_get_tos(const struct flowi *fl)
-{
- return 0;
-}
-
-static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
- int nfheader_len)
-{
- if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- path->path_cookie = rt6_get_cookie(rt);
- }
-
- path->u.rt6.rt6i_nfheader_len = nfheader_len;
-
- return 0;
-}
-
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
@@ -118,108 +97,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return 0;
}
-static inline void
-_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
-{
- struct flowi6 *fl6 = &fl->u.ip6;
- int onlyproto = 0;
- const struct ipv6hdr *hdr = ipv6_hdr(skb);
- u32 offset = sizeof(*hdr);
- struct ipv6_opt_hdr *exthdr;
- const unsigned char *nh = skb_network_header(skb);
- u16 nhoff = IP6CB(skb)->nhoff;
- int oif = 0;
- u8 nexthdr;
-
- if (!nhoff)
- nhoff = offsetof(struct ipv6hdr, nexthdr);
-
- nexthdr = nh[nhoff];
-
- if (skb_dst(skb))
- oif = skb_dst(skb)->dev->ifindex;
-
- memset(fl6, 0, sizeof(struct flowi6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
-
- fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
- fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
- while (nh + offset + sizeof(*exthdr) < skb->data ||
- pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
- nh = skb_network_header(skb);
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
- switch (nexthdr) {
- case NEXTHDR_FRAGMENT:
- onlyproto = 1;
- /* fall through */
- case NEXTHDR_ROUTING:
- case NEXTHDR_HOP:
- case NEXTHDR_DEST:
- offset += ipv6_optlen(exthdr);
- nexthdr = exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
- break;
-
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (!onlyproto && (nh + offset + 4 < skb->data ||
- pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
- __be16 *ports;
-
- nh = skb_network_header(skb);
- ports = (__be16 *)(nh + offset);
- fl6->fl6_sport = ports[!!reverse];
- fl6->fl6_dport = ports[!reverse];
- }
- fl6->flowi6_proto = nexthdr;
- return;
-
- case IPPROTO_ICMPV6:
- if (!onlyproto && (nh + offset + 2 < skb->data ||
- pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
- u8 *icmp;
-
- nh = skb_network_header(skb);
- icmp = (u8 *)(nh + offset);
- fl6->fl6_icmp_type = icmp[0];
- fl6->fl6_icmp_code = icmp[1];
- }
- fl6->flowi6_proto = nexthdr;
- return;
-
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case IPPROTO_MH:
- offset += ipv6_optlen(exthdr);
- if (!onlyproto && (nh + offset + 3 < skb->data ||
- pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
- struct ip6_mh *mh;
-
- nh = skb_network_header(skb);
- mh = (struct ip6_mh *)(nh + offset);
- fl6->fl6_mh_type = mh->ip6mh_type;
- }
- fl6->flowi6_proto = nexthdr;
- return;
-#endif
-
- /* XXX Why are there these headers? */
- case IPPROTO_AH:
- case IPPROTO_ESP:
- case IPPROTO_COMP:
- default:
- fl6->fl6_ipsec_spi = 0;
- fl6->flowi6_proto = nexthdr;
- return;
- }
- }
-}
-
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -291,9 +168,6 @@ static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
- .decode_session = _decode_session6,
- .get_tos = xfrm6_get_tos,
- .init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
};
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index cc979b702c89..aaacac7fdbce 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -46,7 +46,7 @@ static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol)
handler != NULL; \
handler = rcu_dereference(handler->next)) \
-int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
+static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
{
int ret;
struct xfrm6_protocol *handler;
@@ -61,7 +61,6 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
return 0;
}
-EXPORT_SYMBOL(xfrm6_rcv_cb);
static int xfrm6_esp_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index bc65db782bfb..d9e5f6808811 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
unsigned int i;
xfrm_flush_gc();
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
+ xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
@@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void)
xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+	/* Someone may still hold a reference to an xfrm6_tunnel_spi.
+	 * Wait for pending RCU callbacks before destroying the cache.
+	 */
+ rcu_barrier();
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
}
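
The rcu_barrier() is what makes the kmem_cache_destroy() safe: spis freed via call_rcu()/kfree_rcu() may still have callbacks queued when the module exits, and destroying the cache before they run would free objects into a dead cache. The general exit ordering, sketched with illustrative names:

static void __exit example_exit(void)
{
	/* 1. Unpublish: new lookups can no longer find our objects. */
	unregister_handlers();

	/* 2. Wait for every pending call_rcu()/kfree_rcu() callback,
	 *    so no deferred free of our objects is still in flight.
	 */
	rcu_barrier();

	/* 3. Only now can the backing cache go away. */
	kmem_cache_destroy(example_cache);
}
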
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5651c29cb5bd..4af1e1d60b9f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1951,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
if (rq->sadb_x_ipsecrequest_mode == 0)
return -EINVAL;
+ if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto))
+ return -EINVAL;
- t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
+ t->id.proto = rq->sadb_x_ipsecrequest_proto;
if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
return -EINVAL;
t->mode = mode;
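
xfrm_id_proto_valid() rejects template protocols that the xfrm state machine does not implement, closing a path where pf_key could install templates with a bogus proto. Roughly what the helper checks (abbreviated from include/net/xfrm.h; treat this as a sketch, not the verbatim source):

static inline bool xfrm_id_proto_valid(u8 proto)
{
	switch (proto) {
	case IPPROTO_AH:
	case IPPROTO_ESP:
	case IPPROTO_COMP:
#if IS_ENABLED(CONFIG_IPV6)
	case IPPROTO_ROUTING:
	case IPPROTO_DSTOPTS:
#endif
		return true;
	default:
		return false;
	}
}
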
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index fed6becc5daf..52b5a2797c0c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id) {
- l2tp_tunnel_inc_refcount(tunnel);
+ if (tunnel->tunnel_id == tunnel_id &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
@@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth) {
- l2tp_tunnel_inc_refcount(tunnel);
+ if (++count > nth &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
}
@@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- tunnel = l2tp_tunnel(sk);
+ tunnel = rcu_dereference_sk_user_data(sk);
if (tunnel == NULL)
goto pass_up;
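
Both lookup changes apply the standard RCU lookup rule: an object found on an RCU list may already be half-destroyed, so the reference must be taken with refcount_inc_not_zero() and the object skipped when that fails, rather than unconditionally bumping the count. The pattern, sketched with a hypothetical object type:

struct obj *obj_get(struct list_head *head, u32 id)
{
	struct obj *o;

	rcu_read_lock();
	list_for_each_entry_rcu(o, head, list) {
		if (o->id == id && refcount_inc_not_zero(&o->ref_count)) {
			rcu_read_unlock();
			return o;	/* caller now owns a reference */
		}
	}
	rcu_read_unlock();

	return NULL;	/* not found, or found but already dying */
}
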
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cff0fb3578c9..deb3faf08337 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -841,7 +841,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
dir = sdata->vif.debugfs_dir;
- if (!dir)
+ if (IS_ERR_OR_NULL(dir))
return;
sprintf(buf, "netdev:%s", sdata->name);
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index e03c46ac8e4d..c62101857b9b 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -112,8 +112,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
IEEE80211_HT_CAP_TX_STBC);
/* Allow user to configure RX STBC bits */
- if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC)
- ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC;
+ if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC))
+ ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) &
+ IEEE80211_HT_CAP_RX_STBC;
/* Allow user to decrease AMPDU factor */
if (ht_capa_mask->ampdu_params_info &
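
ht_capa->cap_info is a wire-format __le16, while ht_cap->cap is host order; the old test compared them without conversion and so only worked on little-endian machines. The safe idiom, sketched as a hypothetical helper:

static u16 rx_stbc_bits(__le16 wire_cap_info)
{
	/* Convert the constant to wire order for the flag test, and
	 * the field to host order for the arithmetic; never mix them.
	 */
	if (!(wire_cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)))
		return 0;

	return le16_to_cpu(wire_cap_info) & IEEE80211_HT_CAP_RX_STBC;
}
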
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 94459b2b3d2a..410685d38c46 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1907,6 +1907,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
list_del_rcu(&sdata->list);
mutex_unlock(&sdata->local->iflist_mtx);
+ if (sdata->vif.txq)
+ ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq));
+
synchronize_rcu();
if (sdata->dev) {
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 72668759cd2b..efccd1ac9a66 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -362,8 +362,8 @@ int genl_register_family(struct genl_family *family)
} else
family->attrbuf = NULL;
- family->id = idr_alloc(&genl_fam_idr, family,
- start, end + 1, GFP_KERNEL);
+ family->id = idr_alloc_cyclic(&genl_fam_idr, family,
+ start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
goto errout_free;
@@ -537,21 +537,25 @@ static int genl_family_rcv_msg(const struct genl_family *family,
return -EOPNOTSUPP;
if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) {
- unsigned int validate = NL_VALIDATE_STRICT;
int hdrlen = GENL_HDRLEN + family->hdrsize;
- if (ops->validate & GENL_DONT_VALIDATE_DUMP_STRICT)
- validate = NL_VALIDATE_LIBERAL;
-
if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
return -EINVAL;
- rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
- nlmsg_attrlen(nlh, hdrlen),
- family->maxattr, family->policy,
- validate, extack);
- if (rc)
- return rc;
+ if (family->maxattr) {
+ unsigned int validate = NL_VALIDATE_STRICT;
+
+ if (ops->validate &
+ GENL_DONT_VALIDATE_DUMP_STRICT)
+ validate = NL_VALIDATE_LIBERAL;
+ rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
+ nlmsg_attrlen(nlh, hdrlen),
+ family->maxattr,
+ family->policy,
+ validate, extack);
+ if (rc)
+ return rc;
+ }
}
if (!family->parallel_ops) {
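
idr_alloc() always returns the lowest free id, so a freshly unregistered generic netlink family id could be handed to the next registration immediately, confusing userspace that still caches the old id-to-family mapping. idr_alloc_cyclic() resumes the search after the most recently allocated id instead, wrapping at 'end', so freed ids are reused as late as possible:

/* lowest free id -- may recycle a just-freed id at once: */
id = idr_alloc(&genl_fam_idr, family, start, end + 1, GFP_KERNEL);

/* next id after the previous allocation, wrapping at 'end': */
id = idr_alloc_cyclic(&genl_fam_idr, family, start, end + 1, GFP_KERNEL);
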
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c4128082f88b..333ec5f298fe 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -2175,6 +2175,10 @@ static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info)
return PTR_ERR(reply);
nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
+ if (!nla_reply) {
+ err = -EMSGSIZE;
+ goto exit_err;
+ }
if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
err = ovs_ct_limit_get_zone_limit(
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index b95015c7e999..dc9ff9367221 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -455,7 +455,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
- BUG_ON(err);
+ if (err)
+ goto out;
if (upcall_info->userdata)
__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -471,7 +472,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
}
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
- BUG_ON(err);
+ if (err)
+ goto out;
+
nla_nest_end(user_skb, nla);
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5c4a118d6f96..90d4e3ce00e5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2600,8 +2600,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
void *ph;
DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
+ unsigned char *addr = NULL;
int tp_len, size_max;
- unsigned char *addr;
void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
@@ -2612,7 +2612,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
- addr = NULL;
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2622,10 +2621,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
sll_addr)))
goto out;
proto = saddr->sll_protocol;
- addr = saddr->sll_halen ? saddr->sll_addr : NULL;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- if (addr && dev && saddr->sll_halen < dev->addr_len)
- goto out_put;
+ if (po->sk.sk_socket->type == SOCK_DGRAM) {
+ if (dev && msg->msg_namelen < dev->addr_len +
+ offsetof(struct sockaddr_ll, sll_addr))
+ goto out_put;
+ addr = saddr->sll_addr;
+ }
}
err = -ENXIO;
@@ -2797,7 +2799,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- unsigned char *addr;
+ unsigned char *addr = NULL;
int err, reserve = 0;
struct sockcm_cookie sockc;
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2814,7 +2816,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
- addr = NULL;
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2822,10 +2823,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
goto out;
proto = saddr->sll_protocol;
- addr = saddr->sll_halen ? saddr->sll_addr : NULL;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- if (addr && dev && saddr->sll_halen < dev->addr_len)
- goto out_unlock;
+ if (sock->type == SOCK_DGRAM) {
+ if (dev && msg->msg_namelen < dev->addr_len +
+ offsetof(struct sockaddr_ll, sll_addr))
+ goto out_unlock;
+ addr = saddr->sll_addr;
+ }
}
err = -ENXIO;
@@ -3342,20 +3346,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ int copy_len;
+
/* If the address length field is there to be filled
* in, we fill it in now.
*/
if (sock->type == SOCK_PACKET) {
__sockaddr_check_size(sizeof(struct sockaddr_pkt));
msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ copy_len = msg->msg_namelen;
} else {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
msg->msg_namelen = sll->sll_halen +
offsetof(struct sockaddr_ll, sll_addr);
+ copy_len = msg->msg_namelen;
+ if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
+ memset(msg->msg_name +
+ offsetof(struct sockaddr_ll, sll_addr),
+ 0, sizeof(sll->sll_addr));
+ msg->msg_namelen = sizeof(struct sockaddr_ll);
+ }
}
- memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
- msg->msg_namelen);
+ memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
if (pkt_sk(sk)->auxdata) {
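
The send-path fix stops trusting the caller-controlled sll_halen: for SOCK_DGRAM the kernel itself builds the link-layer header and copies dev->addr_len bytes out of sll_addr, so the only safe check is that msg_namelen covers that many bytes. The invariant, expressed as a hypothetical helper:

/* dev_hard_header() will read dev->addr_len bytes from sll_addr
 * regardless of what sll_halen claims, so the user's buffer must be
 * at least this large:
 */
static bool ll_addr_ok(const struct net_device *dev, int msg_namelen)
{
	return msg_namelen >= dev->addr_len +
			      offsetof(struct sockaddr_ll, sll_addr);
}
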
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 70559854837e..8946c89d7392 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -772,7 +772,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
unsigned long frag_off;
unsigned long to_copy;
unsigned long copied;
- uint64_t uncongested = 0;
+ __le64 uncongested = 0;
void *addr;
/* catch completely corrupt packets */
@@ -789,7 +789,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
copied = 0;
while (copied < RDS_CONG_MAP_BYTES) {
- uint64_t *src, *dst;
+ __le64 *src, *dst;
unsigned int k;
to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
@@ -824,9 +824,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
}
/* the congestion map is in little endian order */
- uncongested = le64_to_cpu(uncongested);
-
- rds_cong_map_updated(map, uncongested);
+ rds_cong_map_updated(map, le64_to_cpu(uncongested));
}
static void rds_ib_process_recv(struct rds_connection *conn,
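
Switching the accumulator and copy pointers from uint64_t to __le64 changes no generated code; it lets sparse's endianness checker prove that the little-endian congestion-map words are converted to host order exactly once, via le64_to_cpu() at the point of use. The annotation idiom, sketched with an illustrative helper:

/* Accumulate wire-order words in a wire-order variable; bitwise ops
 * on matching __le64 types are endian-safe, so convert only once.
 */
static u64 fold_map(const __le64 *src, size_t n)
{
	__le64 acc = 0;
	size_t i;

	for (i = 0; i < n; i++)
		acc |= src[i];

	return le64_to_cpu(acc);	/* single conversion point */
}
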
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 8aa2937b069f..fe96881a334d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
- if (list_empty(&rxnet->calls))
- return;
+ if (!list_empty(&rxnet->calls)) {
+ write_lock(&rxnet->call_lock);
- write_lock(&rxnet->call_lock);
+ while (!list_empty(&rxnet->calls)) {
+ call = list_entry(rxnet->calls.next,
+ struct rxrpc_call, link);
+ _debug("Zapping call %p", call);
- while (!list_empty(&rxnet->calls)) {
- call = list_entry(rxnet->calls.next, struct rxrpc_call, link);
- _debug("Zapping call %p", call);
+ rxrpc_see_call(call);
+ list_del_init(&call->link);
- rxrpc_see_call(call);
- list_del_init(&call->link);
+ pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+ call, atomic_read(&call->usage),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
- pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
- rxrpc_call_states[call->state],
- call->flags, call->events);
+ write_unlock(&rxnet->call_lock);
+ cond_resched();
+ write_lock(&rxnet->call_lock);
+ }
write_unlock(&rxnet->call_lock);
- cond_resched();
- write_lock(&rxnet->call_lock);
}
- write_unlock(&rxnet->call_lock);
-
atomic_dec(&rxnet->nr_calls);
wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
}
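
The reshuffled loop moves the write_unlock()/cond_resched()/write_lock() dance inside the per-call iteration, so a long list of leaked calls no longer pins the write lock (and the CPU) for the entire traversal. The generic shape of the pattern, with placeholder names:

write_lock(&lock);
while (!list_empty(&head)) {
	obj = list_first_entry(&head, struct obj, link);
	list_del_init(&obj->link);
	/* ... report or clean up obj ... */

	/* Breathe between items: drop the lock, let other CPUs and
	 * the scheduler make progress, then retake it and continue.
	 */
	write_unlock(&lock);
	cond_resched();
	write_lock(&lock);
}
write_unlock(&lock);
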
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 04e9ef088535..4b8710a266cc 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -847,7 +847,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
/* Similarly success statistics must be moved as pointers */
new->pcpu_success = n->pcpu_success;
#endif
- memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+ memcpy(&new->sel, s, struct_size(s, keys, s->nkeys));
if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
kfree(new);
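
struct_size(s, keys, s->nkeys) from <linux/overflow.h> computes sizeof(*s) + s->nkeys * sizeof(s->keys[0]) but saturates at SIZE_MAX when the arithmetic would overflow, so a wrapped, too-small size can never reach memcpy() or an allocator. A self-contained sketch with hypothetical types:

#include <linux/overflow.h>
#include <linux/string.h>

struct key { u32 mask, val; };
struct sel { unsigned int nkeys; struct key keys[]; };

static void copy_sel(struct sel *dst, const struct sel *s)
{
	/* Equivalent to sizeof(*s) + s->nkeys * sizeof(struct key),
	 * except that overflow saturates instead of wrapping.
	 */
	memcpy(dst, s, struct_size(s, keys, s->nkeys));
}
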
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 848aab3693bd..cce1e9ee85af 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -32,6 +32,7 @@
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>
+#include <trace/events/net.h>
#include <net/xfrm.h>
/* Qdisc to use by default */
@@ -441,6 +442,7 @@ static void dev_watchdog(struct timer_list *t)
}
if (some_queue_timedout) {
+ trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
dev->netdev_ops->ndo_tx_timeout(dev);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 09563c245473..539677120b9f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -16,6 +16,7 @@
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -41,25 +42,88 @@ struct sched_entry {
u8 command;
};
+struct sched_gate_list {
+ struct rcu_head rcu;
+ struct list_head entries;
+ size_t num_entries;
+ ktime_t cycle_close_time;
+ s64 cycle_time;
+ s64 cycle_time_extension;
+ s64 base_time;
+};
+
struct taprio_sched {
struct Qdisc **qdiscs;
struct Qdisc *root;
- s64 base_time;
int clockid;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
*/
- size_t num_entries;
/* Protects the update side of the RCU protected current_entry */
spinlock_t current_entry_lock;
struct sched_entry __rcu *current_entry;
- struct list_head entries;
+ struct sched_gate_list __rcu *oper_sched;
+ struct sched_gate_list __rcu *admin_sched;
ktime_t (*get_time)(void);
struct hrtimer advance_timer;
struct list_head taprio_list;
};
+static ktime_t sched_base_time(const struct sched_gate_list *sched)
+{
+ if (!sched)
+ return KTIME_MAX;
+
+ return ns_to_ktime(sched->base_time);
+}
+
+static void taprio_free_sched_cb(struct rcu_head *head)
+{
+ struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
+ struct sched_entry *entry, *n;
+
+ if (!sched)
+ return;
+
+ list_for_each_entry_safe(entry, n, &sched->entries, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+
+ kfree(sched);
+}
+
+static void switch_schedules(struct taprio_sched *q,
+ struct sched_gate_list **admin,
+ struct sched_gate_list **oper)
+{
+ rcu_assign_pointer(q->oper_sched, *admin);
+ rcu_assign_pointer(q->admin_sched, NULL);
+
+ if (*oper)
+ call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
+
+ *oper = *admin;
+ *admin = NULL;
+}
+
+static ktime_t get_cycle_time(struct sched_gate_list *sched)
+{
+ struct sched_entry *entry;
+ ktime_t cycle = 0;
+
+ if (sched->cycle_time != 0)
+ return sched->cycle_time;
+
+ list_for_each_entry(entry, &sched->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+
+ sched->cycle_time = cycle;
+
+ return cycle;
+}
+
static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -136,8 +200,8 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb = NULL;
struct sched_entry *entry;
- struct sk_buff *skb;
u32 gate_mask;
int i;
@@ -154,10 +218,9 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
* "AdminGateSates"
*/
gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
- rcu_read_unlock();
if (!gate_mask)
- return NULL;
+ goto done;
for (i = 0; i < dev->num_tx_queues; i++) {
struct Qdisc *child = q->qdiscs[i];
@@ -197,22 +260,72 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
skb = child->ops->dequeue(child);
if (unlikely(!skb))
- return NULL;
+ goto done;
qdisc_bstats_update(sch, skb);
qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
- return skb;
+ goto done;
}
- return NULL;
+done:
+ rcu_read_unlock();
+
+ return skb;
+}
+
+static bool should_restart_cycle(const struct sched_gate_list *oper,
+ const struct sched_entry *entry)
+{
+ if (list_is_last(&entry->list, &oper->entries))
+ return true;
+
+ if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
+ return true;
+
+ return false;
+}
+
+static bool should_change_schedules(const struct sched_gate_list *admin,
+ const struct sched_gate_list *oper,
+ ktime_t close_time)
+{
+ ktime_t next_base_time, extension_time;
+
+ if (!admin)
+ return false;
+
+ next_base_time = sched_base_time(admin);
+
+	/* This is the simple case: the close_time falls after the
+	 * next schedule's base_time.
+	 */
+ if (ktime_compare(next_base_time, close_time) <= 0)
+ return true;
+
+	/* This is the cycle_time_extension case: if the close_time
+	 * plus the amount the cycle can be extended would fall after
+	 * the next schedule's base_time, the current schedule is
+	 * extended by that amount.
+	 */
+ extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
+
+ /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
+ * how precisely the extension should be made. So after
+ * conformance testing, this logic may change.
+ */
+ if (ktime_compare(next_base_time, extension_time) <= 0)
+ return true;
+
+ return false;
}
static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
struct taprio_sched *q = container_of(timer, struct taprio_sched,
advance_timer);
+ struct sched_gate_list *oper, *admin;
struct sched_entry *entry, *next;
struct Qdisc *sch = q->root;
ktime_t close_time;
@@ -220,25 +333,46 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
spin_lock(&q->current_entry_lock);
entry = rcu_dereference_protected(q->current_entry,
lockdep_is_held(&q->current_entry_lock));
+ oper = rcu_dereference_protected(q->oper_sched,
+ lockdep_is_held(&q->current_entry_lock));
+ admin = rcu_dereference_protected(q->admin_sched,
+ lockdep_is_held(&q->current_entry_lock));
- /* This is the case that it's the first time that the schedule
- * runs, so it only happens once per schedule. The first entry
- * is pre-calculated during the schedule initialization.
+ if (!oper)
+ switch_schedules(q, &admin, &oper);
+
+	/* This can happen in two cases: 1. this is the very first run
+	 * of this function (i.e. we weren't running any schedule
+	 * previously); 2. the previous schedule just ended. The first
+	 * entry of each schedule is pre-calculated during schedule
+	 * initialization.
*/
- if (unlikely(!entry)) {
- next = list_first_entry(&q->entries, struct sched_entry,
+ if (unlikely(!entry || entry->close_time == oper->base_time)) {
+ next = list_first_entry(&oper->entries, struct sched_entry,
list);
close_time = next->close_time;
goto first_run;
}
- if (list_is_last(&entry->list, &q->entries))
- next = list_first_entry(&q->entries, struct sched_entry,
+ if (should_restart_cycle(oper, entry)) {
+ next = list_first_entry(&oper->entries, struct sched_entry,
list);
- else
+ oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
+ oper->cycle_time);
+ } else {
next = list_next_entry(entry, list);
+ }
close_time = ktime_add_ns(entry->close_time, next->interval);
+ close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
+
+ if (should_change_schedules(admin, oper, close_time)) {
+ /* Set things so the next time this runs, the new
+ * schedule runs.
+ */
+ close_time = sched_base_time(admin);
+ switch_schedules(q, &admin, &oper);
+ }
next->close_time = close_time;
taprio_set_budget(q, next);
@@ -271,10 +405,12 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
},
- [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
- [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
- [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
- [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
+ [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
+ [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
};
static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
@@ -322,71 +458,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
return fill_sched_entry(tb, entry, extack);
}
-/* Returns the number of entries in case of success */
-static int parse_sched_single_entry(struct nlattr *n,
- struct taprio_sched *q,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
- struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { };
- struct sched_entry *entry;
- bool found = false;
- u32 index;
- int err;
-
- err = nla_parse_nested_deprecated(tb_list, TCA_TAPRIO_SCHED_MAX, n,
- entry_list_policy, NULL);
- if (err < 0) {
- NL_SET_ERR_MSG(extack, "Could not parse nested entry");
- return -EINVAL;
- }
-
- if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) {
- NL_SET_ERR_MSG(extack, "Single-entry must include an entry");
- return -EINVAL;
- }
-
- err = nla_parse_nested_deprecated(tb_entry,
- TCA_TAPRIO_SCHED_ENTRY_MAX,
- tb_list[TCA_TAPRIO_SCHED_ENTRY],
- entry_policy, NULL);
- if (err < 0) {
- NL_SET_ERR_MSG(extack, "Could not parse nested entry");
- return -EINVAL;
- }
-
- if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) {
- NL_SET_ERR_MSG(extack, "Entry must specify an index\n");
- return -EINVAL;
- }
-
- index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]);
- if (index >= q->num_entries) {
- NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule");
- return -EINVAL;
- }
-
- list_for_each_entry(entry, &q->entries, list) {
- if (entry->index == index) {
- found = true;
- break;
- }
- }
-
- if (!found) {
- NL_SET_ERR_MSG(extack, "Could not find entry");
- return -ENOENT;
- }
-
- err = fill_sched_entry(tb_entry, entry, extack);
- if (err < 0)
- return err;
-
- return q->num_entries;
-}
-
static int parse_sched_list(struct nlattr *list,
- struct taprio_sched *q,
+ struct sched_gate_list *sched,
struct netlink_ext_ack *extack)
{
struct nlattr *n;
@@ -416,64 +489,42 @@ static int parse_sched_list(struct nlattr *list,
return err;
}
- list_add_tail(&entry->list, &q->entries);
+ list_add_tail(&entry->list, &sched->entries);
i++;
}
- q->num_entries = i;
+ sched->num_entries = i;
return i;
}
-/* Returns the number of entries in case of success */
-static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q,
- struct netlink_ext_ack *extack)
+static int parse_taprio_schedule(struct nlattr **tb,
+ struct sched_gate_list *new,
+ struct netlink_ext_ack *extack)
{
int err = 0;
- int clockid;
-
- if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] &&
- tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
- return -EINVAL;
-
- if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0)
- return -EINVAL;
- if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID])
- return -EINVAL;
+ if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
+ NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
+ return -ENOTSUPP;
+ }
if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
- q->base_time = nla_get_s64(
- tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
+ new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
- if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
- clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
-
- /* We only support static clockids and we don't allow
- * for it to be modified after the first init.
- */
- if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid))
- return -EINVAL;
+ if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
+ new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
- q->clockid = clockid;
- }
+ if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
+ new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
err = parse_sched_list(
- tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack);
- else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY])
- err = parse_sched_single_entry(
- tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack);
-
- /* parse_sched_* return the number of entries in the schedule,
- * a schedule with zero entries is an error.
- */
- if (err == 0) {
- NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry");
- return -EINVAL;
- }
+ tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
+ if (err < 0)
+ return err;
- return err;
+ return 0;
}
static int taprio_parse_mqprio_opt(struct net_device *dev,
@@ -482,11 +533,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
{
int i, j;
- if (!qopt) {
+ if (!qopt && !dev->num_tc) {
NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
return -EINVAL;
}
+ /* If num_tc is already set, it means that the user already
+ * configured the mqprio part
+ */
+ if (dev->num_tc)
+ return 0;
+
/* Verify num_tc is not out of max range */
if (qopt->num_tc > TC_MAX_QUEUE) {
NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
@@ -532,14 +589,15 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
return 0;
}
-static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
+static int taprio_get_start_time(struct Qdisc *sch,
+ struct sched_gate_list *sched,
+ ktime_t *start)
{
struct taprio_sched *q = qdisc_priv(sch);
- struct sched_entry *entry;
ktime_t now, base, cycle;
s64 n;
- base = ns_to_ktime(q->base_time);
+ base = sched_base_time(sched);
now = q->get_time();
if (ktime_after(base, now)) {
@@ -547,11 +605,7 @@ static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
return 0;
}
- /* Calculate the cycle_time, by summing all the intervals.
- */
- cycle = 0;
- list_for_each_entry(entry, &q->entries, list)
- cycle = ktime_add_ns(cycle, entry->interval);
+ cycle = get_cycle_time(sched);
/* The qdisc is expected to have at least one sched_entry. Moreover,
* any entry must have 'interval' > 0. Thus if the cycle time is zero,
@@ -569,22 +623,40 @@ static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start)
return 0;
}
-static void taprio_start_sched(struct Qdisc *sch, ktime_t start)
+static void setup_first_close_time(struct taprio_sched *q,
+ struct sched_gate_list *sched, ktime_t base)
{
- struct taprio_sched *q = qdisc_priv(sch);
struct sched_entry *first;
- unsigned long flags;
+ ktime_t cycle;
- spin_lock_irqsave(&q->current_entry_lock, flags);
+ first = list_first_entry(&sched->entries,
+ struct sched_entry, list);
- first = list_first_entry(&q->entries, struct sched_entry,
- list);
+ cycle = get_cycle_time(sched);
- first->close_time = ktime_add_ns(start, first->interval);
+ /* FIXME: find a better place to do this */
+ sched->cycle_close_time = ktime_add_ns(base, cycle);
+
+ first->close_time = ktime_add_ns(base, first->interval);
taprio_set_budget(q, first);
rcu_assign_pointer(q->current_entry, NULL);
+}
- spin_unlock_irqrestore(&q->current_entry_lock, flags);
+static void taprio_start_sched(struct Qdisc *sch,
+ ktime_t start, struct sched_gate_list *new)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ ktime_t expires;
+
+ expires = hrtimer_get_expires(&q->advance_timer);
+ if (expires == 0)
+ expires = KTIME_MAX;
+
+	/* If the new schedule starts before the next expiration,
+	 * reprogram the timer to the earlier of the two, so the admin
+	 * schedule is promoted to operational at the right time.
+	 */
+ start = min_t(ktime_t, start, expires);
hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}
@@ -639,10 +711,12 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
+ struct sched_gate_list *oper, *admin, *new_admin;
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_mqprio_qopt *mqprio = NULL;
- int i, err, size;
+ int i, err, clockid;
+ unsigned long flags;
ktime_t start;
err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
@@ -657,48 +731,64 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
- /* A schedule with less than one entry is an error */
- size = parse_taprio_opt(tb, q, extack);
- if (size < 0)
- return size;
+ new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
+ if (!new_admin) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&new_admin->entries);
- hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
- q->advance_timer.function = advance_sched;
+ rcu_read_lock();
+ oper = rcu_dereference(q->oper_sched);
+ admin = rcu_dereference(q->admin_sched);
+ rcu_read_unlock();
- switch (q->clockid) {
- case CLOCK_REALTIME:
- q->get_time = ktime_get_real;
- break;
- case CLOCK_MONOTONIC:
- q->get_time = ktime_get;
- break;
- case CLOCK_BOOTTIME:
- q->get_time = ktime_get_boottime;
- break;
- case CLOCK_TAI:
- q->get_time = ktime_get_clocktai;
- break;
- default:
- return -ENOTSUPP;
+ if (mqprio && (oper || admin)) {
+ NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
+ err = -ENOTSUPP;
+ goto free_sched;
}
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct netdev_queue *dev_queue;
- struct Qdisc *qdisc;
+ err = parse_taprio_schedule(tb, new_admin, extack);
+ if (err < 0)
+ goto free_sched;
- dev_queue = netdev_get_tx_queue(dev, i);
- qdisc = qdisc_create_dflt(dev_queue,
- &pfifo_qdisc_ops,
- TC_H_MAKE(TC_H_MAJ(sch->handle),
- TC_H_MIN(i + 1)),
- extack);
- if (!qdisc)
- return -ENOMEM;
+ if (new_admin->num_entries == 0) {
+ NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
+ err = -EINVAL;
+ goto free_sched;
+ }
- if (i < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc, false);
+ if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
- q->qdiscs[i] = qdisc;
+ /* We only support static clockids and we don't allow
+ * for it to be modified after the first init.
+ */
+ if (clockid < 0 ||
+ (q->clockid != -1 && q->clockid != clockid)) {
+ NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
+ err = -ENOTSUPP;
+ goto free_sched;
+ }
+
+ q->clockid = clockid;
+ }
+
+ if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+ NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+ err = -EINVAL;
+ goto free_sched;
+ }
+
+ taprio_set_picos_per_byte(dev, q);
+
+ /* Protects against enqueue()/dequeue() */
+ spin_lock_bh(qdisc_lock(sch));
+
+ if (!hrtimer_active(&q->advance_timer)) {
+ hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
+ q->advance_timer.function = advance_sched;
}
if (mqprio) {
@@ -714,24 +804,60 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->prio_tc_map[i]);
}
- taprio_set_picos_per_byte(dev, q);
+ switch (q->clockid) {
+ case CLOCK_REALTIME:
+ q->get_time = ktime_get_real;
+ break;
+ case CLOCK_MONOTONIC:
+ q->get_time = ktime_get;
+ break;
+ case CLOCK_BOOTTIME:
+ q->get_time = ktime_get_boottime;
+ break;
+ case CLOCK_TAI:
+ q->get_time = ktime_get_clocktai;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ err = -EINVAL;
+ goto unlock;
+ }
- err = taprio_get_start_time(sch, &start);
+ err = taprio_get_start_time(sch, new_admin, &start);
if (err < 0) {
NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
- return err;
+ goto unlock;
}
- taprio_start_sched(sch, start);
+ setup_first_close_time(q, new_admin, start);
- return 0;
+ /* Protects against advance_sched() */
+ spin_lock_irqsave(&q->current_entry_lock, flags);
+
+ taprio_start_sched(sch, start, new_admin);
+
+ rcu_assign_pointer(q->admin_sched, new_admin);
+ if (admin)
+ call_rcu(&admin->rcu, taprio_free_sched_cb);
+ new_admin = NULL;
+
+ spin_unlock_irqrestore(&q->current_entry_lock, flags);
+
+ err = 0;
+
+unlock:
+ spin_unlock_bh(qdisc_lock(sch));
+
+free_sched:
+ kfree(new_admin);
+
+ return err;
}
static void taprio_destroy(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- struct sched_entry *entry, *n;
unsigned int i;
spin_lock(&taprio_list_lock);
@@ -750,10 +876,11 @@ static void taprio_destroy(struct Qdisc *sch)
netdev_set_num_tc(dev, 0);
- list_for_each_entry_safe(entry, n, &q->entries, list) {
- list_del(&entry->list);
- kfree(entry);
- }
+ if (q->oper_sched)
+ call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
+
+ if (q->admin_sched)
+ call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
}
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -761,12 +888,12 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ int i;
- INIT_LIST_HEAD(&q->entries);
spin_lock_init(&q->current_entry_lock);
- /* We may overwrite the configuration later */
hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
+ q->advance_timer.function = advance_sched;
q->root = sch;
@@ -796,6 +923,25 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
list_add(&q->taprio_list, &taprio_list);
spin_unlock(&taprio_list_lock);
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *dev_queue;
+ struct Qdisc *qdisc;
+
+ dev_queue = netdev_get_tx_queue(dev, i);
+ qdisc = qdisc_create_dflt(dev_queue,
+ &pfifo_qdisc_ops,
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(i + 1)),
+ extack);
+ if (!qdisc)
+ return -ENOMEM;
+
+ if (i < dev->real_num_tx_queues)
+ qdisc_hash_add(qdisc, false);
+
+ q->qdiscs[i] = qdisc;
+ }
+
return taprio_change(sch, opt, extack);
}
@@ -867,15 +1013,55 @@ nla_put_failure:
return -1;
}
+static int dump_schedule(struct sk_buff *msg,
+ const struct sched_gate_list *root)
+{
+ struct nlattr *entry_list;
+ struct sched_entry *entry;
+
+ if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
+ root->base_time, TCA_TAPRIO_PAD))
+ return -1;
+
+ if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
+ root->cycle_time, TCA_TAPRIO_PAD))
+ return -1;
+
+ if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
+ root->cycle_time_extension, TCA_TAPRIO_PAD))
+ return -1;
+
+ entry_list = nla_nest_start_noflag(msg,
+ TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
+ if (!entry_list)
+ goto error_nest;
+
+ list_for_each_entry(entry, &root->entries, list) {
+ if (dump_entry(msg, entry) < 0)
+ goto error_nest;
+ }
+
+ nla_nest_end(msg, entry_list);
+ return 0;
+
+error_nest:
+ nla_nest_cancel(msg, entry_list);
+ return -1;
+}
+
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct sched_gate_list *oper, *admin;
struct tc_mqprio_qopt opt = { 0 };
- struct nlattr *nest, *entry_list;
- struct sched_entry *entry;
+ struct nlattr *nest, *sched_nest;
unsigned int i;
+ rcu_read_lock();
+ oper = rcu_dereference(q->oper_sched);
+ admin = rcu_dereference(q->admin_sched);
+
opt.num_tc = netdev_get_num_tc(dev);
memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
@@ -886,35 +1072,41 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!nest)
- return -ENOSPC;
+ goto start_error;
if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
goto options_error;
- if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
- q->base_time, TCA_TAPRIO_PAD))
- goto options_error;
-
if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
goto options_error;
- entry_list = nla_nest_start_noflag(skb,
- TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
- if (!entry_list)
+ if (oper && dump_schedule(skb, oper))
goto options_error;
- list_for_each_entry(entry, &q->entries, list) {
- if (dump_entry(skb, entry) < 0)
- goto options_error;
- }
+ if (!admin)
+ goto done;
+
+ sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
+
+ if (dump_schedule(skb, admin))
+ goto admin_error;
- nla_nest_end(skb, entry_list);
+ nla_nest_end(skb, sched_nest);
+
+done:
+ rcu_read_unlock();
return nla_nest_end(skb, nest);
+admin_error:
+ nla_nest_cancel(skb, sched_nest);
+
options_error:
nla_nest_cancel(skb, nest);
- return -1;
+
+start_error:
+ rcu_read_unlock();
+ return -ENOSPC;
}
static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
@@ -1001,6 +1193,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.id = "taprio",
.priv_size = sizeof(struct taprio_sched),
.init = taprio_init,
+ .change = taprio_change,
.destroy = taprio_destroy,
.peek = taprio_peek,
.dequeue = taprio_dequeue,
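
taprio now keeps two RCU-managed schedules, oper_sched (in effect) and admin_sched (pending); the update side swaps them with rcu_assign_pointer() and retires the old one through call_rcu(&sched->rcu, taprio_free_sched_cb), while the hot paths only ever look at them inside rcu_read_lock(). A reader-side sketch (the helper name is illustrative):

static ktime_t next_admin_base_time(struct taprio_sched *q)
{
	struct sched_gate_list *admin;
	ktime_t t;

	rcu_read_lock();
	admin = rcu_dereference(q->admin_sched);
	/* sched_base_time(NULL) returns KTIME_MAX, so a missing admin
	 * schedule naturally sorts after any real base_time.
	 */
	t = sched_base_time(admin);
	rcu_read_unlock();

	return t;
}
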
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 1d143bc3f73d..4aa03588f87b 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
}
-/* Sent the next ASCONF packet currently stored in the association.
- * This happens after the ASCONF_ACK was succeffully processed.
- */
-static void sctp_cmd_send_asconf(struct sctp_association *asoc)
-{
- struct net *net = sock_net(asoc->base.sk);
-
- /* Send the next asconf chunk from the addip chunk
- * queue.
- */
- if (!list_empty(&asoc->addip_chunk_list)) {
- struct list_head *entry = asoc->addip_chunk_list.next;
- struct sctp_chunk *asconf = list_entry(entry,
- struct sctp_chunk, list);
- list_del_init(entry);
-
- /* Hold the chunk until an ASCONF_ACK is received. */
- sctp_chunk_hold(asconf);
- if (sctp_primitive_ASCONF(net, asoc, asconf))
- sctp_chunk_free(asconf);
- else
- asoc->addip_last_asconf = asconf;
- }
-}
-
-
/* These three macros allow us to pull the debugging code out of the
* main flow of sctp_do_sm() to keep attention focused on the real
* functionality there.
@@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
}
sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
break;
- case SCTP_CMD_SEND_NEXT_ASCONF:
- sctp_cmd_send_asconf(asoc);
- break;
case SCTP_CMD_PURGE_ASCONF_QUEUE:
sctp_asconf_queue_teardown(asoc);
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7dfc34b28f4f..e3f4abe6134e 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+static enum sctp_disposition sctp_send_next_asconf(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_chunk *asconf;
+ struct list_head *entry;
+
+ if (list_empty(&asoc->addip_chunk_list))
+ return SCTP_DISPOSITION_CONSUME;
+
+ entry = asoc->addip_chunk_list.next;
+ asconf = list_entry(entry, struct sctp_chunk, list);
+
+ list_del_init(entry);
+ sctp_chunk_hold(asconf);
+ asoc->addip_last_asconf = asconf;
+
+ return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands);
+}
+
/*
* ADDIP Section 4.3 General rules for address manipulation
* When building TLV parameters for the ASCONF Chunk that will add or
@@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
- asconf_ack)) {
- /* Successfully processed ASCONF_ACK. We can
- * release the next asconf if we have one.
- */
- sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF,
- SCTP_NULL());
- return SCTP_DISPOSITION_CONSUME;
- }
+ asconf_ack))
+ return sctp_send_next_asconf(net, ep,
+ (struct sctp_association *)asoc,
+ type, commands);
abort = sctp_make_abort(asoc, asconf_ack,
sizeof(struct sctp_errhdr));
diff --git a/net/socket.c b/net/socket.c
index a180e1a9ff23..472fbefa5d9b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -90,6 +90,7 @@
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/nospec.h>
+#include <linux/indirect_call_wrapper.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -108,6 +109,13 @@
#include <net/busy_poll.h>
#include <linux/errqueue.h>
+/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
+#if IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
+#endif
+
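The INDIRECT_CALL_INET4() wrapper added above exists because retpolines make indirect calls through sock->ops costly; since IPv4 and IPv6 sockets share inet_sendmsg()/inet_recvmsg(), the common target can be called directly. Roughly what <linux/indirect_call_wrapper.h> provides (a simplified sketch, not a verbatim copy):

#define EXAMPLE_INDIRECT_CALL_1(f, f1, ...) \
	(likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__))

When the function pointer matches the expected target f1, the compiler emits a direct (and potentially inlinable) call; otherwise it falls back to the indirect call.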
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
@@ -645,10 +653,12 @@ EXPORT_SYMBOL(__sock_tx_timestamp);
* Sends @msg through @sock, passing through LSM.
* Returns the number of bytes sent, or an error code.
*/
-
+INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
+ size_t));
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
+ int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
+ msg, msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
@@ -874,11 +884,13 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
* Receives @msg from @sock, passing through LSM. Returns the total number
* of bytes received, or an error.
*/
-
+INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
+					    size_t, int));
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
+ return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
+ msg_data_left(msg), flags);
}
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1c514b64a0a9..f5cd986e1e50 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1705,6 +1705,41 @@ tnl:
}
}
+/**
+ * tipc_link_failover_prepare() - prepare tnl for link failover
+ *
+ * This is a special version of its precursor, tipc_link_tnl_prepare();
+ * see tipc_node_link_failover() for details.
+ *
+ * @l: failover link
+ * @tnl: tunnel link
+ * @xmitq: queue for messages to be xmited
+ */
+void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head *fdefq = &tnl->failover_deferdq;
+
+ tipc_link_create_dummy_tnl_msg(tnl, xmitq);
+
+	/* This failover link endpoint was never established before,
+	 * so it has not received anything from the peer.
+ * Otherwise, it must be a normal failover situation or the
+ * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes
+ * would have to start over from scratch instead.
+ */
+ WARN_ON(l && tipc_link_is_up(l));
+ tnl->drop_point = 1;
+ tnl->failover_reasm_skb = NULL;
+
+ /* Initiate the link's failover deferdq */
+ if (unlikely(!skb_queue_empty(fdefq))) {
+ pr_warn("Link failover deferdq not empty: %d!\n",
+ skb_queue_len(fdefq));
+ __skb_queue_purge(fdefq);
+ }
+}
+
/* tipc_link_validate_msg(): validate message against current link state
* Returns true if message should be accepted, otherwise false
*/
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 8439e0ee53a8..adcad65e761c 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -90,6 +90,8 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq);
void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl,
struct sk_buff_head *xmitq);
+void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ struct sk_buff_head *xmitq);
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
bool tipc_link_is_up(struct tipc_link *l);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0eb1bf850219..9e106d3ed187 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -714,7 +714,6 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
*slot0 = bearer_id;
*slot1 = bearer_id;
tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
- n->failover_sent = false;
n->action_flags |= TIPC_NOTIFY_NODE_UP;
tipc_link_set_active(nl, true);
tipc_bcast_add_peer(n->net, nl, xmitq);
@@ -757,6 +756,45 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
}
/**
+ * tipc_node_link_failover() - start failover in case of "half-failover"
+ *
+ * This function is only called in the very special situation where link
+ * failover may already have been started on the peer node but not on
+ * this node. This can happen when, e.g.:
+ * 1. Both links <1A-2A>, <1B-2B> down
+ * 2. Link endpoint 2A up, but 1A still down (e.g. due to network
+ * disturbance, wrong session, etc.)
+ * 3. Link <1B-2B> up
+ * 4. Link endpoint 2A down (e.g. due to link tolerance timeout)
+ * 5. Node B starts failover onto link <1B-2B>
+ *
+ * ==> Node A never starts link/node failover!
+ *
+ * @n: tipc node structure
+ * @l: link endpoint that is failing over (can be NULL)
+ * @tnl: tunnel link
+ * @xmitq: queue for messages to be xmited on tnl link later
+ */
+static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l,
+ struct tipc_link *tnl,
+ struct sk_buff_head *xmitq)
+{
+	/* Avoid a "self-failover" that can never end */
+ if (!tipc_link_is_up(tnl))
+ return;
+
+ tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+
+ n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
+ tipc_link_failover_prepare(l, tnl, xmitq);
+
+ if (l)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
+}
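The sync point chosen above sits (U16_MAX / 2 - 1) past the tunnel link's rcv_nxt, i.e. just inside the half of the 16-bit sequence space that still compares as "ahead". A hedged sketch of the wrap-safe comparison this relies on (a hypothetical helper mirroring TIPC's less()/mod() arithmetic, not code from the patch):

static bool example_seq_less(u16 left, u16 right)
{
	/* right is "ahead" iff it is less than half the space away */
	return (u16)(right - left) < 0x8000 && left != right;
}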
+
+/**
* __tipc_node_link_down - handle loss of link
*/
static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
@@ -1675,14 +1713,16 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl),
tipc_link_inputq(l));
}
+
/* If parallel link was already down, and this happened before
- * the tunnel link came up, FAILOVER was never sent. Ensure that
- * FAILOVER is sent to get peer out of NODE_FAILINGOVER state.
+ * the tunnel link came up, node failover was never started.
+ * Ensure that a FAILOVER_MSG is sent to get peer out of
+ * NODE_FAILINGOVER state, also this node must accept
+ * TUNNEL_MSGs from peer.
*/
- if (n->state != NODE_FAILINGOVER && !n->failover_sent) {
- tipc_link_create_dummy_tnl_msg(l, xmitq);
- n->failover_sent = true;
- }
+ if (n->state != NODE_FAILINGOVER)
+ tipc_node_link_failover(n, pl, l, xmitq);
+
/* If pkts arrive out of order, use lowest calculated syncpt */
if (less(syncpt, n->sync_point))
n->sync_point = syncpt;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 26f26e71ef3f..e225c81e6b35 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -580,7 +580,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
{
struct strp_msg *rxm = strp_msg(skb);
- int err = 0, offset = rxm->offset, copy, nsg;
+ int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
struct sk_buff *skb_iter, *unused;
struct scatterlist sg[1];
char *orig_buf, *buf;
@@ -611,25 +611,42 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
else
err = 0;
- copy = min_t(int, skb_pagelen(skb) - offset,
- rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- if (skb->decrypted)
- skb_store_bits(skb, offset, buf, copy);
+ if (skb_pagelen(skb) > offset) {
+ copy = min_t(int, skb_pagelen(skb) - offset, data_len);
- offset += copy;
- buf += copy;
+ if (skb->decrypted)
+ skb_store_bits(skb, offset, buf, copy);
+ offset += copy;
+ buf += copy;
+ }
+
+ pos = skb_pagelen(skb);
skb_walk_frags(skb, skb_iter) {
- copy = min_t(int, skb_iter->len,
- rxm->full_len - offset + rxm->offset -
- TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ int frag_pos;
+
+	/* Practically all frags must belong to the msg if reencrypt
+	 * is needed with the current strparser and coalescing logic,
+	 * but strparser may "get optimized", so let's be safe.
+ */
+ if (pos + skb_iter->len <= offset)
+ goto done_with_frag;
+ if (pos >= data_len + rxm->offset)
+ break;
+
+ frag_pos = offset - pos;
+ copy = min_t(int, skb_iter->len - frag_pos,
+ data_len + rxm->offset - offset);
if (skb_iter->decrypted)
- skb_store_bits(skb_iter, offset, buf, copy);
+ skb_store_bits(skb_iter, frag_pos, buf, copy);
offset += copy;
buf += copy;
+done_with_frag:
+ pos += skb_iter->len;
}
free_buf:
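The frag walk above clamps each copy by both the bytes remaining in the current frag and the bytes remaining in the plaintext. A hedged sketch of that window arithmetic (a hypothetical helper, not from the patch): pos is the absolute offset where the current frag starts, offset is the write cursor, and end corresponds to data_len + rxm->offset:

static int example_frag_window(int pos, int frag_len, int offset, int end,
			       int *frag_pos)
{
	if (pos + frag_len <= offset)	/* frag ends before the cursor */
		return 0;
	if (pos >= end)			/* frag starts past the payload */
		return 0;
	*frag_pos = offset - pos;	/* start position inside this frag */
	return min(frag_len - *frag_pos, end - offset);
}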
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index a3ebd4b02714..c3a5fe624b4e 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -201,13 +201,14 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
skb_put(nskb, skb->len);
memcpy(nskb->data, skb->data, headln);
- update_chksum(nskb, headln);
nskb->destructor = skb->destructor;
nskb->sk = sk;
skb->destructor = NULL;
skb->sk = NULL;
+ update_chksum(nskb, headln);
+
delta = nskb->truesize - skb->truesize;
if (likely(delta < 0))
WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 816425ffe05a..4831ad745f91 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3769,10 +3769,9 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
/*
* The last request may have been received before this
* registration call. Call the driver notifier if
- * initiator is USER and user type is CELL_BASE.
+ * initiator is USER.
*/
- if (lr->initiator == NL80211_REGDOM_SET_BY_USER &&
- lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE)
+ if (lr->initiator == NL80211_REGDOM_SET_BY_USER)
reg_call_notifier(wiphy, lr);
}
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 5d43aaa17027..1ec8071226b2 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -3,7 +3,7 @@
#
config XFRM
bool
- depends on NET
+ depends on INET
select GRO_CELLS
select SKB_EXTENSIONS
@@ -15,9 +15,9 @@ config XFRM_ALGO
select XFRM
select CRYPTO
+if INET
config XFRM_USER
tristate "Transformation user configuration interface"
- depends on INET
select XFRM_ALGO
---help---
Support for Transformation(XFRM) user configuration interface
@@ -56,7 +56,7 @@ config XFRM_MIGRATE
config XFRM_STATISTICS
bool "Transformation statistics"
- depends on INET && XFRM && PROC_FS
+ depends on XFRM && PROC_FS
---help---
	  These statistics do not follow a SNMP/MIB specification but show
	  statistics about transformation error (or near-error) factors
@@ -95,3 +95,5 @@ config NET_KEY_MIGRATE
<draft-sugimoto-mip6-pfkey-migrate>.
If unsure, say N.
+
+endif # INET
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 2db1626557c5..b24cd86a02c3 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -23,6 +23,60 @@
#include <linux/notifier.h>
#ifdef CONFIG_XFRM_OFFLOAD
+static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb,
+ unsigned int hsize)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + hsize + x->props.header_len);
+
+ if (xo->flags & XFRM_GSO_SEGMENT) {
+ skb_reset_transport_header(skb);
+ skb->transport_header -= x->props.header_len;
+ }
+}
+
+static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
+ unsigned int hsize)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+
+ if (xo->flags & XFRM_GSO_SEGMENT)
+ skb->transport_header = skb->network_header + hsize;
+
+ skb_reset_mac_len(skb);
+ pskb_pull(skb, skb->mac_len + x->props.header_len);
+}
+
+/* Adjust pointers into the packet when IPsec is done at layer2 */
+static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb)
+{
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_TUNNEL:
+ if (x->outer_mode.family == AF_INET)
+ return __xfrm_mode_tunnel_prep(x, skb,
+ sizeof(struct iphdr));
+ if (x->outer_mode.family == AF_INET6)
+ return __xfrm_mode_tunnel_prep(x, skb,
+ sizeof(struct ipv6hdr));
+ break;
+ case XFRM_MODE_TRANSPORT:
+ if (x->outer_mode.family == AF_INET)
+ return __xfrm_transport_prep(x, skb,
+ sizeof(struct iphdr));
+ if (x->outer_mode.family == AF_INET6)
+ return __xfrm_transport_prep(x, skb,
+ sizeof(struct ipv6hdr));
+ break;
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ case XFRM_MODE_IN_TRIGGER:
+ case XFRM_MODE_BEET:
+ break;
+ }
+}
+
struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again)
{
int err;
@@ -78,7 +132,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
}
if (!skb->next) {
- x->outer_mode->xmit(x, skb);
+ esp_features |= skb->dev->gso_partial_features;
+ xfrm_outer_mode_prep(x, skb);
xo->flags |= XFRM_DEV_RESUME;
@@ -101,12 +156,14 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
do {
struct sk_buff *nskb = skb2->next;
+
+ esp_features |= skb->dev->gso_partial_features;
skb_mark_not_on_list(skb2);
xo = xfrm_offload(skb2);
xo->flags |= XFRM_DEV_RESUME;
- x->outer_mode->xmit(x, skb2);
+ xfrm_outer_mode_prep(x, skb2);
err = x->type_offload->xmit(x, skb2, esp_features);
if (!err) {
diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h
new file mode 100644
index 000000000000..c7b0318938e2
--- /dev/null
+++ b/net/xfrm/xfrm_inout.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/ipv6.h>
+#include <net/dsfield.h>
+#include <net/xfrm.h>
+
+#ifndef XFRM_INOUT_H
+#define XFRM_INOUT_H 1
+
+static inline void xfrm6_beet_make_header(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ iph->version = 6;
+
+ memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+ sizeof(iph->flow_lbl));
+ iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol;
+
+ ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos);
+ iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
+static inline void xfrm4_beet_make_header(struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ iph->ihl = 5;
+ iph->version = 4;
+
+ iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol;
+ iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+
+ iph->id = XFRM_MODE_SKB_CB(skb)->id;
+ iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off;
+ iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl;
+}
+
+#endif
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b3b613660d44..314973aaa414 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,8 @@
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
+#include "xfrm_inout.h"
+
struct xfrm_trans_tasklet {
struct tasklet_struct tasklet;
struct sk_buff_head queue;
@@ -166,35 +168,299 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
}
EXPORT_SYMBOL(xfrm_parse_spi);
-int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ int optlen = 0;
+ int err = -EINVAL;
+
+ if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
+ struct ip_beet_phdr *ph;
+ int phlen;
+
+ if (!pskb_may_pull(skb, sizeof(*ph)))
+ goto out;
+
+ ph = (struct ip_beet_phdr *)skb->data;
+
+ phlen = sizeof(*ph) + ph->padlen;
+ optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
+ if (optlen < 0 || optlen & 3 || optlen > 250)
+ goto out;
+
+ XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr;
+
+ if (!pskb_may_pull(skb, phlen))
+ goto out;
+ __skb_pull(skb, phlen);
+ }
+
+ skb_push(skb, sizeof(*iph));
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ xfrm4_beet_make_header(skb);
+
+ iph = ip_hdr(skb);
+
+ iph->ihl += optlen / 4;
+ iph->tot_len = htons(skb->len);
+ iph->daddr = x->sel.daddr.a4;
+ iph->saddr = x->sel.saddr.a4;
+ iph->check = 0;
+ iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
+ err = 0;
+out:
+ return err;
+}
+
+static void ipip_ecn_decapsulate(struct sk_buff *skb)
+{
+ struct iphdr *inner_iph = ipip_hdr(skb);
+
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP_ECN_set_ce(inner_iph);
+}
+
+static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_mode *inner_mode = x->inner_mode;
+ int err = -EINVAL;
+
+ if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
+ goto out;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto out;
+
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err)
+ goto out;
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb));
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ ipip_ecn_decapsulate(skb);
+
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static void ipip6_ecn_decapsulate(struct sk_buff *skb)
+{
+ struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
+
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP6_ECN_set_ce(skb, inner_iph);
+}
+
+static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err = -EINVAL;
+
+ if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
+ goto out;
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto out;
+
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err)
+ goto out;
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
+ ipipv6_hdr(skb));
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ ipip6_ecn_decapsulate(skb);
+
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *ip6h;
+ int size = sizeof(struct ipv6hdr);
int err;
- err = x->outer_mode->afinfo->extract_input(x, skb);
+ err = skb_cow_head(skb, size + skb->mac_len);
if (err)
+ goto out;
+
+ __skb_push(skb, size);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ xfrm6_beet_make_header(skb);
+
+ ip6h = ipv6_hdr(skb);
+ ip6h->payload_len = htons(skb->len - size);
+ ip6h->daddr = x->sel.daddr.in6;
+ ip6h->saddr = x->sel.saddr.in6;
+ err = 0;
+out:
+ return err;
+}
+
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation
+ * header.
+ *
+ * On entry, the transport header shall point to where the IP header
+ * should be and the network header shall be set to where the IP
+ * header currently is. skb->data shall point to the start of the
+ * payload.
+ */
+static int
+xfrm_inner_mode_encap_remove(struct xfrm_state *x,
+ const struct xfrm_mode *inner_mode,
+ struct sk_buff *skb)
+{
+ switch (inner_mode->encap) {
+ case XFRM_MODE_BEET:
+ if (inner_mode->family == AF_INET)
+ return xfrm4_remove_beet_encap(x, skb);
+ if (inner_mode->family == AF_INET6)
+ return xfrm6_remove_beet_encap(x, skb);
+ break;
+ case XFRM_MODE_TUNNEL:
+ if (inner_mode->family == AF_INET)
+ return xfrm4_remove_tunnel_encap(x, skb);
+ if (inner_mode->family == AF_INET6)
+ return xfrm6_remove_tunnel_encap(x, skb);
+ break;
+ }
+
+ WARN_ON_ONCE(1);
+ return -EOPNOTSUPP;
+}
+
+static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ const struct xfrm_mode *inner_mode = &x->inner_mode;
+ const struct xfrm_state_afinfo *afinfo;
+ int err = -EAFNOSUPPORT;
+
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
+ if (likely(afinfo))
+ err = afinfo->extract_input(x, skb);
+
+ if (err) {
+ rcu_read_unlock();
return err;
+ }
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
- if (inner_mode == NULL)
+ if (!inner_mode) {
+ rcu_read_unlock();
return -EAFNOSUPPORT;
+ }
}
- skb->protocol = inner_mode->afinfo->eth_proto;
- return inner_mode->input2(x, skb);
+ afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+ if (unlikely(!afinfo)) {
+ rcu_read_unlock();
+ return -EAFNOSUPPORT;
+ }
+
+ skb->protocol = afinfo->eth_proto;
+ rcu_read_unlock();
+ return xfrm_inner_mode_encap_remove(x, inner_mode, skb);
+}
+
+/* Remove encapsulation header.
+ *
+ * The IP header will be moved over the top of the encapsulation header.
+ *
+ * On entry, skb_transport_header() shall point to where the IP header
+ * should be and skb_network_header() shall be set to where the IP header
+ * currently is. skb->data shall point to the start of the payload.
+ */
+static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int ihl = skb->data - skb_transport_header(skb);
+
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ip_hdr(skb)->tot_len = htons(skb->len + ihl);
+ skb_reset_transport_header(skb);
+ return 0;
+}
+
+static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ int ihl = skb->data - skb_transport_header(skb);
+
+ if (skb->transport_header != skb->network_header) {
+ memmove(skb_transport_header(skb),
+ skb_network_header(skb), ihl);
+ skb->network_header = skb->transport_header;
+ }
+ ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
+ sizeof(struct ipv6hdr));
+ skb_reset_transport_header(skb);
+ return 0;
+#else
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+#endif
+}
+
+static int xfrm_inner_mode_input(struct xfrm_state *x,
+ const struct xfrm_mode *inner_mode,
+ struct sk_buff *skb)
+{
+ switch (inner_mode->encap) {
+ case XFRM_MODE_BEET:
+ case XFRM_MODE_TUNNEL:
+ return xfrm_prepare_input(x, skb);
+ case XFRM_MODE_TRANSPORT:
+ if (inner_mode->family == AF_INET)
+ return xfrm4_transport_input(x, skb);
+ if (inner_mode->family == AF_INET6)
+ return xfrm6_transport_input(x, skb);
+ break;
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ WARN_ON_ONCE(1);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return -EOPNOTSUPP;
}
-EXPORT_SYMBOL(xfrm_prepare_input);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
+ const struct xfrm_state_afinfo *afinfo;
struct net *net = dev_net(skb->dev);
+ const struct xfrm_mode *inner_mode;
int err;
__be32 seq;
__be32 seq_hi;
struct xfrm_state *x = NULL;
xfrm_address_t *daddr;
- struct xfrm_mode *inner_mode;
u32 mark = skb->mark;
unsigned int family = AF_UNSPEC;
int decaps = 0;
@@ -216,7 +482,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- family = x->outer_mode->afinfo->family;
+ family = x->outer_mode.family;
/* An encap_type of -1 indicates async resumption. */
if (encap_type == -1) {
@@ -400,7 +666,7 @@ resume:
XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
- inner_mode = x->inner_mode;
+ inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -410,12 +676,12 @@ resume:
}
}
- if (inner_mode->input(x, skb)) {
+ if (xfrm_inner_mode_input(x, inner_mode, skb)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
goto drop;
}
- if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
+ if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) {
decaps = 1;
break;
}
@@ -425,7 +691,7 @@ resume:
* transport mode so the outer address is identical.
*/
daddr = &x->id.daddr;
- family = x->outer_mode->afinfo->family;
+ family = x->outer_mode.family;
err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
if (err < 0) {
@@ -453,7 +719,12 @@ resume:
if (xo)
xfrm_gro = xo->flags & XFRM_GRO;
- err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
+ err = -EAFNOSUPPORT;
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family);
+ if (likely(afinfo))
+ err = afinfo->transport_finish(skb, xfrm_gro || async);
+ rcu_read_unlock();
if (xfrm_gro) {
sp = skb_sec_path(skb);
if (sp)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dbb3c1945b5c..ad3a2555c517 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
return NULL;
}
-static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
+ unsigned short family)
{
struct xfrmi_net *xfrmn;
- int ifindex;
struct xfrm_if *xi;
+ int ifindex = 0;
if (!secpath_exists(skb) || !skb->dev)
return NULL;
+ switch (family) {
+ case AF_INET6:
+ ifindex = inet6_sdif(skb);
+ break;
+ case AF_INET:
+ ifindex = inet_sdif(skb);
+ break;
+ }
+ if (!ifindex)
+ ifindex = skb->dev->ifindex;
+
xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
- ifindex = skb->dev->ifindex;
for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
if (ifindex == xi->dev->ifindex &&
@@ -244,8 +255,8 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
+ const struct xfrm_mode *inner_mode;
struct pcpu_sw_netstats *tstats;
- struct xfrm_mode *inner_mode;
struct net_device *dev;
struct xfrm_state *x;
struct xfrm_if *xi;
@@ -273,7 +284,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
xnet = !net_eq(xi->net, dev_net(skb->dev));
if (xnet) {
- inner_mode = x->inner_mode;
+ inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -285,7 +296,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
}
if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
- inner_mode->afinfo->family))
+ inner_mode->family))
return -EPERM;
}
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 9333153bafda..a55510f9ff35 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -17,9 +17,13 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
+#include <net/inet_ecn.h>
#include <net/xfrm.h>
+#include "xfrm_inout.h"
+
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
+static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
static int xfrm_skb_check_space(struct sk_buff *skb)
{
@@ -50,6 +54,360 @@ static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
return child;
}
+/* Add encapsulation header.
+ *
+ * The IP header will be moved forward to make space for the encapsulation
+ * header.
+ */
+static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ int ihl = iph->ihl * 4;
+
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->mac_header = skb->network_header +
+ offsetof(struct iphdr, protocol);
+ skb->transport_header = skb->network_header + ihl;
+ __skb_pull(skb, ihl);
+ memmove(skb_network_header(skb), iph, ihl);
+ return 0;
+}
+
+/* Add encapsulation header.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the encapsulation header.
+ */
+static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *iph;
+ u8 *prevhdr;
+ int hdr_len;
+
+ iph = ipv6_hdr(skb);
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+ hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ if (hdr_len < 0)
+ return hdr_len;
+ skb_set_mac_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->transport_header = skb->network_header + hdr_len;
+ __skb_pull(skb, hdr_len);
+ memmove(ipv6_hdr(skb), iph, hdr_len);
+ return 0;
+#else
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+#endif
+}
+
+/* Add route optimization header space.
+ *
+ * The IP header and mutable extension headers will be moved forward to make
+ * space for the route optimization header.
+ */
+static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *iph;
+ u8 *prevhdr;
+ int hdr_len;
+
+ iph = ipv6_hdr(skb);
+
+ hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ if (hdr_len < 0)
+ return hdr_len;
+ skb_set_mac_header(skb,
+ (prevhdr - x->props.header_len) - skb->data);
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->transport_header = skb->network_header + hdr_len;
+ __skb_pull(skb, hdr_len);
+ memmove(ipv6_hdr(skb), iph, hdr_len);
+
+ x->lastused = ktime_get_real_seconds();
+
+ return 0;
+#else
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+#endif
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ */
+static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_beet_phdr *ph;
+ struct iphdr *top_iph;
+ int hdrlen, optlen;
+
+ hdrlen = 0;
+ optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+ if (unlikely(optlen))
+ hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+ skb_set_network_header(skb, -x->props.header_len - hdrlen +
+ (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph)));
+ if (x->sel.family != AF_INET6)
+ skb->network_header += IPV4_BEET_PHMAXLEN;
+ skb->mac_header = skb->network_header +
+ offsetof(struct iphdr, protocol);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+
+ xfrm4_beet_make_header(skb);
+
+ ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen);
+
+ top_iph = ip_hdr(skb);
+
+ if (unlikely(optlen)) {
+ if (WARN_ON(optlen < 0))
+ return -EINVAL;
+
+ ph->padlen = 4 - (optlen & 4);
+ ph->hdrlen = optlen / 8;
+ ph->nexthdr = top_iph->protocol;
+ if (ph->padlen)
+ memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+ top_iph->protocol = IPPROTO_BEETPH;
+ top_iph->ihl = sizeof(struct iphdr) / 4;
+ }
+
+ top_iph->saddr = x->props.saddr.a4;
+ top_iph->daddr = x->id.daddr.a4;
+
+ return 0;
+}
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per RFC 2401.
+ */
+static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct iphdr *top_iph;
+ int flags;
+
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->mac_header = skb->network_header +
+ offsetof(struct iphdr, protocol);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+ top_iph = ip_hdr(skb);
+
+ top_iph->ihl = 5;
+ top_iph->version = 4;
+
+ top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
+
+	/* Disclosing the DS field depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ top_iph->tos = 0;
+ else
+ top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+ top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
+ XFRM_MODE_SKB_CB(skb)->tos);
+
+ flags = x->props.flags;
+ if (flags & XFRM_STATE_NOECN)
+ IP_ECN_clear(top_iph);
+
+ top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+ 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
+
+ top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
+
+ top_iph->saddr = x->props.saddr.a4;
+ top_iph->daddr = x->id.daddr.a4;
+ ip_select_ident(dev_net(dst->dev), skb, NULL);
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct ipv6hdr *top_iph;
+ int dsfield;
+
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
+ skb_set_inner_transport_header(skb, skb_transport_offset(skb));
+
+ skb_set_network_header(skb, -x->props.header_len);
+ skb->mac_header = skb->network_header +
+ offsetof(struct ipv6hdr, nexthdr);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+ top_iph = ipv6_hdr(skb);
+
+ top_iph->version = 6;
+
+ memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl,
+ sizeof(top_iph->flow_lbl));
+ top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
+
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ dsfield = 0;
+ else
+ dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+ dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
+ if (x->props.flags & XFRM_STATE_NOECN)
+ dsfield &= ~INET_ECN_MASK;
+ ipv6_change_dsfield(top_iph, 0, dsfield);
+ top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+ return 0;
+}
+
+static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ipv6hdr *top_iph;
+ struct ip_beet_phdr *ph;
+ int optlen, hdr_len;
+
+ hdr_len = 0;
+ optlen = XFRM_MODE_SKB_CB(skb)->optlen;
+ if (unlikely(optlen))
+ hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+ skb_set_network_header(skb, -x->props.header_len - hdr_len);
+ if (x->sel.family != AF_INET6)
+ skb->network_header += IPV4_BEET_PHMAXLEN;
+ skb->mac_header = skb->network_header +
+ offsetof(struct ipv6hdr, nexthdr);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+ ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len);
+
+ xfrm6_beet_make_header(skb);
+
+ top_iph = ipv6_hdr(skb);
+ if (unlikely(optlen)) {
+ if (WARN_ON(optlen < 0))
+ return -EINVAL;
+
+ ph->padlen = 4 - (optlen & 4);
+ ph->hdrlen = optlen / 8;
+ ph->nexthdr = top_iph->nexthdr;
+ if (ph->padlen)
+ memset(ph + 1, IPOPT_NOP, ph->padlen);
+
+ top_iph->nexthdr = IPPROTO_BEETPH;
+ }
+
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+ return 0;
+}
+#endif
+
+/* Add encapsulation header.
+ *
+ * On exit, the transport header will be set to the start of the
+ * encapsulation header to be filled in by x->type->output and the mac
+ * header will be set to the nextheader (protocol for IPv4) field of the
+ * extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.
+ * The value of the network header will always point to the top IP header
+ * while skb->data will point to the payload.
+ */
+static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ err = xfrm_inner_extract_output(x, skb);
+ if (err)
+ return err;
+
+ IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+ skb->protocol = htons(ETH_P_IP);
+
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_BEET:
+ return xfrm4_beet_encap_add(x, skb);
+ case XFRM_MODE_TUNNEL:
+ return xfrm4_tunnel_encap_add(x, skb);
+ }
+
+ WARN_ON_ONCE(1);
+ return -EOPNOTSUPP;
+}
+
+static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ int err;
+
+ err = xfrm_inner_extract_output(x, skb);
+ if (err)
+ return err;
+
+ skb->ignore_df = 1;
+ skb->protocol = htons(ETH_P_IPV6);
+
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_BEET:
+ return xfrm6_beet_encap_add(x, skb);
+ case XFRM_MODE_TUNNEL:
+ return xfrm6_tunnel_encap_add(x, skb);
+ default:
+ WARN_ON_ONCE(1);
+ return -EOPNOTSUPP;
+ }
+#endif
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+}
+
+static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ switch (x->outer_mode.encap) {
+ case XFRM_MODE_BEET:
+ case XFRM_MODE_TUNNEL:
+ if (x->outer_mode.family == AF_INET)
+ return xfrm4_prepare_output(x, skb);
+ if (x->outer_mode.family == AF_INET6)
+ return xfrm6_prepare_output(x, skb);
+ break;
+ case XFRM_MODE_TRANSPORT:
+ if (x->outer_mode.family == AF_INET)
+ return xfrm4_transport_output(x, skb);
+ if (x->outer_mode.family == AF_INET6)
+ return xfrm6_transport_output(x, skb);
+ break;
+ case XFRM_MODE_ROUTEOPTIMIZATION:
+ if (x->outer_mode.family == AF_INET6)
+ return xfrm6_ro_output(x, skb);
+ WARN_ON_ONCE(1);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+#if IS_ENABLED(CONFIG_NET_PKTGEN)
+int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ return xfrm_outer_mode_output(x, skb);
+}
+EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output);
+#endif
+
static int xfrm_output_one(struct sk_buff *skb, int err)
{
struct dst_entry *dst = skb_dst(skb);
@@ -68,7 +426,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
skb->mark = xfrm_smark_get(skb->mark, x);
- err = x->outer_mode->output(x, skb);
+ err = xfrm_outer_mode_output(x, skb);
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
goto error_nolock;
@@ -131,7 +489,7 @@ resume:
}
skb_dst_set(skb, dst);
x = dst->xfrm;
- } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
+ } while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL));
return 0;
@@ -258,20 +616,29 @@ out:
}
EXPORT_SYMBOL_GPL(xfrm_output);
-int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct xfrm_mode *inner_mode;
+ const struct xfrm_state_afinfo *afinfo;
+ const struct xfrm_mode *inner_mode;
+ int err = -EAFNOSUPPORT;
+
if (x->sel.family == AF_UNSPEC)
inner_mode = xfrm_ip2inner_mode(x,
xfrm_af2proto(skb_dst(skb)->ops->family));
else
- inner_mode = x->inner_mode;
+ inner_mode = &x->inner_mode;
if (inner_mode == NULL)
return -EAFNOSUPPORT;
- return inner_mode->afinfo->extract_output(x, skb);
+
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+ if (likely(afinfo))
+ err = afinfo->extract_output(x, skb);
+ rcu_read_unlock();
+
+ return err;
}
-EXPORT_SYMBOL_GPL(xfrm_inner_extract_output);
void xfrm_local_error(struct sk_buff *skb, int mtu)
{
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8d1a898d0ba5..410233c5681e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -27,10 +27,14 @@
#include <linux/cpu.h>
#include <linux/audit.h>
#include <linux/rhashtable.h>
+#include <linux/if_tunnel.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+#include <net/mip6.h>
+#endif
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
@@ -2450,18 +2454,10 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static int xfrm_get_tos(const struct flowi *fl, int family)
{
- const struct xfrm_policy_afinfo *afinfo;
- int tos;
+ if (family == AF_INET)
+ return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
- afinfo = xfrm_policy_get_afinfo(family);
- if (!afinfo)
- return 0;
-
- tos = afinfo->get_tos(fl);
-
- rcu_read_unlock();
-
- return tos;
+ return 0;
}
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
@@ -2499,21 +2495,14 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
return xdst;
}
-static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
- int nfheader_len)
+static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+ int nfheader_len)
{
- const struct xfrm_policy_afinfo *afinfo =
- xfrm_policy_get_afinfo(dst->ops->family);
- int err;
-
- if (!afinfo)
- return -EINVAL;
-
- err = afinfo->init_path(path, dst, nfheader_len);
-
- rcu_read_unlock();
-
- return err;
+ if (dst->ops->family == AF_INET6) {
+ struct rt6_info *rt = (struct rt6_info *)dst;
+ path->path_cookie = rt6_get_cookie(rt);
+ path->u.rt6.rt6i_nfheader_len = nfheader_len;
+ }
}
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
@@ -2545,10 +2534,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
const struct flowi *fl,
struct dst_entry *dst)
{
+ const struct xfrm_state_afinfo *afinfo;
+ const struct xfrm_mode *inner_mode;
struct net *net = xp_net(policy);
unsigned long now = jiffies;
struct net_device *dev;
- struct xfrm_mode *inner_mode;
struct xfrm_dst *xdst_prev = NULL;
struct xfrm_dst *xdst0 = NULL;
int i = 0;
@@ -2594,7 +2584,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto put_states;
}
} else
- inner_mode = xfrm[i]->inner_mode;
+ inner_mode = &xfrm[i]->inner_mode;
xdst->route = dst;
dst_copy_metrics(dst1, dst);
@@ -2622,7 +2612,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst1->lastuse = now;
dst1->input = dst_discard;
- dst1->output = inner_mode->afinfo->output;
+
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+ if (likely(afinfo))
+ dst1->output = afinfo->output;
+ else
+ dst1->output = dst_discard_out;
+ rcu_read_unlock();
xdst_prev = xdst;
@@ -3263,20 +3260,229 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
return start;
}
+static void
+decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+ struct flowi4 *fl4 = &fl->u.ip4;
+ int oif = 0;
+
+ if (skb_dst(skb))
+ oif = skb_dst(skb)->dev->ifindex;
+
+ memset(fl4, 0, sizeof(struct flowi4));
+ fl4->flowi4_mark = skb->mark;
+ fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
+
+ if (!ip_is_fragment(iph)) {
+ switch (iph->protocol) {
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ if (xprth + 4 < skb->data ||
+ pskb_may_pull(skb, xprth + 4 - skb->data)) {
+ __be16 *ports;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ports = (__be16 *)xprth;
+
+ fl4->fl4_sport = ports[!!reverse];
+ fl4->fl4_dport = ports[!reverse];
+ }
+ break;
+ case IPPROTO_ICMP:
+ if (xprth + 2 < skb->data ||
+ pskb_may_pull(skb, xprth + 2 - skb->data)) {
+ u8 *icmp;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ icmp = xprth;
+
+ fl4->fl4_icmp_type = icmp[0];
+ fl4->fl4_icmp_code = icmp[1];
+ }
+ break;
+ case IPPROTO_ESP:
+ if (xprth + 4 < skb->data ||
+ pskb_may_pull(skb, xprth + 4 - skb->data)) {
+ __be32 *ehdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ehdr = (__be32 *)xprth;
+
+ fl4->fl4_ipsec_spi = ehdr[0];
+ }
+ break;
+ case IPPROTO_AH:
+ if (xprth + 8 < skb->data ||
+ pskb_may_pull(skb, xprth + 8 - skb->data)) {
+ __be32 *ah_hdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ah_hdr = (__be32 *)xprth;
+
+ fl4->fl4_ipsec_spi = ah_hdr[1];
+ }
+ break;
+ case IPPROTO_COMP:
+ if (xprth + 4 < skb->data ||
+ pskb_may_pull(skb, xprth + 4 - skb->data)) {
+ __be16 *ipcomp_hdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ ipcomp_hdr = (__be16 *)xprth;
+
+ fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
+ }
+ break;
+ case IPPROTO_GRE:
+ if (xprth + 12 < skb->data ||
+ pskb_may_pull(skb, xprth + 12 - skb->data)) {
+ __be16 *greflags;
+ __be32 *gre_hdr;
+
+ xprth = skb_network_header(skb) + iph->ihl * 4;
+ greflags = (__be16 *)xprth;
+ gre_hdr = (__be32 *)xprth;
+
+ if (greflags[0] & GRE_KEY) {
+ if (greflags[0] & GRE_CSUM)
+ gre_hdr++;
+ fl4->fl4_gre_key = gre_hdr[1];
+ }
+ }
+ break;
+ default:
+ fl4->fl4_ipsec_spi = 0;
+ break;
+ }
+ }
+ fl4->flowi4_proto = iph->protocol;
+ fl4->daddr = reverse ? iph->saddr : iph->daddr;
+ fl4->saddr = reverse ? iph->daddr : iph->saddr;
+ fl4->flowi4_tos = iph->tos;
+}
+
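Each guard in decode_session4() of the form "xprth + N < skb->data || pskb_may_pull(skb, xprth + N - skb->data)" first checks whether the needed transport-header bytes already sit in the pulled linear area, and only otherwise pulls them in; the pointer is then recomputed because a successful pull may relocate the packet data. A hedged sketch of that pattern as a standalone helper (hypothetical, not part of the patch; ihl is the IPv4 header length in bytes):

static __be16 *example_transport_bytes(struct sk_buff *skb, int ihl, int len)
{
	u8 *xprth = skb_network_header(skb) + ihl;

	if (xprth + len >= skb->data &&
	    !pskb_may_pull(skb, xprth + len - skb->data))
		return NULL;	/* the bytes cannot be made linear */

	/* Recompute after the pull: skb->head may have been reallocated. */
	return (__be16 *)(skb_network_header(skb) + ihl);
}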
+#if IS_ENABLED(CONFIG_IPV6)
+static void
+decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+{
+ struct flowi6 *fl6 = &fl->u.ip6;
+ int onlyproto = 0;
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ u32 offset = sizeof(*hdr);
+ struct ipv6_opt_hdr *exthdr;
+ const unsigned char *nh = skb_network_header(skb);
+ u16 nhoff = IP6CB(skb)->nhoff;
+ int oif = 0;
+ u8 nexthdr;
+
+ if (!nhoff)
+ nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+ nexthdr = nh[nhoff];
+
+ if (skb_dst(skb))
+ oif = skb_dst(skb)->dev->ifindex;
+
+ memset(fl6, 0, sizeof(struct flowi6));
+ fl6->flowi6_mark = skb->mark;
+ fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
+
+ fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+ fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
+
+ while (nh + offset + sizeof(*exthdr) < skb->data ||
+ pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
+ nh = skb_network_header(skb);
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+
+ switch (nexthdr) {
+ case NEXTHDR_FRAGMENT:
+ onlyproto = 1;
+ /* fall through */
+ case NEXTHDR_ROUTING:
+ case NEXTHDR_HOP:
+ case NEXTHDR_DEST:
+ offset += ipv6_optlen(exthdr);
+ nexthdr = exthdr->nexthdr;
+ exthdr = (struct ipv6_opt_hdr *)(nh + offset);
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_TCP:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ if (!onlyproto && (nh + offset + 4 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
+ __be16 *ports;
+
+ nh = skb_network_header(skb);
+ ports = (__be16 *)(nh + offset);
+ fl6->fl6_sport = ports[!!reverse];
+ fl6->fl6_dport = ports[!reverse];
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
+ case IPPROTO_ICMPV6:
+ if (!onlyproto && (nh + offset + 2 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
+ u8 *icmp;
+
+ nh = skb_network_header(skb);
+ icmp = (u8 *)(nh + offset);
+ fl6->fl6_icmp_type = icmp[0];
+ fl6->fl6_icmp_code = icmp[1];
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPPROTO_MH:
+ offset += ipv6_optlen(exthdr);
+ if (!onlyproto && (nh + offset + 3 < skb->data ||
+ pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
+ struct ip6_mh *mh;
+
+ nh = skb_network_header(skb);
+ mh = (struct ip6_mh *)(nh + offset);
+ fl6->fl6_mh_type = mh->ip6mh_type;
+ }
+ fl6->flowi6_proto = nexthdr;
+ return;
+#endif
+		/* XXX Why are these headers here? */
+ case IPPROTO_AH:
+ case IPPROTO_ESP:
+ case IPPROTO_COMP:
+ default:
+ fl6->fl6_ipsec_spi = 0;
+ fl6->flowi6_proto = nexthdr;
+ return;
+ }
+ }
+}
+#endif
+
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse)
{
- const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
- int err;
-
- if (unlikely(afinfo == NULL))
+ switch (family) {
+ case AF_INET:
+ decode_session4(skb, fl, reverse);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ decode_session6(skb, fl, reverse);
+ break;
+#endif
+ default:
return -EAFNOSUPPORT;
+ }
- afinfo->decode_session(skb, fl, reverse);
-
- err = security_xfrm_decode_session(skb, &fl->flowi_secid);
- rcu_read_unlock();
- return err;
+ return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
@@ -3313,7 +3519,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
ifcb = xfrm_if_get_cb();
if (ifcb) {
- xi = ifcb->decode_session(skb);
+ xi = ifcb->decode_session(skb, family);
if (xi) {
if_id = xi->p.if_id;
net = xi->net;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1bb971f46fc6..3edbf4b26116 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -173,7 +173,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
-bool km_is_alive(const struct km_event *c);
+static bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
static DEFINE_SPINLOCK(xfrm_type_lock);
@@ -330,100 +330,67 @@ static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
module_put(type->owner);
}
-static DEFINE_SPINLOCK(xfrm_mode_lock);
-int xfrm_register_mode(struct xfrm_mode *mode, int family)
-{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode **modemap;
- int err;
-
- if (unlikely(mode->encap >= XFRM_MODE_MAX))
- return -EINVAL;
-
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
-
- err = -EEXIST;
- modemap = afinfo->mode_map;
- spin_lock_bh(&xfrm_mode_lock);
- if (modemap[mode->encap])
- goto out;
-
- err = -ENOENT;
- if (!try_module_get(afinfo->owner))
- goto out;
-
- mode->afinfo = afinfo;
- modemap[mode->encap] = mode;
- err = 0;
-
-out:
- spin_unlock_bh(&xfrm_mode_lock);
- rcu_read_unlock();
- return err;
-}
-EXPORT_SYMBOL(xfrm_register_mode);
-
-int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
-{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode **modemap;
- int err;
-
- if (unlikely(mode->encap >= XFRM_MODE_MAX))
- return -EINVAL;
-
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return -EAFNOSUPPORT;
-
- err = -ENOENT;
- modemap = afinfo->mode_map;
- spin_lock_bh(&xfrm_mode_lock);
- if (likely(modemap[mode->encap] == mode)) {
- modemap[mode->encap] = NULL;
- module_put(mode->afinfo->owner);
- err = 0;
- }
-
- spin_unlock_bh(&xfrm_mode_lock);
- rcu_read_unlock();
- return err;
-}
-EXPORT_SYMBOL(xfrm_unregister_mode);
-
-static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
-{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode *mode;
- int modload_attempted = 0;
+static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
+ [XFRM_MODE_BEET] = {
+ .encap = XFRM_MODE_BEET,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET,
+ },
+ [XFRM_MODE_TRANSPORT] = {
+ .encap = XFRM_MODE_TRANSPORT,
+ .family = AF_INET,
+ },
+ [XFRM_MODE_TUNNEL] = {
+ .encap = XFRM_MODE_TUNNEL,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET,
+ },
+};
+
+static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
+ [XFRM_MODE_BEET] = {
+ .encap = XFRM_MODE_BEET,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET6,
+ },
+ [XFRM_MODE_ROUTEOPTIMIZATION] = {
+ .encap = XFRM_MODE_ROUTEOPTIMIZATION,
+ .family = AF_INET6,
+ },
+ [XFRM_MODE_TRANSPORT] = {
+ .encap = XFRM_MODE_TRANSPORT,
+ .family = AF_INET6,
+ },
+ [XFRM_MODE_TUNNEL] = {
+ .encap = XFRM_MODE_TUNNEL,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET6,
+ },
+};
+
+static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
+{
+ const struct xfrm_mode *mode;
if (unlikely(encap >= XFRM_MODE_MAX))
return NULL;
-retry:
- afinfo = xfrm_state_get_afinfo(family);
- if (unlikely(afinfo == NULL))
- return NULL;
-
- mode = READ_ONCE(afinfo->mode_map[encap]);
- if (unlikely(mode && !try_module_get(mode->owner)))
- mode = NULL;
-
- rcu_read_unlock();
- if (!mode && !modload_attempted) {
- request_module("xfrm-mode-%d-%d", family, encap);
- modload_attempted = 1;
- goto retry;
+ switch (family) {
+ case AF_INET:
+ mode = &xfrm4_mode_map[encap];
+ if (mode->family == family)
+ return mode;
+ break;
+ case AF_INET6:
+ mode = &xfrm6_mode_map[encap];
+ if (mode->family == family)
+ return mode;
+ break;
+ default:
+ break;
}
- return mode;
-}
-
-static void xfrm_put_mode(struct xfrm_mode *mode)
-{
- module_put(mode->owner);
+ return NULL;
}
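With the static tables above, mode lookup reduces to a bounds check plus an array index, and no module reference is taken, so there is nothing to release afterwards (which is why xfrm_put_mode() disappears). A hypothetical caller, for illustration only:

static bool example_mode_is_tunnel(u8 encap, int family)
{
	const struct xfrm_mode *mode = xfrm_get_mode(encap, family);

	return mode && (mode->flags & XFRM_MODE_FLAG_TUNNEL);
}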
void xfrm_state_free(struct xfrm_state *x)
@@ -444,12 +411,6 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
kfree(x->coaddr);
kfree(x->replay_esn);
kfree(x->preplay_esn);
- if (x->inner_mode)
- xfrm_put_mode(x->inner_mode);
- if (x->inner_mode_iaf)
- xfrm_put_mode(x->inner_mode_iaf);
- if (x->outer_mode)
- xfrm_put_mode(x->outer_mode);
if (x->type_offload)
xfrm_put_type_offload(x->type_offload);
if (x->type) {
@@ -590,8 +551,6 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
- x->inner_mode = NULL;
- x->inner_mode_iaf = NULL;
spin_lock_init(&x->lock);
}
return x;
@@ -2066,7 +2025,7 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
}
EXPORT_SYMBOL(km_report);
-bool km_is_alive(const struct km_event *c)
+static bool km_is_alive(const struct km_event *c)
{
struct xfrm_mgr *km;
bool is_alive = false;
@@ -2082,7 +2041,6 @@ bool km_is_alive(const struct km_event *c)
return is_alive;
}
-EXPORT_SYMBOL(km_is_alive);
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
@@ -2195,6 +2153,7 @@ struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
return rcu_dereference(xfrm_state_afinfo[family]);
}
+EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
{
@@ -2242,8 +2201,9 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
- struct xfrm_state_afinfo *afinfo;
- struct xfrm_mode *inner_mode;
+ const struct xfrm_state_afinfo *afinfo;
+ const struct xfrm_mode *inner_mode;
+ const struct xfrm_mode *outer_mode;
int family = x->props.family;
int err;
@@ -2269,25 +2229,22 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
goto error;
if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
- family != x->sel.family) {
- xfrm_put_mode(inner_mode);
+ family != x->sel.family)
goto error;
- }
- x->inner_mode = inner_mode;
+ x->inner_mode = *inner_mode;
} else {
- struct xfrm_mode *inner_mode_iaf;
+ const struct xfrm_mode *inner_mode_iaf;
int iafamily = AF_INET;
inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
if (inner_mode == NULL)
goto error;
- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
- xfrm_put_mode(inner_mode);
+ if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
goto error;
- }
- x->inner_mode = inner_mode;
+
+ x->inner_mode = *inner_mode;
if (x->props.family == AF_INET)
iafamily = AF_INET6;
@@ -2295,9 +2252,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
if (inner_mode_iaf) {
if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
- x->inner_mode_iaf = inner_mode_iaf;
- else
- xfrm_put_mode(inner_mode_iaf);
+ x->inner_mode_iaf = *inner_mode_iaf;
}
}
@@ -2311,12 +2266,13 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
if (err)
goto error;
- x->outer_mode = xfrm_get_mode(x->props.mode, family);
- if (x->outer_mode == NULL) {
+ outer_mode = xfrm_get_mode(x->props.mode, family);
+ if (!outer_mode) {
err = -EPROTONOSUPPORT;
goto error;
}
+ x->outer_mode = *outer_mode;
if (init_replay) {
err = xfrm_init_replay(x);
if (err)
@@ -2384,7 +2340,7 @@ void xfrm_state_fini(struct net *net)
flush_work(&net->xfrm.state_hash_work);
flush_work(&xfrm_state_gc_work);
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
+ xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d7cb16f0df5b..eb8d14389601 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
ret = verify_policy_dir(p->dir);
if (ret)
return ret;
- if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir))
+ if (p->index && (xfrm_policy_id2dir(p->index) != p->dir))
return -EINVAL;
return 0;
@@ -1513,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
return -EINVAL;
}
- switch (ut[i].id.proto) {
- case IPPROTO_AH:
- case IPPROTO_ESP:
- case IPPROTO_COMP:
-#if IS_ENABLED(CONFIG_IPV6)
- case IPPROTO_ROUTING:
- case IPPROTO_DSTOPTS:
-#endif
- case IPSEC_PROTO_ANY:
- break;
- default:
+ if (!xfrm_id_proto_valid(ut[i].id.proto))
return -EINVAL;
- }
-
}
return 0;