diff options
Diffstat (limited to 'net')
90 files changed, 2388 insertions, 2858 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index e2511027d19b..a2555023c654 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1915,6 +1915,7 @@ static int __init atalk_init(void) ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) { pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; goto out_sock; } diff --git a/net/atm/lec.c b/net/atm/lec.c index ad4f829193f0..a0311493b01b 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -726,9 +726,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) struct lec_priv *priv; if (arg < 0) - i = 0; - else - i = arg; + arg = 0; if (arg >= MAX_LEC_ITF) return -EINVAL; i = array_index_nospec(arg, MAX_LEC_ITF); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3d9175f130b3..b81bf53c5ac4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4381,6 +4381,9 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, return; } + /* If we reach this point this event matches the last command sent */ + hci_dev_clear_flag(hdev, HCI_CMD_PENDING); + /* If the command succeeded and there's still more commands in * this request the request is not yet complete. 
*/ @@ -4491,6 +4494,8 @@ static void hci_cmd_work(struct work_struct *work) hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); if (hdev->sent_cmd) { + if (hci_req_status_pend(hdev)) + hci_dev_set_flag(hdev, HCI_CMD_PENDING); atomic_dec(&hdev->cmd_cnt); hci_send_frame(hdev, skb); if (test_bit(HCI_RESET, &hdev->flags)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 66b631ab0d35..9e4fcf406d9c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3404,6 +3404,12 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_req_cmd_complete(hdev, *opcode, *status, req_complete, req_complete_skb); + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; + } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -3511,6 +3517,12 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); + if (hci_dev_test_flag(hdev, HCI_CMD_PENDING)) { + bt_dev_err(hdev, + "unexpected event for opcode 0x%4.4x", *opcode); + return; + } + if (atomic_read(&hdev->cmd_cnt) && !skb_queue_empty(&hdev->cmd_q)) queue_work(hdev->workqueue, &hdev->cmd_work); } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ca73d36cc149..e9a95ed65491 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -46,6 +46,11 @@ void hci_req_purge(struct hci_request *req) skb_queue_purge(&req->cmd_q); } +bool hci_req_status_pend(struct hci_dev *hdev) +{ + return hdev->req_status == HCI_REQ_PEND; +} + static int req_run(struct hci_request *req, hci_req_complete_t complete, hci_req_complete_skb_t complete_skb) { diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 692cc8b13368..55b2050cc9ff 100644 --- a/net/bluetooth/hci_request.h +++ 
b/net/bluetooth/hci_request.h @@ -37,6 +37,7 @@ struct hci_request { void hci_req_init(struct hci_request *req, struct hci_dev *hdev); void hci_req_purge(struct hci_request *req); +bool hci_req_status_pend(struct hci_dev *hdev); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, diff --git a/net/core/dev.c b/net/core/dev.c index 22f2640f559a..108ac8137b9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4987,7 +4987,8 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); if (pt_prev) - ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); return ret; } @@ -5033,7 +5034,8 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, else list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); - pt_prev->func(skb, skb->dev, pt_prev, orig_dev); + INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, + skb->dev, pt_prev, orig_dev); } } diff --git a/net/core/devlink.c b/net/core/devlink.c index 4e28d04c0165..d43bc52b8840 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -20,6 +20,7 @@ #include <linux/list.h> #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/refcount.h> #include <rdma/ib_verbs.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -4432,6 +4433,7 @@ struct devlink_health_reporter { u64 error_count; u64 recovery_count; u64 last_recovery_ts; + refcount_t refcount; }; void * @@ -4447,6 +4449,7 @@ devlink_health_reporter_find_by_name(struct devlink *devlink, { struct devlink_health_reporter *reporter; + lockdep_assert_held(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) if (!strcmp(reporter->ops->name, 
reporter_name)) return reporter; @@ -4470,7 +4473,7 @@ devlink_health_reporter_create(struct devlink *devlink, { struct devlink_health_reporter *reporter; - mutex_lock(&devlink->lock); + mutex_lock(&devlink->reporters_lock); if (devlink_health_reporter_find_by_name(devlink, ops->name)) { reporter = ERR_PTR(-EEXIST); goto unlock; @@ -4494,9 +4497,10 @@ devlink_health_reporter_create(struct devlink *devlink, reporter->graceful_period = graceful_period; reporter->auto_recover = auto_recover; mutex_init(&reporter->dump_lock); + refcount_set(&reporter->refcount, 1); list_add_tail(&reporter->list, &devlink->reporter_list); unlock: - mutex_unlock(&devlink->lock); + mutex_unlock(&devlink->reporters_lock); return reporter; } EXPORT_SYMBOL_GPL(devlink_health_reporter_create); @@ -4509,10 +4513,12 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create); void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { - mutex_lock(&reporter->devlink->lock); + mutex_lock(&reporter->devlink->reporters_lock); list_del(&reporter->list); + mutex_unlock(&reporter->devlink->reporters_lock); + while (refcount_read(&reporter->refcount) > 1) + msleep(100); mutex_destroy(&reporter->dump_lock); - mutex_unlock(&reporter->devlink->lock); if (reporter->dump_fmsg) devlink_fmsg_free(reporter->dump_fmsg); kfree(reporter); @@ -4648,6 +4654,7 @@ static struct devlink_health_reporter * devlink_health_reporter_get_from_info(struct devlink *devlink, struct genl_info *info) { + struct devlink_health_reporter *reporter; char *reporter_name; if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) @@ -4655,7 +4662,18 @@ devlink_health_reporter_get_from_info(struct devlink *devlink, reporter_name = nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - return devlink_health_reporter_find_by_name(devlink, reporter_name); + mutex_lock(&devlink->reporters_lock); + reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + 
mutex_unlock(&devlink->reporters_lock); + return reporter; +} + +static void +devlink_health_reporter_put(struct devlink_health_reporter *reporter) +{ + refcount_dec(&reporter->refcount); } static int @@ -4730,8 +4748,10 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto out; + } err = devlink_nl_health_reporter_fill(msg, devlink, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET, @@ -4739,10 +4759,13 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, 0); if (err) { nlmsg_free(msg); - return err; + goto out; } - return genlmsg_reply(msg, info); + err = genlmsg_reply(msg, info); +out: + devlink_health_reporter_put(reporter); + return err; } static int @@ -4759,7 +4782,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) continue; - mutex_lock(&devlink->lock); + mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { if (idx < start) { @@ -4773,12 +4796,12 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { - mutex_unlock(&devlink->lock); + mutex_unlock(&devlink->reporters_lock); goto out; } idx++; } - mutex_unlock(&devlink->lock); + mutex_unlock(&devlink->reporters_lock); } out: mutex_unlock(&devlink_mutex); @@ -4793,6 +4816,7 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; + int err; reporter = devlink_health_reporter_get_from_info(devlink, info); if (!reporter) @@ -4800,8 +4824,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, if (!reporter->ops->recover && (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || - 
info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) - return -EOPNOTSUPP; + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) { + err = -EOPNOTSUPP; + goto out; + } if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) reporter->graceful_period = @@ -4811,7 +4837,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, reporter->auto_recover = nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + devlink_health_reporter_put(reporter); return 0; +out: + devlink_health_reporter_put(reporter); + return err; } static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, @@ -4819,12 +4849,16 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; + int err; reporter = devlink_health_reporter_get_from_info(devlink, info); if (!reporter) return -EINVAL; - return devlink_health_reporter_recover(reporter, NULL); + err = devlink_health_reporter_recover(reporter, NULL); + + devlink_health_reporter_put(reporter); + return err; } static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, @@ -4839,12 +4873,16 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - if (!reporter->ops->diagnose) + if (!reporter->ops->diagnose) { + devlink_health_reporter_put(reporter); return -EOPNOTSUPP; + } fmsg = devlink_fmsg_alloc(); - if (!fmsg) + if (!fmsg) { + devlink_health_reporter_put(reporter); return -ENOMEM; + } err = devlink_fmsg_obj_nest_start(fmsg); if (err) @@ -4863,6 +4901,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, out: devlink_fmsg_free(fmsg); + devlink_health_reporter_put(reporter); return err; } @@ -4877,8 +4916,10 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - if (!reporter->ops->dump) + if (!reporter->ops->dump) { + 
devlink_health_reporter_put(reporter); return -EOPNOTSUPP; + } mutex_lock(&reporter->dump_lock); err = devlink_health_do_dump(reporter, NULL); @@ -4890,6 +4931,7 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, out: mutex_unlock(&reporter->dump_lock); + devlink_health_reporter_put(reporter); return err; } @@ -4904,12 +4946,15 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, if (!reporter) return -EINVAL; - if (!reporter->ops->dump) + if (!reporter->ops->dump) { + devlink_health_reporter_put(reporter); return -EOPNOTSUPP; + } mutex_lock(&reporter->dump_lock); devlink_health_dump_clear(reporter); mutex_unlock(&reporter->dump_lock); + devlink_health_reporter_put(reporter); return 0; } @@ -5191,7 +5236,8 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, /* can be retrieved by unprivileged users */ }, { @@ -5199,21 +5245,24 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags 
= GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, @@ -5284,6 +5333,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->region_list); INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); + mutex_init(&devlink->reporters_lock); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -5326,6 +5376,7 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { + mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->reporter_list)); WARN_ON(!list_empty(&devlink->region_list)); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f3f5a78cd062..319ad5490fb3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2521,7 +2521,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) skb->_skb_refdst = (unsigned long)&pkt_dev->xdst.u.dst | SKB_DST_NOREF; rcu_read_lock_bh(); - err = x->outer_mode->output(x, skb); + err = pktgen_xfrm_outer_mode_output(x, skb); rcu_read_unlock_bh(); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 1f48642089ea..c0734028c7dc 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -17,15 +17,6 @@ menuconfig NET_DSA if NET_DSA -config NET_DSA_LEGACY - bool "Support for older platform device and Device Tree registration" - default y - ---help--- - Say Y if you want to enable support for the older platform device and - deprecated Device Tree binding registration. - - This feature is scheduled for removal in 4.17. 
- config NET_DSA_TAG_BRCM_COMMON tristate default n diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 717ac1618100..8a737b6ee94c 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -2,7 +2,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o -dsa_core-$(CONFIG_NET_DSA_LEGACY) += legacy.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ba04c78633be..9e1fc0b08290 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -346,10 +346,6 @@ static int __init dsa_init_module(void) if (rc) return rc; - rc = dsa_legacy_register(); - if (rc) - return rc; - dev_add_pack(&dsa_pack_type); dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops), @@ -365,7 +361,6 @@ static void __exit dsa_cleanup_module(void) dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); - dsa_legacy_unregister(); destroy_workqueue(dsa_owq); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index e860512d673a..b434f5ff55ab 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -90,18 +90,6 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops); bool dsa_schedule_work(struct work_struct *work); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); -/* legacy.c */ -#if IS_ENABLED(CONFIG_NET_DSA_LEGACY) -int dsa_legacy_register(void); -void dsa_legacy_unregister(void); -#else -static inline int dsa_legacy_register(void) -{ - return 0; -} - -static inline void dsa_legacy_unregister(void) { } -#endif int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, @@ -171,6 +159,8 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); +int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags); +int dsa_port_vid_del(struct dsa_port *dp, 
u16 vid); int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c deleted file mode 100644 index 219f4fa7ff4b..000000000000 --- a/net/dsa/legacy.c +++ /dev/null @@ -1,747 +0,0 @@ -/* - * net/dsa/legacy.c - Hardware switch handling - * Copyright (c) 2008-2009 Marvell Semiconductor - * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/device.h> -#include <linux/list.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/of_mdio.h> -#include <linux/of_platform.h> -#include <linux/of_net.h> -#include <linux/netdevice.h> -#include <linux/sysfs.h> -#include <linux/phy_fixed.h> -#include <linux/etherdevice.h> - -#include "dsa_priv.h" - -/* switch driver registration ***********************************************/ -static DEFINE_MUTEX(dsa_switch_drivers_mutex); -static LIST_HEAD(dsa_switch_drivers); - -void register_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(register_switch_driver); - -void unregister_switch_driver(struct dsa_switch_driver *drv) -{ - mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); - mutex_unlock(&dsa_switch_drivers_mutex); -} -EXPORT_SYMBOL_GPL(unregister_switch_driver); - -static const struct dsa_switch_ops * -dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, - const char **_name, void **priv) -{ - const struct dsa_switch_ops *ret; - struct list_head *list; - const char *name; - - ret = 
NULL; - name = NULL; - - mutex_lock(&dsa_switch_drivers_mutex); - list_for_each(list, &dsa_switch_drivers) { - const struct dsa_switch_ops *ops; - struct dsa_switch_driver *drv; - - drv = list_entry(list, struct dsa_switch_driver, list); - ops = drv->ops; - - name = ops->probe(parent, host_dev, sw_addr, priv); - if (name != NULL) { - ret = ops; - break; - } - } - mutex_unlock(&dsa_switch_drivers_mutex); - - *_name = name; - - return ret; -} - -/* basic switch operations **************************************************/ -static int dsa_cpu_dsa_setups(struct dsa_switch *ds) -{ - int ret, port; - - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - - ret = dsa_port_link_register_of(&ds->ports[port]); - if (ret) - return ret; - } - return 0; -} - -static int dsa_switch_setup_one(struct dsa_switch *ds, - struct net_device *master) -{ - const struct dsa_switch_ops *ops = ds->ops; - struct dsa_switch_tree *dst = ds->dst; - struct dsa_chip_data *cd = ds->cd; - bool valid_name_found = false; - int index = ds->index; - struct dsa_port *dp; - int i, ret; - - /* - * Validate supplied switch configuration. 
- */ - for (i = 0; i < ds->num_ports; i++) { - char *name; - - dp = &ds->ports[i]; - - name = cd->port_names[i]; - if (name == NULL) - continue; - dp->name = name; - - if (!strcmp(name, "cpu")) { - if (dst->cpu_dp) { - netdev_err(master, - "multiple cpu ports?!\n"); - return -EINVAL; - } - dst->cpu_dp = &ds->ports[i]; - dst->cpu_dp->master = master; - dp->type = DSA_PORT_TYPE_CPU; - } else if (!strcmp(name, "dsa")) { - dp->type = DSA_PORT_TYPE_DSA; - } else { - dp->type = DSA_PORT_TYPE_USER; - } - valid_name_found = true; - } - - if (!valid_name_found && i == ds->num_ports) - return -EINVAL; - - /* Make the built-in MII bus mask match the number of ports, - * switch drivers can override this later - */ - ds->phys_mii_mask |= dsa_user_ports(ds); - - /* - * If the CPU connects to this switch, set the switch tree - * tagging protocol to the preferred tagging format of this - * switch. - */ - if (dst->cpu_dp->ds == ds) { - const struct dsa_device_ops *tag_ops; - enum dsa_tag_protocol tag_protocol; - - tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index); - tag_ops = dsa_tag_driver_get(tag_protocol); - if (IS_ERR(tag_ops)) - return PTR_ERR(tag_ops); - - dst->cpu_dp->tag_ops = tag_ops; - - /* Few copies for faster access in master receive hot path */ - dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->cpu_dp->dst = dst; - } - - dsa_tag_driver_put(dst->cpu_dp->tag_ops); - - memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); - - /* - * Do basic register setup. - */ - ret = ops->setup(ds); - if (ret < 0) - return ret; - - ret = dsa_switch_register_notifier(ds); - if (ret) - return ret; - - if (!ds->slave_mii_bus && ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); - if (!ds->slave_mii_bus) - return -ENOMEM; - dsa_slave_mii_bus_init(ds); - - ret = mdiobus_register(ds->slave_mii_bus); - if (ret < 0) - return ret; - } - - /* - * Create network devices for physical switch ports. 
- */ - for (i = 0; i < ds->num_ports; i++) { - ds->ports[i].dn = cd->port_dn[i]; - ds->ports[i].cpu_dp = dst->cpu_dp; - - if (!dsa_is_user_port(ds, i)) - continue; - - ret = dsa_slave_create(&ds->ports[i]); - if (ret < 0) - netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", - index, i, cd->port_names[i], ret); - } - - /* Perform configuration of the CPU and DSA ports */ - ret = dsa_cpu_dsa_setups(ds); - if (ret < 0) - netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", - index); - - return 0; -} - -static struct dsa_switch * -dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master, - int index, struct device *parent, struct device *host_dev) -{ - struct dsa_chip_data *cd = dst->pd->chip + index; - const struct dsa_switch_ops *ops; - struct dsa_switch *ds; - int ret; - const char *name; - void *priv; - - /* - * Probe for switch model. - */ - ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (!ops) { - netdev_err(master, "[%d]: could not detect attached switch\n", - index); - return ERR_PTR(-EINVAL); - } - netdev_info(master, "[%d]: detected a %s switch\n", - index, name); - - - /* - * Allocate and initialise switch state. - */ - ds = dsa_switch_alloc(parent, DSA_MAX_PORTS); - if (!ds) - return ERR_PTR(-ENOMEM); - - ds->dst = dst; - ds->index = index; - ds->cd = cd; - ds->ops = ops; - ds->priv = priv; - - ret = dsa_switch_setup_one(ds, master); - if (ret) - return ERR_PTR(ret); - - return ds; -} - -static void dsa_switch_destroy(struct dsa_switch *ds) -{ - int port; - - /* Destroy network devices for physical switch ports. 
*/ - for (port = 0; port < ds->num_ports; port++) { - if (!dsa_is_user_port(ds, port)) - continue; - - if (!ds->ports[port].slave) - continue; - - dsa_slave_destroy(ds->ports[port].slave); - } - - /* Disable configuration of the CPU and DSA ports */ - for (port = 0; port < ds->num_ports; port++) { - if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) - continue; - dsa_port_link_unregister_of(&ds->ports[port]); - } - - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_unregister(ds->slave_mii_bus); - - dsa_switch_unregister_notifier(ds); -} - -/* platform driver init and cleanup *****************************************/ -static int dev_is_class(struct device *dev, void *class) -{ - if (dev->class != NULL && !strcmp(dev->class->name, class)) - return 1; - - return 0; -} - -static struct device *dev_find_class(struct device *parent, char *class) -{ - if (dev_is_class(parent, class)) { - get_device(parent); - return parent; - } - - return device_find_child(parent, class, dev_is_class); -} - -struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) -{ - struct device *d; - - d = dev_find_class(dev, "mdio_bus"); - if (d != NULL) { - struct mii_bus *bus; - - bus = to_mii_bus(d); - put_device(d); - - return bus; - } - - return NULL; -} -EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); - -#ifdef CONFIG_OF -static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *link) -{ - const __be32 *reg; - int link_sw_addr; - struct device_node *parent_sw; - int len; - - parent_sw = of_get_parent(link); - if (!parent_sw) - return -EINVAL; - - reg = of_get_property(parent_sw, "reg", &len); - if (!reg || (len != sizeof(*reg) * 2)) - return -EINVAL; - - /* - * Get the destination switch number from the second field of its 'reg' - * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. 
- */ - link_sw_addr = be32_to_cpup(reg + 1); - - if (link_sw_addr >= pd->nr_chips) - return -EINVAL; - - cd->rtable[link_sw_addr] = port_index; - - return 0; -} - -static int dsa_of_probe_links(struct dsa_platform_data *pd, - struct dsa_chip_data *cd, - int chip_index, int port_index, - struct device_node *port, - const char *port_name) -{ - struct device_node *link; - int link_index; - int ret; - - for (link_index = 0;; link_index++) { - link = of_parse_phandle(port, "link", link_index); - if (!link) - break; - - if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) { - ret = dsa_of_setup_routing_table(pd, cd, chip_index, - port_index, link); - if (ret) - return ret; - } - } - return 0; -} - -static void dsa_of_free_platform_data(struct dsa_platform_data *pd) -{ - int i; - int port_index; - - for (i = 0; i < pd->nr_chips; i++) { - port_index = 0; - while (port_index < DSA_MAX_PORTS) { - kfree(pd->chip[i].port_names[port_index]); - port_index++; - } - - /* Drop our reference to the MDIO bus device */ - put_device(pd->chip[i].host_dev); - } - kfree(pd->chip); -} - -static int dsa_of_probe(struct device *dev) -{ - struct device_node *np = dev->of_node; - struct device_node *child, *mdio, *ethernet, *port; - struct mii_bus *mdio_bus, *mdio_bus_switch; - struct net_device *ethernet_dev; - struct dsa_platform_data *pd; - struct dsa_chip_data *cd; - const char *port_name; - int chip_index, port_index; - const unsigned int *sw_addr, *port_reg; - u32 eeprom_len; - int ret; - - mdio = of_parse_phandle(np, "dsa,mii-bus", 0); - if (!mdio) - return -EINVAL; - - mdio_bus = of_mdio_find_bus(mdio); - if (!mdio_bus) - return -EPROBE_DEFER; - - ethernet = of_parse_phandle(np, "dsa,ethernet", 0); - if (!ethernet) { - ret = -EINVAL; - goto out_put_mdio; - } - - ethernet_dev = of_find_net_device_by_node(ethernet); - if (!ethernet_dev) { - ret = -EPROBE_DEFER; - goto out_put_mdio; - } - - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) { - ret = -ENOMEM; - goto out_put_ethernet; - } - - 
dev->platform_data = pd; - pd->of_netdev = ethernet_dev; - pd->nr_chips = of_get_available_child_count(np); - if (pd->nr_chips > DSA_MAX_SWITCHES) - pd->nr_chips = DSA_MAX_SWITCHES; - - pd->chip = kcalloc(pd->nr_chips, sizeof(struct dsa_chip_data), - GFP_KERNEL); - if (!pd->chip) { - ret = -ENOMEM; - goto out_free; - } - - chip_index = -1; - for_each_available_child_of_node(np, child) { - int i; - - chip_index++; - cd = &pd->chip[chip_index]; - - cd->of_node = child; - - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - cd->rtable[i] = DSA_RTABLE_NONE; - - /* When assigning the host device, increment its refcount */ - cd->host_dev = get_device(&mdio_bus->dev); - - sw_addr = of_get_property(child, "reg", NULL); - if (!sw_addr) - continue; - - cd->sw_addr = be32_to_cpup(sw_addr); - if (cd->sw_addr >= PHY_MAX_ADDR) - continue; - - if (!of_property_read_u32(child, "eeprom-length", &eeprom_len)) - cd->eeprom_len = eeprom_len; - - mdio = of_parse_phandle(child, "mii-bus", 0); - if (mdio) { - mdio_bus_switch = of_mdio_find_bus(mdio); - if (!mdio_bus_switch) { - ret = -EPROBE_DEFER; - goto out_free_chip; - } - - /* Drop the mdio_bus device ref, replacing the host - * device with the mdio_bus_switch device, keeping - * the refcount from of_mdio_find_bus() above. 
- */ - put_device(cd->host_dev); - cd->host_dev = &mdio_bus_switch->dev; - } - - for_each_available_child_of_node(child, port) { - port_reg = of_get_property(port, "reg", NULL); - if (!port_reg) - continue; - - port_index = be32_to_cpup(port_reg); - if (port_index >= DSA_MAX_PORTS) - break; - - port_name = of_get_property(port, "label", NULL); - if (!port_name) - continue; - - cd->port_dn[port_index] = port; - - cd->port_names[port_index] = kstrdup(port_name, - GFP_KERNEL); - if (!cd->port_names[port_index]) { - ret = -ENOMEM; - goto out_free_chip; - } - - ret = dsa_of_probe_links(pd, cd, chip_index, - port_index, port, port_name); - if (ret) - goto out_free_chip; - - } - } - - /* The individual chips hold their own refcount on the mdio bus, - * so drop ours */ - put_device(&mdio_bus->dev); - - return 0; - -out_free_chip: - dsa_of_free_platform_data(pd); -out_free: - kfree(pd); - dev->platform_data = NULL; -out_put_ethernet: - put_device(&ethernet_dev->dev); -out_put_mdio: - put_device(&mdio_bus->dev); - return ret; -} - -static void dsa_of_remove(struct device *dev) -{ - struct dsa_platform_data *pd = dev->platform_data; - - if (!dev->of_node) - return; - - dsa_of_free_platform_data(pd); - put_device(&pd->of_netdev->dev); - kfree(pd); -} -#else -static inline int dsa_of_probe(struct device *dev) -{ - return 0; -} - -static inline void dsa_of_remove(struct device *dev) -{ -} -#endif - -static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, - struct device *parent, struct dsa_platform_data *pd) -{ - int i; - unsigned configured = 0; - - dst->pd = pd; - - for (i = 0; i < pd->nr_chips; i++) { - struct dsa_switch *ds; - - ds = dsa_switch_setup(dst, dev, i, parent, pd->chip[i].host_dev); - if (IS_ERR(ds)) { - netdev_err(dev, "[%d]: couldn't create dsa switch instance (error %ld)\n", - i, PTR_ERR(ds)); - continue; - } - - dst->ds[i] = ds; - - ++configured; - } - - /* - * If no switch was found, exit cleanly - */ - if (!configured) - return 
-EPROBE_DEFER; - - return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp); -} - -static int dsa_probe(struct platform_device *pdev) -{ - struct dsa_platform_data *pd = pdev->dev.platform_data; - struct net_device *dev; - struct dsa_switch_tree *dst; - int ret; - - if (pdev->dev.of_node) { - ret = dsa_of_probe(&pdev->dev); - if (ret) - return ret; - - pd = pdev->dev.platform_data; - } - - if (pd == NULL || (pd->netdev == NULL && pd->of_netdev == NULL)) - return -EINVAL; - - if (pd->of_netdev) { - dev = pd->of_netdev; - dev_hold(dev); - } else { - dev = dsa_dev_to_net_device(pd->netdev); - } - if (dev == NULL) { - ret = -EPROBE_DEFER; - goto out; - } - - if (dev->dsa_ptr != NULL) { - dev_put(dev); - ret = -EEXIST; - goto out; - } - - dst = devm_kzalloc(&pdev->dev, sizeof(*dst), GFP_KERNEL); - if (dst == NULL) { - dev_put(dev); - ret = -ENOMEM; - goto out; - } - - platform_set_drvdata(pdev, dst); - - ret = dsa_setup_dst(dst, dev, &pdev->dev, pd); - if (ret) { - dev_put(dev); - goto out; - } - - return 0; - -out: - dsa_of_remove(&pdev->dev); - - return ret; -} - -static void dsa_remove_dst(struct dsa_switch_tree *dst) -{ - int i; - - dsa_master_teardown(dst->cpu_dp->master); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds) - dsa_switch_destroy(ds); - } - - dev_put(dst->cpu_dp->master); -} - -static int dsa_remove(struct platform_device *pdev) -{ - struct dsa_switch_tree *dst = platform_get_drvdata(pdev); - - dsa_remove_dst(dst); - dsa_of_remove(&pdev->dev); - - return 0; -} - -static void dsa_shutdown(struct platform_device *pdev) -{ -} - -#ifdef CONFIG_PM_SLEEP -static int dsa_suspend(struct device *d) -{ - struct dsa_switch_tree *dst = dev_get_drvdata(d); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL) - ret = dsa_switch_suspend(ds); - } - - return ret; -} - -static int dsa_resume(struct device *d) -{ - struct dsa_switch_tree *dst = 
dev_get_drvdata(d); - int i, ret = 0; - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL) - ret = dsa_switch_resume(ds); - } - - return ret; -} -#endif - -static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); - -static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "marvell,dsa", }, - {} -}; -MODULE_DEVICE_TABLE(of, dsa_of_match_table); - -static struct platform_driver dsa_driver = { - .probe = dsa_probe, - .remove = dsa_remove, - .shutdown = dsa_shutdown, - .driver = { - .name = "dsa", - .of_match_table = dsa_of_match_table, - .pm = &dsa_pm_ops, - }, -}; - -int dsa_legacy_register(void) -{ - return platform_driver_register(&dsa_driver); -} - -void dsa_legacy_unregister(void) -{ - platform_driver_unregister(&dsa_driver); -} diff --git a/net/dsa/port.c b/net/dsa/port.c index caeef4c99dc0..1ed287b2badd 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -154,19 +154,67 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); } +static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, + bool vlan_filtering) +{ + struct dsa_switch *ds = dp->ds; + int i; + + if (!ds->vlan_filtering_is_global) + return true; + + /* For cases where enabling/disabling VLAN awareness is global to the + * switch, we need to handle the case where multiple bridges span + * different ports of the same switch device and one of them has a + * different setting than what is being requested. 
+ */ + for (i = 0; i < ds->num_ports; i++) { + struct net_device *other_bridge; + + other_bridge = dsa_to_port(ds, i)->bridge_dev; + if (!other_bridge) + continue; + /* If it's the same bridge, it also has same + * vlan_filtering setting => no need to check + */ + if (other_bridge == dp->bridge_dev) + continue; + if (br_vlan_enabled(other_bridge) != vlan_filtering) { + dev_err(ds->dev, "VLAN filtering is a global setting\n"); + return false; + } + } + return true; +} + int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans) { struct dsa_switch *ds = dp->ds; + int err; /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->ops->port_vlan_filtering) - return ds->ops->port_vlan_filtering(ds, dp->index, - vlan_filtering); + if (!ds->ops->port_vlan_filtering) + return 0; + + if (!dsa_port_can_apply_vlan_filtering(dp, vlan_filtering)) + return -EINVAL; + + if (dsa_port_is_vlan_filtering(dp) == vlan_filtering) + return 0; + + err = ds->ops->port_vlan_filtering(ds, dp->index, + vlan_filtering); + if (err) + return err; + if (ds->vlan_filtering_is_global) + ds->vlan_filtering = vlan_filtering; + else + dp->vlan_filtering = vlan_filtering; return 0; } @@ -322,6 +370,37 @@ int dsa_port_vlan_del(struct dsa_port *dp, return 0; } +int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) +{ + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = flags, + .vid_begin = vid, + .vid_end = vid, + }; + struct switchdev_trans trans; + int err; + + trans.ph_prepare = true; + err = dsa_port_vlan_add(dp, &vlan, &trans); + if (err == -EOPNOTSUPP) + return 0; + + trans.ph_prepare = false; + return dsa_port_vlan_add(dp, &vlan, &trans); +} + +int dsa_port_vid_del(struct dsa_port *dp, u16 vid) +{ + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid_begin = vid, + .vid_end = vid, + }; + + return 
dsa_port_vlan_del(dp, &vlan); +} + static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) { struct device_node *phy_dn; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ce26dddc8270..8ad9bf957da1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1001,13 +1001,6 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan = { - .vid_begin = vid, - .vid_end = vid, - /* This API only allows programming tagged, non-PVID VIDs */ - .flags = 0, - }; - struct switchdev_trans trans; struct bridge_vlan_info info; int ret; @@ -1024,25 +1017,14 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - trans.ph_prepare = true; - ret = dsa_port_vlan_add(dp, &vlan, &trans); - if (ret == -EOPNOTSUPP) - return 0; - - trans.ph_prepare = false; - return dsa_port_vlan_add(dp, &vlan, &trans); + /* This API only allows programming tagged, non-PVID VIDs */ + return dsa_port_vid_add(dp, vid, 0); } static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct dsa_port *dp = dsa_slave_to_port(dev); - struct switchdev_obj_port_vlan vlan = { - .vid_begin = vid, - .vid_end = vid, - /* This API only allows programming tagged, non-PVID VIDs */ - .flags = 0, - }; struct bridge_vlan_info info; int ret; @@ -1059,7 +1041,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - ret = dsa_port_vlan_del(dp, &vlan); + ret = dsa_port_vid_del(dp, vid); if (ret == -EOPNOTSUPP) ret = 0; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index e1fae969aa73..7d8cd9bc0ecc 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -10,6 +10,7 @@ * (at your option) any later version. 
*/ +#include <linux/if_bridge.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/if_vlan.h> @@ -71,6 +72,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { + bool unset_vlan_filtering = br_vlan_enabled(info->br); + int err, i; + if (ds->index == info->sw_index && ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, info->port, info->br); @@ -78,6 +82,31 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, info->br); + /* If the bridge was vlan_filtering, the bridge core doesn't trigger an + * event for changing vlan_filtering setting upon slave ports leaving + * it. That is a good thing, because that lets us handle it and also + * handle the case where the switch's vlan_filtering setting is global + * (not per port). When that happens, the correct moment to trigger the + * vlan_filtering callback is only when the last port left this bridge. 
+ */ + if (unset_vlan_filtering && ds->vlan_filtering_is_global) { + for (i = 0; i < ds->num_ports; i++) { + if (i == info->port) + continue; + if (dsa_to_port(ds, i)->bridge_dev == info->br) { + unset_vlan_filtering = false; + break; + } + } + } + if (unset_vlan_filtering) { + struct switchdev_trans trans = {0}; + + err = dsa_port_vlan_filtering(&ds->ports[info->port], + false, &trans); + if (err && err != -EOPNOTSUPP) + return err; + } return 0; } @@ -196,7 +225,7 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port, if (!dp->bridge_dev) return err; - /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare pharse and + /* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare phase and * already checks whether there is an overlapping bridge VLAN entry * with the same VID, so here we only need to check that if we are * adding a bridge VLAN entry there is not an overlapping VLAN device diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 32cae39cdff6..8108e97d4285 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -304,7 +304,7 @@ config NET_IPVTI tristate "Virtual (secure) IP: tunneling" select INET_TUNNEL select NET_IP_TUNNEL - depends on INET_XFRM_MODE_TUNNEL + select XFRM ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -396,33 +396,6 @@ config INET_TUNNEL tristate default n -config INET_XFRM_MODE_TRANSPORT - tristate "IP: IPsec transport mode" - default y - select XFRM - ---help--- - Support for IPsec transport mode. - - If unsure, say Y. - -config INET_XFRM_MODE_TUNNEL - tristate "IP: IPsec tunnel mode" - default y - select XFRM - ---help--- - Support for IPsec tunnel mode. - - If unsure, say Y. - -config INET_XFRM_MODE_BEET - tristate "IP: IPsec BEET mode" - default y - select XFRM - ---help--- - Support for IPsec BEET mode. - - If unsure, say Y.
- config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 58629314eae9..000a61994c8f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -37,10 +37,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o -obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o -obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o -obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 10e809b296ec..fb065a8937ea 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -226,7 +226,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { int encap_type; struct udphdr *uh; @@ -234,6 +234,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru __be16 sport, dport; struct xfrm_encap_tmpl *encap = x->encap; struct ip_esp_hdr *esph = esp->esph; + unsigned int len; spin_lock_bh(&x->lock); sport = encap->encap_sport; @@ -241,11 +242,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru encap_type = encap->encap_type; spin_unlock_bh(&x->lock); + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len + sizeof(struct iphdr) >= IP_MAX_MTU) + return -EMSGSIZE; + uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len + esp->tailen - - skb_transport_offset(skb)); + uh->len = htons(len); uh->check = 0; switch (encap_type) 
{ @@ -262,6 +266,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru *skb_mac_header(skb) = IPPROTO_UDP; esp->esph = esph; + + return 0; } int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) @@ -275,8 +281,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * int tailen = esp->tailen; /* this is non-NULL only with UDP Encapsulation */ - if (x->encap) - esp_output_udp_encap(x, skb, esp); + if (x->encap) { + int err = esp_output_udp_encap(x, skb, esp); + + if (err < 0) + return err; + } if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 8756e0e790d2..8edcfa66d1e5 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -52,13 +52,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -66,7 +66,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -82,6 +82,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; @@ -107,6 +109,44 @@ static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) xo->proto = proto; } +static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); +} + +static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + 
netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + return xfrm4_tunnel_gso_segment(x, skb, features); + case XFRM_MODE_TRANSPORT: + return xfrm4_transport_gso_segment(x, skb, features); + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -138,14 +178,16 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; - if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) + if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && + !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); - else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) + else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && + !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~NETIF_F_CSUM_MASK; xo->flags |= XFRM_GSO_SEGMENT; - return x->outer_mode->gso_segment(x, skb, esp_features); + return xfrm4_outer_mode_gso_segment(x, skb, esp_features); } static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@ -181,7 +223,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ if (!xo) return -EINVAL; - if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) { + if ((!(features & NETIF_F_HW_ESP) && + !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) || + x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } diff --git 
a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 71c2165a2ce3..d3da6a10f86f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) dst_release_immediate(&rt->dst); } -static void free_nh_exceptions(struct fib_nh *nh) +static void free_nh_exceptions(struct fib_nh_common *nhc) { struct fnhe_hash_bucket *hash; int i; - hash = rcu_dereference_protected(nh->nh_exceptions, 1); + hash = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!hash) return; for (i = 0; i < FNHE_HASH_SIZE; i++) { @@ -212,6 +212,9 @@ void fib_nh_common_release(struct fib_nh_common *nhc) dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + rt_fibinfo_free(&nhc->nhc_rth_input); + free_nh_exceptions(nhc); } EXPORT_SYMBOL_GPL(fib_nh_common_release); @@ -222,9 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) net->ipv4.fib_num_tclassid_users--; #endif fib_nh_common_release(&fib_nh->nh_common); - free_nh_exceptions(fib_nh); - rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output); - rt_fibinfo_free(&fib_nh->nh_rth_input); } /* Release a nexthop info record */ @@ -491,23 +491,35 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, u16 encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { + int err; + + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) + return -ENOMEM; + if (encap) { struct lwtunnel_state *lwtstate; - int err; if (encap_type == LWTUNNEL_ENCAP_NONE) { NL_SET_ERR_MSG(extack, "LWT encap type not specified"); - return -EINVAL; + err = -EINVAL; + goto lwt_failure; } err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, cfg, &lwtstate, extack); if (err) - return err; + goto lwt_failure; nhc->nhc_lwtstate = lwtstate_get(lwtstate); } return 0; + +lwt_failure: + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + 
nhc->nhc_pcpu_rth_output = NULL; + return err; } EXPORT_SYMBOL_GPL(fib_nh_common_init); @@ -515,18 +527,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack) { - int err = -ENOMEM; + int err; nh->fib_nh_family = AF_INET; - nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); - if (!nh->nh_pcpu_rth_output) - goto err_out; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) - goto init_failure; + return err; nh->fib_nh_oif = cfg->fc_oif; nh->fib_nh_gw_family = cfg->fc_gw_family; @@ -546,12 +554,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_weight = nh_weight; #endif return 0; - -init_failure: - rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output); - nh->nh_pcpu_rth_output = NULL; -err_out: - return err; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1711,12 +1713,12 @@ static int call_fib_nh_notifiers(struct fib_nh *nh, * - if the new MTU is greater than the PMTU, don't make any change * - otherwise, unlock and set PMTU */ -static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) { struct fnhe_hash_bucket *bucket; int i; - bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!bucket) return; @@ -1747,7 +1749,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev) - nh_update_mtu(nh, dev->mtu, orig_mtu); + nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1132d6d1796a..ed97724c5e33 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -130,6 +130,7 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/indirect_call_wrapper.h> #include <net/snmp.h> #include <net/ip.h> @@ 
-188,6 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } +INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) { const struct net_protocol *ipprot; @@ -205,7 +208,8 @@ resubmit: } nf_reset(skb); } - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, + skb); if (ret < 0) { protocol = -ret; goto resubmit; @@ -305,6 +309,8 @@ drop: return true; } +INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *)); static int ip_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *dev) { @@ -322,7 +328,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - err = edemux(skb); + err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux, + udp_v4_early_demux, skb); if (unlikely(err)) goto drop_error; /* must reload iph, skb->head might have changed */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4e42c1974ba2..ac880beda8a7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -516,6 +516,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68a21bf75dd0..254a42e83ff9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -50,7 +50,7 @@ static unsigned int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) + int encap_type, bool update_skb_dev) { 
struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); @@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + if (update_skb_dev) + skb->dev = tunnel->dev; + return xfrm_input(skb, nexthdr, spi, encap_type); } @@ -74,47 +77,28 @@ drop: return 0; } -static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) +static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { - struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); - struct net *net = dev_net(skb->dev); - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); - if (tunnel) { - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - - skb->dev = tunnel->dev; - - return xfrm_input(skb, nexthdr, spi, encap_type); - } - - return -EINVAL; -drop: - kfree_skb(skb); - return 0; + return vti_input(skb, nexthdr, spi, encap_type, false); } -static int vti_rcv(struct sk_buff *skb) +static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); - return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); + return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev); } -static int vti_rcv_ipip(struct sk_buff *skb) +static int vti_rcv_proto(struct sk_buff *skb) { - XFRM_SPI_SKB_CB(skb)->family = AF_INET; - XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + return vti_rcv(skb, 0, false); +} - return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +static int vti_rcv_tunnel(struct sk_buff *skb) +{ + return vti_rcv(skb, ip_hdr(skb)->saddr, true); } static int vti_rcv_cb(struct sk_buff *skb, int err) @@ -123,7 +107,7 @@ static int vti_rcv_cb(struct sk_buff 
*skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; - struct xfrm_mode *inner_mode; + const struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@ -142,7 +126,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -153,7 +137,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) } } - family = inner_mode->afinfo->family; + family = inner_mode->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); @@ -447,31 +431,31 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) } static struct xfrm4_protocol vti_esp4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm4_protocol vti_ah4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, }; static struct xfrm_tunnel ipip_handler __read_mostly = { - .handler = vti_rcv_ipip, + .handler = vti_rcv_tunnel, .err_handler = vti4_err, .priority = 0, }; @@ -646,10 +630,8 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto 
xfrm_tunnel_failed; - } msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); @@ -659,9 +641,9 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); -xfrm_tunnel_failed: xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -676,6 +658,7 @@ pernet_dev_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 795aed6e4720..11ddc276776e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -643,8 +643,9 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh } } -static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, bool lock, unsigned long expires) +static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, + __be32 gw, u32 pmtu, bool lock, + unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -653,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, unsigned int i; int depth; - genid = fnhe_genid(dev_net(nh->fib_nh_dev)); + genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); - hash = rcu_dereference(nh->nh_exceptions); + hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - rcu_assign_pointer(nh->nh_exceptions, hash); + rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; @@ -715,13 
+716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ - rt = rcu_dereference(nh->nh_rth_input); + rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; - prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -788,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow } else { if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc = FIB_RES_NHC(res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - update_or_create_fnhe(nh, fl4->daddr, new_gw, + update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } @@ -1039,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh_common *nhc = FIB_RES_NHC(res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1328,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; @@ -1336,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_lock_bh(&fnhe_lock); - hash = rcu_dereference_protected(nh->nh_exceptions, + hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; @@ -1362,9 
+1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, + __be32 daddr) { - struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); + struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; @@ -1378,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { - ip_del_fnhe(nh, daddr); + ip_del_fnhe(nhc, daddr); break; } return fnhe; @@ -1405,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) mtu = fi->fib_mtu; if (likely(!mtu)) { - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct fib_nh_exception *fnhe; - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } @@ -1469,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, return ret; } -static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { - p = (struct rtable **)&nh->nh_rth_input; + p = (struct rtable **)&nhc->nhc_rth_input; } else { - p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; @@ -1574,7 +1571,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { rt->rt_gw_family = nhc->nhc_gw_family; @@ -1587,15 +1583,19 @@ static void 
rt_set_nexthop(struct rtable *rt, __be32 daddr, ip_dst_init_metrics(&rt->dst, fi->fib_metrics); - nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = nh->nh_tclassid; + { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + rt->dst.tclassid = nh->nh_tclassid; + } #endif - rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws); + rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) - cached = rt_cache_route(nh, rt); + cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. @@ -1756,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb, struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; - struct fib_nh *nh; int err; struct in_device *out_dev; bool do_cache; @@ -1804,13 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb, } } - nh = container_of(nhc, struct fib_nh, nh_common); - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else - rth = rcu_dereference(nh->nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2105,10 +2103,8 @@ local_input: if (res->fi) { if (!itag) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - rth = rcu_dereference(nh->nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -2139,7 +2135,6 @@ local_input: if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { @@ -2148,8 +2143,7 @@ 
local_input: rth->dst.input = lwtunnel_input; } - nh = container_of(nhc, struct fib_nh, nh_common); - if (unlikely(!rt_cache_route(nh, rth))) + if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); @@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache &= fi != NULL; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct rtable __rcu **prth; - fnhe = find_exception(nh, fl4->daddr); + fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) { @@ -2337,7 +2330,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache = false; goto add; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f7567a3698eb..1fa15beb8380 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -457,18 +457,6 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -void tcp_init_transfer(struct sock *sk, int bpf_op) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - tcp_mtup_init(sk); - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_metrics(sk); - tcp_call_bpf(sk, bpf_op, 0, NULL); - tcp_init_congestion_control(sk); - tcp_init_buffer_space(sk); -} - static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) { struct sk_buff *skb = tcp_write_queue_tail(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 97671bff597a..077d9abdfcf5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2252,7 +2252,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, */ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { - return !tp->retrans_stamp || + return tp->retrans_stamp && tcp_tsopt_ecr_before(tp, tp->retrans_stamp); } @@ -3521,7 +3521,7 @@ 
static void tcp_xmit_recovery(struct sock *sk, int rexmit) { struct tcp_sock *tp = tcp_sk(sk); - if (rexmit == REXMIT_NONE) + if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT) return; if (unlikely(rexmit == 2)) { @@ -5647,6 +5647,32 @@ discard: } EXPORT_SYMBOL(tcp_rcv_established); +void tcp_init_transfer(struct sock *sk, int bpf_op) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + tcp_mtup_init(sk); + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_metrics(sk); + + /* Initialize the congestion window to start the transfer. + * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC6298 more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. + */ + if (tp->total_retrans > 1 && tp->undo_marker) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); + tp->snd_cwnd_stamp = tcp_jiffies32; + + tcp_call_bpf(sk, bpf_op, 0, NULL); + tcp_init_congestion_control(sk); + tcp_init_buffer_space(sk); +} + void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -5748,6 +5774,21 @@ static void smc_check_reset_syn(struct tcp_sock *tp) #endif } +static void tcp_try_undo_spurious_syn(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 syn_stamp; + + /* undo_marker is set when SYN or SYNACK times out. The timeout is + * spurious if the ACK's timestamp option echo value matches the + * original SYN timestamp. 
+ */ + syn_stamp = tp->retrans_stamp; + if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp && + syn_stamp == tp->rx_opt.rcv_tsecr) + tp->undo_marker = 0; +} + static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { @@ -5815,6 +5856,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_ecn_rcv_synack(tp, th); tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + tcp_try_undo_spurious_syn(sk); tcp_ack(sk, skb, FLAG_SLOWPATH); /* Ok.. it's good. Set up sequence numbers and @@ -5973,6 +6015,27 @@ reset_and_undo: return 1; } +static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) +{ + tcp_try_undo_loss(sk, false); + inet_csk(sk)->icsk_retransmits = 0; + + /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, + * we no longer need req so release it. + */ + reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false); + + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. + */ + tcp_rearm_rto(sk); +} + /* * This function implements the receiving procedure of RFC 793 for * all states except ESTABLISHED and TIME_WAIT. @@ -6069,22 +6132,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); - /* Once we leave TCP_SYN_RECV, we no longer need req - * so release it. - */ if (req) { - inet_csk(sk)->icsk_retransmits = 0; - reqsk_fastopen_remove(sk, req, false); - /* Re-arm the timer because data may have been sent out. - * This is similar to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more aggressive and - * retransmitting any data sooner based on when they - * are sent out. 
- */ - tcp_rearm_rto(sk); + tcp_rcv_synrecv_state_fastopen(sk); } else { + tcp_try_undo_spurious_syn(sk); + tp->retrans_stamp = 0; tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->copied_seq = tp->rcv_nxt; } @@ -6119,16 +6171,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_FIN_WAIT1: { int tmo; - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. - */ - if (req) { - /* We no longer need the request sock. */ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } + if (req) + tcp_rcv_synrecv_state_fastopen(sk); + if (tp->snd_una != tp->write_seq) break; @@ -6303,7 +6348,7 @@ static void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = tcp_clock_us(); + tcp_rsk(req)->snt_synack = 0; tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? 
rx_opt->rcv_tsval : 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index faa6fa619f59..af81e4a6a8d8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || ((TCP_SKB_CB(tail)->tcp_flags | - TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) || + TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || + !((TCP_SKB_CB(tail)->tcp_flags & + TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || #ifdef CONFIG_TLS_DEVICE @@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established() + * is not entered if we append a packet with a FIN. + * SYN, RST, URG are not present. + * ACK is set on both packets. + * PSH : we do not really care in TCP stack, + * at least for 'GRO' packets. + */ + thtail->fin |= th->fin; TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; if (TCP_SKB_CB(skb)->has_rxtstamp) { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f262f2cace29..c4848e7a0aad 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -512,16 +512,6 @@ reset: inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } - /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been - * retransmitted. In light of RFC6298 more aggressive 1sec - * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK - * retransmission has occurred. 
- */ - if (tp->total_retrans > 1) - tp->snd_cwnd = 1; - else - tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_jiffies32; } bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 79900f783e0d..9c2a0d36fb20 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -522,6 +522,11 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } + if (req->num_timeout) { + newtp->undo_marker = treq->snt_isn; + newtp->retrans_stamp = div_u64(treq->snt_synack, + USEC_PER_SEC / TCP_TS_HZ); + } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 32061928b054..0c4ed66dc1bf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3247,7 +3247,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, skb->skb_mstamp_ns = cookie_init_timestamp(req); else #endif + { skb->skb_mstamp_ns = tcp_clock_ns(); + if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ + tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); + } #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f0c86398e6a7..2ac23da42dd2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -393,6 +393,9 @@ static void tcp_fastopen_synack_timer(struct sock *sk) tcp_write_err(sk); return; } + /* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */ + if (icsk->icsk_retransmits == 1) + tcp_enter_loss(sk); /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error * returned from rtx_syn_ack() to make it more persistent like * regular retransmit because if the child socket has been accepted diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 64f9715173ac..065334b41d57 100644 --- 
a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; + unsigned int ulen; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return NULL; } + /* Do not deal with padded or malicious packets, sorry ! */ + ulen = ntohs(uh->len); + if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } /* pull encapsulating udp header */ skb_gro_pull(skb, sizeof(struct udphdr)); skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); @@ -377,13 +384,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot - * leading to execessive truesize values + * leading to excessive truesize values. + * On len mismatch merge the first packet shorter than gso_size, + * otherwise complete the GRO packet. */ - if (!skb_gro_receive(p, skb) && + if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) || + ulen != ntohs(uh2->len) || NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) pp = p; - else if (uh->len != uh2->len) - pp = p; return pp; } diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c deleted file mode 100644 index 856d2dfdb44b..000000000000 --- a/net/ipv4/xfrm4_mode_beet.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4. 
- * - * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> - * Miika Komu <miika@iki.fi> - * Herbert Xu <herbert@gondor.apana.org.au> - * Abhinav Pathak <abhinav.pathak@hiit.fi> - * Jeff Ahrenholz <ahrenholz@gmail.com> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/ip.h> -#include <net/xfrm.h> - -static void xfrm4_beet_make_header(struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - - iph->ihl = 5; - iph->version = 4; - - iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; - iph->tos = XFRM_MODE_SKB_CB(skb)->tos; - - iph->id = XFRM_MODE_SKB_CB(skb)->id; - iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off; - iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl; -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - */ -static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ip_beet_phdr *ph; - struct iphdr *top_iph; - int hdrlen, optlen; - - hdrlen = 0; - optlen = XFRM_MODE_SKB_CB(skb)->optlen; - if (unlikely(optlen)) - hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); - - skb_set_network_header(skb, -x->props.header_len - - hdrlen + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); - if (x->sel.family != AF_INET6) - skb->network_header += IPV4_BEET_PHMAXLEN; - skb->mac_header = skb->network_header + - offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*top_iph); - - xfrm4_beet_make_header(skb); - - ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); - - top_iph = ip_hdr(skb); - - if (unlikely(optlen)) { - BUG_ON(optlen < 0); - - ph->padlen = 4 - (optlen & 4); - ph->hdrlen = optlen / 8; - ph->nexthdr = top_iph->protocol; - if (ph->padlen) - memset(ph + 1, IPOPT_NOP, ph->padlen); - - top_iph->protocol = IPPROTO_BEETPH; - top_iph->ihl = sizeof(struct iphdr) / 4; - } - - top_iph->saddr = x->props.saddr.a4; 
- top_iph->daddr = x->id.daddr.a4; - - return 0; -} - -static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) -{ - struct iphdr *iph; - int optlen = 0; - int err = -EINVAL; - - if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { - struct ip_beet_phdr *ph; - int phlen; - - if (!pskb_may_pull(skb, sizeof(*ph))) - goto out; - - ph = (struct ip_beet_phdr *)skb->data; - - phlen = sizeof(*ph) + ph->padlen; - optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); - if (optlen < 0 || optlen & 3 || optlen > 250) - goto out; - - XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr; - - if (!pskb_may_pull(skb, phlen)) - goto out; - __skb_pull(skb, phlen); - } - - skb_push(skb, sizeof(*iph)); - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - - xfrm4_beet_make_header(skb); - - iph = ip_hdr(skb); - - iph->ihl += optlen / 4; - iph->tot_len = htons(skb->len); - iph->daddr = x->sel.daddr.a4; - iph->saddr = x->sel.saddr.a4; - iph->check = 0; - iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); - err = 0; -out: - return err; -} - -static struct xfrm_mode xfrm4_beet_mode = { - .input2 = xfrm4_beet_input, - .input = xfrm_prepare_input, - .output2 = xfrm4_beet_output, - .output = xfrm4_prepare_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_BEET, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm4_beet_init(void) -{ - return xfrm_register_mode(&xfrm4_beet_mode, AF_INET); -} - -static void __exit xfrm4_beet_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET); - BUG_ON(err); -} - -module_init(xfrm4_beet_init); -module_exit(xfrm4_beet_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET); diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c deleted file mode 100644 index 1ad2c2c4e250..000000000000 --- a/net/ipv4/xfrm4_mode_transport.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * xfrm4_mode_transport.c - Transport mode encapsulation for IPv4. 
- * - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/ip.h> -#include <net/xfrm.h> -#include <net/protocol.h> - -/* Add encapsulation header. - * - * The IP header will be moved forward to make space for the encapsulation - * header. - */ -static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct iphdr *iph = ip_hdr(skb); - int ihl = iph->ihl * 4; - - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - skb_set_network_header(skb, -x->props.header_len); - skb->mac_header = skb->network_header + - offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + ihl; - __skb_pull(skb, ihl); - memmove(skb_network_header(skb), iph, ihl); - return 0; -} - -/* Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation header. - * - * On entry, skb->h shall point to where the IP header should be and skb->nh - * shall be set to where the IP header currently is. skb->data shall point - * to the start of the payload. 
- */ -static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int ihl = skb->data - skb_transport_header(skb); - - if (skb->transport_header != skb->network_header) { - memmove(skb_transport_header(skb), - skb_network_header(skb), ihl); - skb->network_header = skb->transport_header; - } - ip_hdr(skb)->tot_len = htons(skb->len + ihl); - skb_reset_transport_header(skb); - return 0; -} - -static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - const struct net_offload *ops; - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct xfrm_offload *xo = xfrm_offload(skb); - - skb->transport_header += x->props.header_len; - ops = rcu_dereference(inet_offloads[xo->proto]); - if (likely(ops && ops->callbacks.gso_segment)) - segs = ops->callbacks.gso_segment(skb, features); - - return segs; -} - -static void xfrm4_transport_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + sizeof(struct iphdr) + x->props.header_len); - - if (xo->flags & XFRM_GSO_SEGMENT) { - skb_reset_transport_header(skb); - skb->transport_header -= x->props.header_len; - } -} - -static struct xfrm_mode xfrm4_transport_mode = { - .input = xfrm4_transport_input, - .output = xfrm4_transport_output, - .gso_segment = xfrm4_transport_gso_segment, - .xmit = xfrm4_transport_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TRANSPORT, -}; - -static int __init xfrm4_transport_init(void) -{ - return xfrm_register_mode(&xfrm4_transport_mode, AF_INET); -} - -static void __exit xfrm4_transport_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm4_transport_mode, AF_INET); - BUG_ON(err); -} - -module_init(xfrm4_transport_init); -module_exit(xfrm4_transport_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TRANSPORT); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c 
deleted file mode 100644 index 2a9764bd1719..000000000000 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * xfrm4_mode_tunnel.c - Tunnel mode encapsulation for IPv4. - * - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/inet_ecn.h> -#include <net/ip.h> -#include <net/xfrm.h> - -static inline void ipip_ecn_decapsulate(struct sk_buff *skb) -{ - struct iphdr *inner_iph = ipip_hdr(skb); - - if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) - IP_ECN_set_ce(inner_iph); -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per RFC 2401. - */ -static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct iphdr *top_iph; - int flags; - - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - skb_set_network_header(skb, -x->props.header_len); - skb->mac_header = skb->network_header + - offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*top_iph); - top_iph = ip_hdr(skb); - - top_iph->ihl = 5; - top_iph->version = 4; - - top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); - - /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ - if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) - top_iph->tos = 0; - else - top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; - top_iph->tos = INET_ECN_encapsulate(top_iph->tos, - XFRM_MODE_SKB_CB(skb)->tos); - - flags = x->props.flags; - if (flags & XFRM_STATE_NOECN) - IP_ECN_clear(top_iph); - - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
- 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - - top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); - - top_iph->saddr = x->props.saddr.a4; - top_iph->daddr = x->id.daddr.a4; - ip_select_ident(dev_net(dst->dev), skb, NULL); - - return 0; -} - -static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int err = -EINVAL; - - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) - goto out; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - - err = skb_unclone(skb, GFP_ATOMIC); - if (err) - goto out; - - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb)); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip_ecn_decapsulate(skb); - - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - if (skb->mac_len) - eth_hdr(skb)->h_proto = skb->protocol; - - err = 0; - -out: - return err; -} - -static struct sk_buff *xfrm4_mode_tunnel_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - __skb_push(skb, skb->mac_len); - return skb_mac_gso_segment(skb, features); -} - -static void xfrm4_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - if (xo->flags & XFRM_GSO_SEGMENT) - skb->transport_header = skb->network_header + - sizeof(struct iphdr); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + x->props.header_len); -} - -static struct xfrm_mode xfrm4_tunnel_mode = { - .input2 = xfrm4_mode_tunnel_input, - .input = xfrm_prepare_input, - .output2 = xfrm4_mode_tunnel_output, - .output = xfrm4_prepare_output, - .gso_segment = xfrm4_mode_tunnel_gso_segment, - .xmit = xfrm4_mode_tunnel_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TUNNEL, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm4_mode_tunnel_init(void) -{ - return xfrm_register_mode(&xfrm4_tunnel_mode, AF_INET); -} - -static void __exit xfrm4_mode_tunnel_exit(void) -{ - int err; - - err = 
xfrm_unregister_mode(&xfrm4_tunnel_mode, AF_INET); - BUG_ON(err); -} - -module_init(xfrm4_mode_tunnel_init); -module_exit(xfrm4_mode_tunnel_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_TUNNEL); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index be980c195fc5..9bb8905088c7 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -58,21 +58,6 @@ int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm_inner_extract_output(x, skb); - if (err) - return err; - - IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; - skb->protocol = htons(ETH_P_IP); - - return x->outer_mode->output2(x, skb); -} -EXPORT_SYMBOL(xfrm4_prepare_output); - int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -87,6 +72,8 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; + const struct xfrm_state_afinfo *afinfo; + int ret = -EAFNOSUPPORT; #ifdef CONFIG_NETFILTER if (!x) { @@ -95,7 +82,15 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) } #endif - return x->outer_mode->afinfo->output_finish(sk, skb); + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); + if (likely(afinfo)) + ret = afinfo->output_finish(sk, skb); + else + kfree_skb(skb); + rcu_read_unlock(); + + return ret; } int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 72d19b1838ed..cdef8f9a3b01 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -12,7 +12,6 @@ #include <linux/err.h> #include <linux/kernel.h> #include <linux/inetdevice.h> -#include <linux/if_tunnel.h> #include <net/dst.h> 
#include <net/xfrm.h> #include <net/ip.h> @@ -69,17 +68,6 @@ static int xfrm4_get_saddr(struct net *net, int oif, return 0; } -static int xfrm4_get_tos(const struct flowi *fl) -{ - return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ -} - -static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) -{ - return 0; -} - static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { @@ -110,118 +98,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return 0; } -static void -_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) -{ - const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; - struct flowi4 *fl4 = &fl->u.ip4; - int oif = 0; - - if (skb_dst(skb)) - oif = skb_dst(skb)->dev->ifindex; - - memset(fl4, 0, sizeof(struct flowi4)); - fl4->flowi4_mark = skb->mark; - fl4->flowi4_oif = reverse ? skb->skb_iif : oif; - - if (!ip_is_fragment(iph)) { - switch (iph->protocol) { - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports; - - xprth = skb_network_header(skb) + iph->ihl * 4; - ports = (__be16 *)xprth; - - fl4->fl4_sport = ports[!!reverse]; - fl4->fl4_dport = ports[!reverse]; - } - break; - - case IPPROTO_ICMP: - if (xprth + 2 < skb->data || - pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp; - - xprth = skb_network_header(skb) + iph->ihl * 4; - icmp = xprth; - - fl4->fl4_icmp_type = icmp[0]; - fl4->fl4_icmp_code = icmp[1]; - } - break; - - case IPPROTO_ESP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr; - - xprth = skb_network_header(skb) + iph->ihl * 4; - ehdr = (__be32 *)xprth; - - fl4->fl4_ipsec_spi = ehdr[0]; - } - break; - - case IPPROTO_AH: - if (xprth + 8 < skb->data || - pskb_may_pull(skb, xprth + 8 - 
skb->data)) { - __be32 *ah_hdr; - - xprth = skb_network_header(skb) + iph->ihl * 4; - ah_hdr = (__be32 *)xprth; - - fl4->fl4_ipsec_spi = ah_hdr[1]; - } - break; - - case IPPROTO_COMP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr; - - xprth = skb_network_header(skb) + iph->ihl * 4; - ipcomp_hdr = (__be16 *)xprth; - - fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); - } - break; - - case IPPROTO_GRE: - if (xprth + 12 < skb->data || - pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags; - __be32 *gre_hdr; - - xprth = skb_network_header(skb) + iph->ihl * 4; - greflags = (__be16 *)xprth; - gre_hdr = (__be32 *)xprth; - - if (greflags[0] & GRE_KEY) { - if (greflags[0] & GRE_CSUM) - gre_hdr++; - fl4->fl4_gre_key = gre_hdr[1]; - } - } - break; - - default: - fl4->fl4_ipsec_spi = 0; - break; - } - } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -274,9 +150,6 @@ static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, - .decode_session = _decode_session4, - .get_tos = xfrm4_get_tos, - .init_path = xfrm4_init_path, .fill_dst = xfrm4_fill_dst, .blackhole_route = ipv4_blackhole_route, }; diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 35c54865dc42..bcab48944c15 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -46,7 +46,7 @@ static inline struct xfrm4_protocol __rcu **proto_handlers(u8 protocol) handler != NULL; \ handler = rcu_dereference(handler->next)) \ -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +static int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm4_protocol *handler; @@ -61,7 
+61,6 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } -EXPORT_SYMBOL(xfrm4_rcv_cb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 613282c65a10..cd915e332c98 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -135,44 +135,11 @@ config INET6_TUNNEL tristate default n -config INET6_XFRM_MODE_TRANSPORT - tristate "IPv6: IPsec transport mode" - default IPV6 - select XFRM - ---help--- - Support for IPsec transport mode. - - If unsure, say Y. - -config INET6_XFRM_MODE_TUNNEL - tristate "IPv6: IPsec tunnel mode" - default IPV6 - select XFRM - ---help--- - Support for IPsec tunnel mode. - - If unsure, say Y. - -config INET6_XFRM_MODE_BEET - tristate "IPv6: IPsec BEET mode" - default IPV6 - select XFRM - ---help--- - Support for IPsec BEET mode. - - If unsure, say Y. - -config INET6_XFRM_MODE_ROUTEOPTIMIZATION - tristate "IPv6: MIPv6 route optimization mode" - select XFRM - ---help--- - Support for MIPv6 route optimization mode. 
- config IPV6_VTI tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL select NET_IP_TUNNEL - depends on INET6_XFRM_MODE_TUNNEL + select XFRM ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index e0026fa1261b..8ccf35514015 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -35,10 +35,6 @@ obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o -obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o -obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o -obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o -obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_IPV6_MIP6) += mip6.o obj-$(CONFIG_IPV6_ILA) += ila/ obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index d46b4eb645c2..d453cf417b03 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, goto out; if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET6); if (!x) - goto out; + goto out_reset; sp->xvec[sp->len++] = x; sp->olen++; @@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } } @@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, xfrm_input(skb, IPPROTO_ESP, spi, -2); return ERR_PTR(-EINPROGRESS); +out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; @@ -134,6 +136,44 @@ static void esp6_gso_encap(struct xfrm_state *x, struct 
sk_buff *skb) xo->proto = proto; } +static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); +} + +static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet6_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + return xfrm6_tunnel_gso_segment(x, skb, features); + case XFRM_MODE_TRANSPORT: + return xfrm6_transport_gso_segment(x, skb, features); + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -172,7 +212,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, xo->flags |= XFRM_GSO_SEGMENT; - return x->outer_mode->gso_segment(x, skb, esp_features); + return xfrm6_outer_mode_gso_segment(x, skb, esp_features); } static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index a8919c217cc2..08e0390e001c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -916,9 +916,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, if (pcpu_rt) { struct fib6_info *from; - from = rcu_dereference_protected(pcpu_rt->from, - lockdep_is_held(&table->tb6_lock)); - rcu_assign_pointer(pcpu_rt->from, NULL); + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } diff --git 
a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index cb54a8a3c273..be5f3d7ceb96 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) @@ -633,9 +639,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c7ed2b6d5a1d..b50b1af1f530 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -29,6 +29,7 @@ #include <linux/icmpv6.h> #include <linux/mroute6.h> #include <linux/slab.h> +#include <linux/indirect_call_wrapper.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -47,6 +48,8 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> +INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -57,7 +60,8 @@ static void ip6_rcv_finish_core(struct net *net, struct sock *sk, ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - edemux(skb); + 
INDIRECT_CALL_2(edemux, tcp_v6_early_demux, + udp_v6_early_demux, skb); } if (!skb_valid_dst(skb)) ip6_route_input(skb); @@ -316,6 +320,9 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, ip6_sublist_rcv(&sublist, curr_dev, curr_net); } +INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); + /* * Deliver the packet to the host */ @@ -391,7 +398,8 @@ resubmit_final: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, + skb); if (ret > 0) { if (ipprot->flags & INET6_PROTO_FINAL) { /* Not an extension header, most likely UDP diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 8b6eefff2f7e..218a0dedc8f4 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -342,7 +342,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; - struct xfrm_mode *inner_mode; + const struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; @@ -361,7 +361,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -372,7 +372,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) } } - family = inner_mode->afinfo->family; + family = inner_mode->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b18e85cd7587..23a20d62daac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -380,11 +380,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - rcu_read_lock(); - from = rcu_dereference(rt->from); - 
rcu_assign_pointer(rt->from, NULL); + from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); - rcu_read_unlock(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1323,9 +1320,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = rcu_dereference_protected(rt6_ex->rt6i->from, - lockdep_is_held(&rt6_exception_lock)); - rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); @@ -3495,11 +3490,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); res.f6i = rcu_dereference(rt->from); - /* This fib6_info_hold() is safe here because we hold reference to rt - * and rt already holds reference to fib6_info. - */ - fib6_info_hold(res.f6i); - rcu_read_unlock(); + if (!res.f6i) + goto out; res.nh = &res.f6i->fib6_nh; res.fib6_flags = res.f6i->fib6_flags; @@ -3514,10 +3506,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - /* No need to remove rt from the exception table if rt is - * a cached route because rt6_insert_exception() will - * takes care of it - */ + /* rt6_insert_exception() will take care of duplicated exceptions */ if (rt6_insert_exception(nrt, &res)) { dst_release_immediate(&nrt->dst); goto out; @@ -3530,7 +3519,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: - fib6_info_release(res.f6i); + rcu_read_unlock(); neigh_release(neigh); } @@ -3772,23 +3761,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int 
ipstats_mib_noroutes) { - int type; struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; + int type; + + if (netif_is_l3_master(skb->dev) && + dst->dev == net->loopback_dev) + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); + else + idev = ip6_dst_idev(dst); + switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), - __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), - ipstats_mib_noroutes); + IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } + + /* Start over by dropping the dst for l3mdev case */ + if (netif_is_l3_master(skb->dev)) + skb_dst_drop(skb); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; @@ -5056,16 +5056,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rcu_read_lock(); from = rcu_dereference(rt->from); - - if (fibmatch) - err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); - else - err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, - &fl6.saddr, iif, RTM_NEWROUTE, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - 0); + if (from) { + if (fibmatch) + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, + iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + } else { + err = -ENETUNREACH; + } rcu_read_unlock(); if (err < 0) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 82018bdce863..beaf28456301 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -43,6 +43,7 @@ #include 
<linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> +#include <linux/indirect_call_wrapper.h> #include <net/tcp.h> #include <net/ndisc.h> @@ -1435,7 +1436,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } -static int tcp_v6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); @@ -1654,7 +1655,7 @@ do_time_wait: goto discard_it; } -static void tcp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) { const struct ipv6hdr *hdr; const struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2464fba569b4..07fa579dfb96 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/indirect_call_wrapper.h> #include <net/addrconf.h> #include <net/ndisc.h> @@ -980,7 +981,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, return NULL; } -static void udp_v6_early_demux(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct udphdr *uh; @@ -1021,7 +1022,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) } } -static __inline__ int udpv6_rcv(struct sk_buff *skb) +INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c deleted file mode 100644 index 57fd314ec2b8..000000000000 --- a/net/ipv6/xfrm6_mode_beet.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6. 
- * - * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> - * Miika Komu <miika@iki.fi> - * Herbert Xu <herbert@gondor.apana.org.au> - * Abhinav Pathak <abhinav.pathak@hiit.fi> - * Jeff Ahrenholz <ahrenholz@gmail.com> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dsfield.h> -#include <net/dst.h> -#include <net/inet_ecn.h> -#include <net/ipv6.h> -#include <net/xfrm.h> - -static void xfrm6_beet_make_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - iph->version = 6; - - memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, - sizeof(iph->flow_lbl)); - iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; - - ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); - iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. - */ -static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *top_iph; - struct ip_beet_phdr *ph; - int optlen, hdr_len; - - hdr_len = 0; - optlen = XFRM_MODE_SKB_CB(skb)->optlen; - if (unlikely(optlen)) - hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4); - - skb_set_network_header(skb, -x->props.header_len - hdr_len); - if (x->sel.family != AF_INET6) - skb->network_header += IPV4_BEET_PHMAXLEN; - skb->mac_header = skb->network_header + - offsetof(struct ipv6hdr, nexthdr); - skb->transport_header = skb->network_header + sizeof(*top_iph); - ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len); - - xfrm6_beet_make_header(skb); - - top_iph = ipv6_hdr(skb); - if (unlikely(optlen)) { - - BUG_ON(optlen < 0); - - ph->padlen = 4 - (optlen & 4); - ph->hdrlen = optlen / 8; - ph->nexthdr = top_iph->nexthdr; - if (ph->padlen) - memset(ph + 1, IPOPT_NOP, ph->padlen); - - top_iph->nexthdr = IPPROTO_BEETPH; - } - - top_iph->saddr = *(struct in6_addr *)&x->props.saddr; - 
top_iph->daddr = *(struct in6_addr *)&x->id.daddr; - return 0; -} - -static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *ip6h; - int size = sizeof(struct ipv6hdr); - int err; - - err = skb_cow_head(skb, size + skb->mac_len); - if (err) - goto out; - - __skb_push(skb, size); - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - - xfrm6_beet_make_header(skb); - - ip6h = ipv6_hdr(skb); - ip6h->payload_len = htons(skb->len - size); - ip6h->daddr = x->sel.daddr.in6; - ip6h->saddr = x->sel.saddr.in6; - err = 0; -out: - return err; -} - -static struct xfrm_mode xfrm6_beet_mode = { - .input2 = xfrm6_beet_input, - .input = xfrm_prepare_input, - .output2 = xfrm6_beet_output, - .output = xfrm6_prepare_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_BEET, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm6_beet_init(void) -{ - return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6); -} - -static void __exit xfrm6_beet_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_beet_init); -module_exit(xfrm6_beet_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET); diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c deleted file mode 100644 index da28e4407b8f..000000000000 --- a/net/ipv6/xfrm6_mode_ro.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * xfrm6_mode_ro.c - Route optimization mode for IPv6. - * - * Copyright (C)2003-2006 Helsinki University of Technology - * Copyright (C)2003-2006 USAGI/WIDE Project - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/* - * Authors: - * Noriaki TAKAMIYA @USAGI - * Masahide NAKAMURA @USAGI - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/stringify.h> -#include <linux/time.h> -#include <net/ipv6.h> -#include <net/xfrm.h> - -/* Add route optimization header space. - * - * The IP header and mutable extension headers will be moved forward to make - * space for the route optimization header. - */ -static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - u8 *prevhdr; - int hdr_len; - - iph = ipv6_hdr(skb); - - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - if (hdr_len < 0) - return hdr_len; - skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); - skb_set_network_header(skb, -x->props.header_len); - skb->transport_header = skb->network_header + hdr_len; - __skb_pull(skb, hdr_len); - memmove(ipv6_hdr(skb), iph, hdr_len); - - x->lastused = ktime_get_real_seconds(); - - return 0; -} - -static struct xfrm_mode xfrm6_ro_mode = { - .output = xfrm6_ro_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_ROUTEOPTIMIZATION, -}; - -static int __init xfrm6_ro_init(void) -{ - return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6); -} - -static void __exit xfrm6_ro_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_ro_init); -module_exit(xfrm6_ro_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION); diff --git 
a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c deleted file mode 100644 index 3c29da5defe6..000000000000 --- a/net/ipv6/xfrm6_mode_transport.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * xfrm6_mode_transport.c - Transport mode encapsulation for IPv6. - * - * Copyright (C) 2002 USAGI/WIDE Project - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dst.h> -#include <net/ipv6.h> -#include <net/xfrm.h> -#include <net/protocol.h> - -/* Add encapsulation header. - * - * The IP header and mutable extension headers will be moved forward to make - * space for the encapsulation header. - */ -static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - u8 *prevhdr; - int hdr_len; - - iph = ipv6_hdr(skb); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - if (hdr_len < 0) - return hdr_len; - skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); - skb_set_network_header(skb, -x->props.header_len); - skb->transport_header = skb->network_header + hdr_len; - __skb_pull(skb, hdr_len); - memmove(ipv6_hdr(skb), iph, hdr_len); - return 0; -} - -/* Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation header. - * - * On entry, skb->h shall point to where the IP header should be and skb->nh - * shall be set to where the IP header currently is. skb->data shall point - * to the start of the payload. 
- */ -static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int ihl = skb->data - skb_transport_header(skb); - - if (skb->transport_header != skb->network_header) { - memmove(skb_transport_header(skb), - skb_network_header(skb), ihl); - skb->network_header = skb->transport_header; - } - ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - - sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); - return 0; -} - -static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - const struct net_offload *ops; - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct xfrm_offload *xo = xfrm_offload(skb); - - skb->transport_header += x->props.header_len; - ops = rcu_dereference(inet6_offloads[xo->proto]); - if (likely(ops && ops->callbacks.gso_segment)) - segs = ops->callbacks.gso_segment(skb, features); - - return segs; -} - -static void xfrm6_transport_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + sizeof(struct ipv6hdr) + x->props.header_len); - - if (xo->flags & XFRM_GSO_SEGMENT) { - skb_reset_transport_header(skb); - skb->transport_header -= x->props.header_len; - } -} - - -static struct xfrm_mode xfrm6_transport_mode = { - .input = xfrm6_transport_input, - .output = xfrm6_transport_output, - .gso_segment = xfrm4_transport_gso_segment, - .xmit = xfrm6_transport_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TRANSPORT, -}; - -static int __init xfrm6_transport_init(void) -{ - return xfrm_register_mode(&xfrm6_transport_mode, AF_INET6); -} - -static void __exit xfrm6_transport_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_transport_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_transport_init); -module_exit(xfrm6_transport_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TRANSPORT); diff --git 
a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c deleted file mode 100644 index de1b0b8c53b0..000000000000 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * xfrm6_mode_tunnel.c - Tunnel mode encapsulation for IPv6. - * - * Copyright (C) 2002 USAGI/WIDE Project - * Copyright (c) 2004-2006 Herbert Xu <herbert@gondor.apana.org.au> - */ - -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/stringify.h> -#include <net/dsfield.h> -#include <net/dst.h> -#include <net/inet_ecn.h> -#include <net/ip6_route.h> -#include <net/ipv6.h> -#include <net/xfrm.h> - -static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) -{ - struct ipv6hdr *inner_iph = ipipv6_hdr(skb); - - if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) - IP6_ECN_set_ce(skb, inner_iph); -} - -/* Add encapsulation header. - * - * The top IP header will be constructed per RFC 2401. - */ -static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - struct ipv6hdr *top_iph; - int dsfield; - - skb_set_inner_network_header(skb, skb_network_offset(skb)); - skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - - skb_set_network_header(skb, -x->props.header_len); - skb->mac_header = skb->network_header + - offsetof(struct ipv6hdr, nexthdr); - skb->transport_header = skb->network_header + sizeof(*top_iph); - top_iph = ipv6_hdr(skb); - - top_iph->version = 6; - - memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, - sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); - - if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) - dsfield = 0; - else - dsfield = XFRM_MODE_SKB_CB(skb)->tos; - dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); - if (x->props.flags & XFRM_STATE_NOECN) - dsfield &= ~INET_ECN_MASK; - ipv6_change_dsfield(top_iph, 0, dsfield); - 
top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); - top_iph->saddr = *(struct in6_addr *)&x->props.saddr; - top_iph->daddr = *(struct in6_addr *)&x->id.daddr; - return 0; -} - -#define for_each_input_rcu(head, handler) \ - for (handler = rcu_dereference(head); \ - handler != NULL; \ - handler = rcu_dereference(handler->next)) - - -static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) -{ - int err = -EINVAL; - - if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) - goto out; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - - err = skb_unclone(skb, GFP_ATOMIC); - if (err) - goto out; - - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), - ipipv6_hdr(skb)); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); - - skb_reset_network_header(skb); - skb_mac_header_rebuild(skb); - if (skb->mac_len) - eth_hdr(skb)->h_proto = skb->protocol; - - err = 0; - -out: - return err; -} - -static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x, - struct sk_buff *skb, - netdev_features_t features) -{ - __skb_push(skb, skb->mac_len); - return skb_mac_gso_segment(skb, features); -} - -static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb) -{ - struct xfrm_offload *xo = xfrm_offload(skb); - - if (xo->flags & XFRM_GSO_SEGMENT) - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - - skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + x->props.header_len); -} - -static struct xfrm_mode xfrm6_tunnel_mode = { - .input2 = xfrm6_mode_tunnel_input, - .input = xfrm_prepare_input, - .output2 = xfrm6_mode_tunnel_output, - .output = xfrm6_prepare_output, - .gso_segment = xfrm6_mode_tunnel_gso_segment, - .xmit = xfrm6_mode_tunnel_xmit, - .owner = THIS_MODULE, - .encap = XFRM_MODE_TUNNEL, - .flags = XFRM_MODE_FLAG_TUNNEL, -}; - -static int __init xfrm6_mode_tunnel_init(void) -{ - return 
xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); -} - -static void __exit xfrm6_mode_tunnel_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_tunnel_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_mode_tunnel_init); -module_exit(xfrm6_mode_tunnel_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6a74080005cf..8ad5e54eb8ca 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -111,21 +111,6 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) return xfrm6_extract_header(skb); } -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm_inner_extract_output(x, skb); - if (err) - return err; - - skb->ignore_df = 1; - skb->protocol = htons(ETH_P_IPV6); - - return x->outer_mode->output2(x, skb); -} -EXPORT_SYMBOL(xfrm6_prepare_output); - int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); @@ -137,11 +122,28 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } +static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk, + struct sk_buff *skb) +{ + const struct xfrm_state_afinfo *afinfo; + int ret = -EAFNOSUPPORT; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); + if (likely(afinfo)) + ret = afinfo->output_finish(sk, skb); + else + kfree_skb(skb); + rcu_read_unlock(); + + return ret; +} + static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; - return x->outer_mode->afinfo->output_finish(sk, skb); + return __xfrm6_output_state_finish(x, sk, skb); } static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -183,7 +185,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) __xfrm6_output_finish); 
skip_frag: - return x->outer_mode->afinfo->output_finish(sk, skb); + return __xfrm6_output_state_finish(x, sk, skb); } int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 769f8f78d3b8..699e0730ce8e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -22,9 +22,6 @@ #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/l3mdev.h> -#if IS_ENABLED(CONFIG_IPV6_MIP6) -#include <net/mip6.h> -#endif static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, @@ -71,24 +68,6 @@ static int xfrm6_get_saddr(struct net *net, int oif, return 0; } -static int xfrm6_get_tos(const struct flowi *fl) -{ - return 0; -} - -static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) -{ - if (dst->ops->family == AF_INET6) { - struct rt6_info *rt = (struct rt6_info *)dst; - path->path_cookie = rt6_get_cookie(rt); - } - - path->u.rt6.rt6i_nfheader_len = nfheader_len; - - return 0; -} - static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { @@ -118,108 +97,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return 0; } -static inline void -_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) -{ - struct flowi6 *fl6 = &fl->u.ip6; - int onlyproto = 0; - const struct ipv6hdr *hdr = ipv6_hdr(skb); - u32 offset = sizeof(*hdr); - struct ipv6_opt_hdr *exthdr; - const unsigned char *nh = skb_network_header(skb); - u16 nhoff = IP6CB(skb)->nhoff; - int oif = 0; - u8 nexthdr; - - if (!nhoff) - nhoff = offsetof(struct ipv6hdr, nexthdr); - - nexthdr = nh[nhoff]; - - if (skb_dst(skb)) - oif = skb_dst(skb)->dev->ifindex; - - memset(fl6, 0, sizeof(struct flowi6)); - fl6->flowi6_mark = skb->mark; - fl6->flowi6_oif = reverse ? skb->skb_iif : oif; - - fl6->daddr = reverse ? hdr->saddr : hdr->daddr; - fl6->saddr = reverse ? 
hdr->daddr : hdr->saddr; - - while (nh + offset + sizeof(*exthdr) < skb->data || - pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { - nh = skb_network_header(skb); - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - - switch (nexthdr) { - case NEXTHDR_FRAGMENT: - onlyproto = 1; - /* fall through */ - case NEXTHDR_ROUTING: - case NEXTHDR_HOP: - case NEXTHDR_DEST: - offset += ipv6_optlen(exthdr); - nexthdr = exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - break; - - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (!onlyproto && (nh + offset + 4 < skb->data || - pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports; - - nh = skb_network_header(skb); - ports = (__be16 *)(nh + offset); - fl6->fl6_sport = ports[!!reverse]; - fl6->fl6_dport = ports[!reverse]; - } - fl6->flowi6_proto = nexthdr; - return; - - case IPPROTO_ICMPV6: - if (!onlyproto && (nh + offset + 2 < skb->data || - pskb_may_pull(skb, nh + offset + 2 - skb->data))) { - u8 *icmp; - - nh = skb_network_header(skb); - icmp = (u8 *)(nh + offset); - fl6->fl6_icmp_type = icmp[0]; - fl6->fl6_icmp_code = icmp[1]; - } - fl6->flowi6_proto = nexthdr; - return; - -#if IS_ENABLED(CONFIG_IPV6_MIP6) - case IPPROTO_MH: - offset += ipv6_optlen(exthdr); - if (!onlyproto && (nh + offset + 3 < skb->data || - pskb_may_pull(skb, nh + offset + 3 - skb->data))) { - struct ip6_mh *mh; - - nh = skb_network_header(skb); - mh = (struct ip6_mh *)(nh + offset); - fl6->fl6_mh_type = mh->ip6mh_type; - } - fl6->flowi6_proto = nexthdr; - return; -#endif - - /* XXX Why are there these headers? 
*/ - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: - default: - fl6->fl6_ipsec_spi = 0; - fl6->flowi6_proto = nexthdr; - return; - } - } -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -291,9 +168,6 @@ static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, - .decode_session = _decode_session6, - .get_tos = xfrm6_get_tos, - .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, }; diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index cc979b702c89..aaacac7fdbce 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -46,7 +46,7 @@ static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) handler != NULL; \ handler = rcu_dereference(handler->next)) \ -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; @@ -61,7 +61,6 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } -EXPORT_SYMBOL(xfrm6_rcv_cb); static int xfrm6_esp_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index bc65db782bfb..d9e5f6808811 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) unsigned int i; xfrm_flush_gc(); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); @@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has 
gotten the xfrm6_tunnel_spi. + * So need to wait it. + */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } diff --git a/net/key/af_key.c b/net/key/af_key.c index 5651c29cb5bd..4af1e1d60b9f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1951,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; + if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) + return -EINVAL; - t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */ + t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fed6becc5daf..52b5a2797c0c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id) { - l2tp_tunnel_inc_refcount(tunnel); + if (tunnel->tunnel_id == tunnel_id && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth) { - l2tp_tunnel_inc_refcount(tunnel); + if (++count > nth && + refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } @@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_tunnel *tunnel; - tunnel = l2tp_tunnel(sk); + tunnel = rcu_dereference_sk_user_data(sk); if (tunnel == NULL) goto pass_up; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cff0fb3578c9..deb3faf08337 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -841,7 +841,7 @@ 
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) dir = sdata->vif.debugfs_dir; - if (!dir) + if (IS_ERR_OR_NULL(dir)) return; sprintf(buf, "netdev:%s", sdata->name); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e03c46ac8e4d..c62101857b9b 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -112,8 +112,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, IEEE80211_HT_CAP_TX_STBC); /* Allow user to configure RX STBC bits */ - if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC) - ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC; + if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC)) + ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) & + IEEE80211_HT_CAP_RX_STBC; /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 94459b2b3d2a..410685d38c46 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1907,6 +1907,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + synchronize_rcu(); if (sdata->dev) { diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 72668759cd2b..efccd1ac9a66 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -362,8 +362,8 @@ int genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; - family->id = idr_alloc(&genl_fam_idr, family, - start, end + 1, GFP_KERNEL); + family->id = idr_alloc_cyclic(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; goto errout_free; @@ -537,21 +537,25 @@ static int genl_family_rcv_msg(const struct genl_family *family, return -EOPNOTSUPP; if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) { - unsigned int validate = NL_VALIDATE_STRICT; int hdrlen = GENL_HDRLEN + 
family->hdrsize; - if (ops->validate & GENL_DONT_VALIDATE_DUMP_STRICT) - validate = NL_VALIDATE_LIBERAL; - if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; - rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), - family->maxattr, family->policy, - validate, extack); - if (rc) - return rc; + if (family->maxattr) { + unsigned int validate = NL_VALIDATE_STRICT; + + if (ops->validate & + GENL_DONT_VALIDATE_DUMP_STRICT) + validate = NL_VALIDATE_LIBERAL; + rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), + family->maxattr, + family->policy, + validate, extack); + if (rc) + return rc; + } } if (!family->parallel_ops) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c4128082f88b..333ec5f298fe 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -2175,6 +2175,10 @@ static int ovs_ct_limit_cmd_get(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(reply); nla_reply = nla_nest_start_noflag(reply, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); + if (!nla_reply) { + err = -EMSGSIZE; + goto exit_err; + } if (a[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { err = ovs_ct_limit_get_zone_limit( diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b95015c7e999..dc9ff9367221 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -455,7 +455,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); - BUG_ON(err); + if (err) + goto out; if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -471,7 +472,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); - BUG_ON(err); + if (err) + goto out; + nla_nest_end(user_skb, nla); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 
5c4a118d6f96..90d4e3ce00e5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2600,8 +2600,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; @@ -2612,7 +2612,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2622,10 +2621,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2797,7 +2799,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2814,7 +2816,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2822,10 +2823,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = 
saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -3342,20 +3346,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 70559854837e..8946c89d7392 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -772,7 +772,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, unsigned long frag_off; unsigned long to_copy; unsigned long copied; - uint64_t uncongested = 0; + __le64 uncongested = 0; void *addr; /* catch completely corrupt packets */ @@ -789,7 +789,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, copied = 0; while (copied < RDS_CONG_MAP_BYTES) { - uint64_t *src, *dst; + __le64 *src, *dst; unsigned int k; to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); @@ 
-824,9 +824,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, } /* the congestion map is in little endian order */ - uncongested = le64_to_cpu(uncongested); - - rds_cong_map_updated(map, uncongested); + rds_cong_map_updated(map, le64_to_cpu(uncongested)); } static void rds_ib_process_recv(struct rds_connection *conn, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8aa2937b069f..fe96881a334d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); - if (list_empty(&rxnet->calls)) - return; + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); - write_lock(&rxnet->call_lock); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, struct rxrpc_call, link); - _debug("Zapping call %p", call); + rxrpc_see_call(call); + list_del_init(&call->link); - rxrpc_see_call(call); - list_del_init(&call->link); + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - rxrpc_call_states[call->state], - call->flags, call->events); + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } write_unlock(&rxnet->call_lock); - cond_resched(); - write_lock(&rxnet->call_lock); } - write_unlock(&rxnet->call_lock); - atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 04e9ef088535..4b8710a266cc 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -847,7 +847,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* Similarly success 
statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) { kfree(new); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 848aab3693bd..cce1e9ee85af 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -32,6 +32,7 @@ #include <net/pkt_sched.h> #include <net/dst.h> #include <trace/events/qdisc.h> +#include <trace/events/net.h> #include <net/xfrm.h> /* Qdisc to use by default */ @@ -441,6 +442,7 @@ static void dev_watchdog(struct timer_list *t) } if (some_queue_timedout) { + trace_net_dev_xmit_timeout(dev, i); WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 09563c245473..539677120b9f 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -16,6 +16,7 @@ #include <linux/math64.h> #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/rcupdate.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -41,25 +42,88 @@ struct sched_entry { u8 command; }; +struct sched_gate_list { + struct rcu_head rcu; + struct list_head entries; + size_t num_entries; + ktime_t cycle_close_time; + s64 cycle_time; + s64 cycle_time_extension; + s64 base_time; +}; + struct taprio_sched { struct Qdisc **qdiscs; struct Qdisc *root; - s64 base_time; int clockid; atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ * speeds it's sub-nanoseconds per byte */ - size_t num_entries; /* Protects the update side of the RCU protected current_entry */ spinlock_t current_entry_lock; struct sched_entry __rcu *current_entry; - struct list_head entries; + struct sched_gate_list __rcu *oper_sched; + struct 
sched_gate_list __rcu *admin_sched; ktime_t (*get_time)(void); struct hrtimer advance_timer; struct list_head taprio_list; }; +static ktime_t sched_base_time(const struct sched_gate_list *sched) +{ + if (!sched) + return KTIME_MAX; + + return ns_to_ktime(sched->base_time); +} + +static void taprio_free_sched_cb(struct rcu_head *head) +{ + struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu); + struct sched_entry *entry, *n; + + if (!sched) + return; + + list_for_each_entry_safe(entry, n, &sched->entries, list) { + list_del(&entry->list); + kfree(entry); + } + + kfree(sched); +} + +static void switch_schedules(struct taprio_sched *q, + struct sched_gate_list **admin, + struct sched_gate_list **oper) +{ + rcu_assign_pointer(q->oper_sched, *admin); + rcu_assign_pointer(q->admin_sched, NULL); + + if (*oper) + call_rcu(&(*oper)->rcu, taprio_free_sched_cb); + + *oper = *admin; + *admin = NULL; +} + +static ktime_t get_cycle_time(struct sched_gate_list *sched) +{ + struct sched_entry *entry; + ktime_t cycle = 0; + + if (sched->cycle_time != 0) + return sched->cycle_time; + + list_for_each_entry(entry, &sched->entries, list) + cycle = ktime_add_ns(cycle, entry->interval); + + sched->cycle_time = cycle; + + return cycle; +} + static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -136,8 +200,8 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb = NULL; struct sched_entry *entry; - struct sk_buff *skb; u32 gate_mask; int i; @@ -154,10 +218,9 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) * "AdminGateSates" */ gate_mask = entry ? 
entry->gate_mask : TAPRIO_ALL_GATES_OPEN; - rcu_read_unlock(); if (!gate_mask) - return NULL; + goto done; for (i = 0; i < dev->num_tx_queues; i++) { struct Qdisc *child = q->qdiscs[i]; @@ -197,22 +260,72 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch) skb = child->ops->dequeue(child); if (unlikely(!skb)) - return NULL; + goto done; qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; - return skb; + goto done; } - return NULL; +done: + rcu_read_unlock(); + + return skb; +} + +static bool should_restart_cycle(const struct sched_gate_list *oper, + const struct sched_entry *entry) +{ + if (list_is_last(&entry->list, &oper->entries)) + return true; + + if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0) + return true; + + return false; +} + +static bool should_change_schedules(const struct sched_gate_list *admin, + const struct sched_gate_list *oper, + ktime_t close_time) +{ + ktime_t next_base_time, extension_time; + + if (!admin) + return false; + + next_base_time = sched_base_time(admin); + + /* This is the simple case, the close_time would fall after + * the next schedule base_time. + */ + if (ktime_compare(next_base_time, close_time) <= 0) + return true; + + /* This is the cycle_time_extension case, if the close_time + * plus the amount that can be extended would fall after the + * next schedule base_time, we can extend the current schedule + * for that amount. + */ + extension_time = ktime_add_ns(close_time, oper->cycle_time_extension); + + /* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about + * how precisely the extension should be made. So after + * conformance testing, this logic may change. 
+ */ + if (ktime_compare(next_base_time, extension_time) <= 0) + return true; + + return false; } static enum hrtimer_restart advance_sched(struct hrtimer *timer) { struct taprio_sched *q = container_of(timer, struct taprio_sched, advance_timer); + struct sched_gate_list *oper, *admin; struct sched_entry *entry, *next; struct Qdisc *sch = q->root; ktime_t close_time; @@ -220,25 +333,46 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer) spin_lock(&q->current_entry_lock); entry = rcu_dereference_protected(q->current_entry, lockdep_is_held(&q->current_entry_lock)); + oper = rcu_dereference_protected(q->oper_sched, + lockdep_is_held(&q->current_entry_lock)); + admin = rcu_dereference_protected(q->admin_sched, + lockdep_is_held(&q->current_entry_lock)); - /* This is the case that it's the first time that the schedule - * runs, so it only happens once per schedule. The first entry - * is pre-calculated during the schedule initialization. + if (!oper) + switch_schedules(q, &admin, &oper); + + /* This can happen in two cases: 1. this is the very first run + * of this function (i.e. we weren't running any schedule + * previously); 2. The previous schedule just ended. The first + * entry of all schedules are pre-calculated during the + * schedule initialization. 
*/ - if (unlikely(!entry)) { - next = list_first_entry(&q->entries, struct sched_entry, + if (unlikely(!entry || entry->close_time == oper->base_time)) { + next = list_first_entry(&oper->entries, struct sched_entry, list); close_time = next->close_time; goto first_run; } - if (list_is_last(&entry->list, &q->entries)) - next = list_first_entry(&q->entries, struct sched_entry, + if (should_restart_cycle(oper, entry)) { + next = list_first_entry(&oper->entries, struct sched_entry, list); - else + oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time, + oper->cycle_time); + } else { next = list_next_entry(entry, list); + } close_time = ktime_add_ns(entry->close_time, next->interval); + close_time = min_t(ktime_t, close_time, oper->cycle_close_time); + + if (should_change_schedules(admin, oper, close_time)) { + /* Set things so the next time this runs, the new + * schedule runs. + */ + close_time = sched_base_time(admin); + switch_schedules(q, &admin, &oper); + } next->close_time = close_time; taprio_set_budget(q, next); @@ -271,10 +405,12 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) }, - [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, - [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, - [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, - [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, + [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 }, + [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, }; static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, @@ -322,71 +458,8 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, 
return fill_sched_entry(tb, entry, extack); } -/* Returns the number of entries in case of success */ -static int parse_sched_single_entry(struct nlattr *n, - struct taprio_sched *q, - struct netlink_ext_ack *extack) -{ - struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; - struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; - struct sched_entry *entry; - bool found = false; - u32 index; - int err; - - err = nla_parse_nested_deprecated(tb_list, TCA_TAPRIO_SCHED_MAX, n, - entry_list_policy, NULL); - if (err < 0) { - NL_SET_ERR_MSG(extack, "Could not parse nested entry"); - return -EINVAL; - } - - if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { - NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb_entry, - TCA_TAPRIO_SCHED_ENTRY_MAX, - tb_list[TCA_TAPRIO_SCHED_ENTRY], - entry_policy, NULL); - if (err < 0) { - NL_SET_ERR_MSG(extack, "Could not parse nested entry"); - return -EINVAL; - } - - if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { - NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); - return -EINVAL; - } - - index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); - if (index >= q->num_entries) { - NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); - return -EINVAL; - } - - list_for_each_entry(entry, &q->entries, list) { - if (entry->index == index) { - found = true; - break; - } - } - - if (!found) { - NL_SET_ERR_MSG(extack, "Could not find entry"); - return -ENOENT; - } - - err = fill_sched_entry(tb_entry, entry, extack); - if (err < 0) - return err; - - return q->num_entries; -} - static int parse_sched_list(struct nlattr *list, - struct taprio_sched *q, + struct sched_gate_list *sched, struct netlink_ext_ack *extack) { struct nlattr *n; @@ -416,64 +489,42 @@ static int parse_sched_list(struct nlattr *list, return err; } - list_add_tail(&entry->list, &q->entries); + list_add_tail(&entry->list, &sched->entries); i++; } - 
q->num_entries = i; + sched->num_entries = i; return i; } -/* Returns the number of entries in case of success */ -static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, - struct netlink_ext_ack *extack) +static int parse_taprio_schedule(struct nlattr **tb, + struct sched_gate_list *new, + struct netlink_ext_ack *extack) { int err = 0; - int clockid; - - if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && - tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) - return -EINVAL; - - if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) - return -EINVAL; - if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) - return -EINVAL; + if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) { + NL_SET_ERR_MSG(extack, "Adding a single entry is not supported"); + return -ENOTSUPP; + } if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) - q->base_time = nla_get_s64( - tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); + new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); - if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); - - /* We only support static clockids and we don't allow - * for it to be modified after the first init. - */ - if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) - return -EINVAL; + if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]) + new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]); - q->clockid = clockid; - } + if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]) + new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); - else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) - err = parse_sched_single_entry( - tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); - - /* parse_sched_* return the number of entries in the schedule, - * a schedule with zero entries is an error. 
- */ - if (err == 0) { - NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); - return -EINVAL; - } + tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + if (err < 0) + return err; - return err; + return 0; } static int taprio_parse_mqprio_opt(struct net_device *dev, @@ -482,11 +533,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { int i, j; - if (!qopt) { + if (!qopt && !dev->num_tc) { NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); return -EINVAL; } + /* If num_tc is already set, it means that the user already + * configured the mqprio part + */ + if (dev->num_tc) + return 0; + /* Verify num_tc is not out of max range */ if (qopt->num_tc > TC_MAX_QUEUE) { NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); @@ -532,14 +589,15 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, return 0; } -static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start) +static int taprio_get_start_time(struct Qdisc *sch, + struct sched_gate_list *sched, + ktime_t *start) { struct taprio_sched *q = qdisc_priv(sch); - struct sched_entry *entry; ktime_t now, base, cycle; s64 n; - base = ns_to_ktime(q->base_time); + base = sched_base_time(sched); now = q->get_time(); if (ktime_after(base, now)) { @@ -547,11 +605,7 @@ static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start) return 0; } - /* Calculate the cycle_time, by summing all the intervals. - */ - cycle = 0; - list_for_each_entry(entry, &q->entries, list) - cycle = ktime_add_ns(cycle, entry->interval); + cycle = get_cycle_time(sched); /* The qdisc is expected to have at least one sched_entry. Moreover, * any entry must have 'interval' > 0. 
Thus if the cycle time is zero, @@ -569,22 +623,40 @@ static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start) return 0; } -static void taprio_start_sched(struct Qdisc *sch, ktime_t start) +static void setup_first_close_time(struct taprio_sched *q, + struct sched_gate_list *sched, ktime_t base) { - struct taprio_sched *q = qdisc_priv(sch); struct sched_entry *first; - unsigned long flags; + ktime_t cycle; - spin_lock_irqsave(&q->current_entry_lock, flags); + first = list_first_entry(&sched->entries, + struct sched_entry, list); - first = list_first_entry(&q->entries, struct sched_entry, - list); + cycle = get_cycle_time(sched); - first->close_time = ktime_add_ns(start, first->interval); + /* FIXME: find a better place to do this */ + sched->cycle_close_time = ktime_add_ns(base, cycle); + + first->close_time = ktime_add_ns(base, first->interval); taprio_set_budget(q, first); rcu_assign_pointer(q->current_entry, NULL); +} - spin_unlock_irqrestore(&q->current_entry_lock, flags); +static void taprio_start_sched(struct Qdisc *sch, + ktime_t start, struct sched_gate_list *new) +{ + struct taprio_sched *q = qdisc_priv(sch); + ktime_t expires; + + expires = hrtimer_get_expires(&q->advance_timer); + if (expires == 0) + expires = KTIME_MAX; + + /* If the new schedule starts before the next expiration, we + * reprogram it to the earliest one, so we change the admin + * schedule to the operational one at the right time. 
+ */ + start = min_t(ktime_t, start, expires); hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); } @@ -639,10 +711,12 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; + struct sched_gate_list *oper, *admin, *new_admin; struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; - int i, err, size; + int i, err, clockid; + unsigned long flags; ktime_t start; err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, @@ -657,48 +731,64 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - /* A schedule with less than one entry is an error */ - size = parse_taprio_opt(tb, q, extack); - if (size < 0) - return size; + new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL); + if (!new_admin) { + NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule"); + return -ENOMEM; + } + INIT_LIST_HEAD(&new_admin->entries); - hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); - q->advance_timer.function = advance_sched; + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); + admin = rcu_dereference(q->admin_sched); + rcu_read_unlock(); - switch (q->clockid) { - case CLOCK_REALTIME: - q->get_time = ktime_get_real; - break; - case CLOCK_MONOTONIC: - q->get_time = ktime_get; - break; - case CLOCK_BOOTTIME: - q->get_time = ktime_get_boottime; - break; - case CLOCK_TAI: - q->get_time = ktime_get_clocktai; - break; - default: - return -ENOTSUPP; + if (mqprio && (oper || admin)) { + NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported"); + err = -ENOTSUPP; + goto free_sched; } - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *dev_queue; - struct Qdisc *qdisc; + err = parse_taprio_schedule(tb, new_admin, extack); + if (err < 0) + goto free_sched; - dev_queue = netdev_get_tx_queue(dev, i); - qdisc = 
qdisc_create_dflt(dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1)), - extack); - if (!qdisc) - return -ENOMEM; + if (new_admin->num_entries == 0) { + NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule"); + err = -EINVAL; + goto free_sched; + } - if (i < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); - q->qdiscs[i] = qdisc; + /* We only support static clockids and we don't allow + * for it to be modified after the first init. + */ + if (clockid < 0 || + (q->clockid != -1 && q->clockid != clockid)) { + NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported"); + err = -ENOTSUPP; + goto free_sched; + } + + q->clockid = clockid; + } + + if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); + err = -EINVAL; + goto free_sched; + } + + taprio_set_picos_per_byte(dev, q); + + /* Protects against enqueue()/dequeue() */ + spin_lock_bh(qdisc_lock(sch)); + + if (!hrtimer_active(&q->advance_timer)) { + hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; } if (mqprio) { @@ -714,24 +804,60 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->prio_tc_map[i]); } - taprio_set_picos_per_byte(dev, q); + switch (q->clockid) { + case CLOCK_REALTIME: + q->get_time = ktime_get_real; + break; + case CLOCK_MONOTONIC: + q->get_time = ktime_get; + break; + case CLOCK_BOOTTIME: + q->get_time = ktime_get_boottime; + break; + case CLOCK_TAI: + q->get_time = ktime_get_clocktai; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + err = -EINVAL; + goto unlock; + } - err = taprio_get_start_time(sch, &start); + err = taprio_get_start_time(sch, new_admin, &start); if (err < 0) { NL_SET_ERR_MSG(extack, "Internal error: failed get start 
time"); - return err; + goto unlock; } - taprio_start_sched(sch, start); + setup_first_close_time(q, new_admin, start); - return 0; + /* Protects against advance_sched() */ + spin_lock_irqsave(&q->current_entry_lock, flags); + + taprio_start_sched(sch, start, new_admin); + + rcu_assign_pointer(q->admin_sched, new_admin); + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); + new_admin = NULL; + + spin_unlock_irqrestore(&q->current_entry_lock, flags); + + err = 0; + +unlock: + spin_unlock_bh(qdisc_lock(sch)); + +free_sched: + kfree(new_admin); + + return err; } static void taprio_destroy(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - struct sched_entry *entry, *n; unsigned int i; spin_lock(&taprio_list_lock); @@ -750,10 +876,11 @@ static void taprio_destroy(struct Qdisc *sch) netdev_set_num_tc(dev, 0); - list_for_each_entry_safe(entry, n, &q->entries, list) { - list_del(&entry->list); - kfree(entry); - } + if (q->oper_sched) + call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); + + if (q->admin_sched) + call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb); } static int taprio_init(struct Qdisc *sch, struct nlattr *opt, @@ -761,12 +888,12 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + int i; - INIT_LIST_HEAD(&q->entries); spin_lock_init(&q->current_entry_lock); - /* We may overwrite the configuration later */ hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); + q->advance_timer.function = advance_sched; q->root = sch; @@ -796,6 +923,25 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, list_add(&q->taprio_list, &taprio_list); spin_unlock(&taprio_list_lock); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + + dev_queue = netdev_get_tx_queue(dev, i); + qdisc = qdisc_create_dflt(dev_queue, + &pfifo_qdisc_ops, + 
TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(i + 1)), + extack); + if (!qdisc) + return -ENOMEM; + + if (i < dev->real_num_tx_queues) + qdisc_hash_add(qdisc, false); + + q->qdiscs[i] = qdisc; + } + return taprio_change(sch, opt, extack); } @@ -867,15 +1013,55 @@ nla_put_failure: return -1; } +static int dump_schedule(struct sk_buff *msg, + const struct sched_gate_list *root) +{ + struct nlattr *entry_list; + struct sched_entry *entry; + + if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, + root->base_time, TCA_TAPRIO_PAD)) + return -1; + + if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME, + root->cycle_time, TCA_TAPRIO_PAD)) + return -1; + + if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, + root->cycle_time_extension, TCA_TAPRIO_PAD)) + return -1; + + entry_list = nla_nest_start_noflag(msg, + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); + if (!entry_list) + goto error_nest; + + list_for_each_entry(entry, &root->entries, list) { + if (dump_entry(msg, entry) < 0) + goto error_nest; + } + + nla_nest_end(msg, entry_list); + return 0; + +error_nest: + nla_nest_cancel(msg, entry_list); + return -1; +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sched_gate_list *oper, *admin; struct tc_mqprio_qopt opt = { 0 }; - struct nlattr *nest, *entry_list; - struct sched_entry *entry; + struct nlattr *nest, *sched_nest; unsigned int i; + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); + admin = rcu_dereference(q->admin_sched); + opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); @@ -886,35 +1072,41 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) - return -ENOSPC; + goto start_error; if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) goto options_error; - if (nla_put_s64(skb, 
TCA_TAPRIO_ATTR_SCHED_BASE_TIME, - q->base_time, TCA_TAPRIO_PAD)) - goto options_error; - if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; - entry_list = nla_nest_start_noflag(skb, - TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); - if (!entry_list) + if (oper && dump_schedule(skb, oper)) goto options_error; - list_for_each_entry(entry, &q->entries, list) { - if (dump_entry(skb, entry) < 0) - goto options_error; - } + if (!admin) + goto done; + + sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED); + + if (dump_schedule(skb, admin)) + goto admin_error; - nla_nest_end(skb, entry_list); + nla_nest_end(skb, sched_nest); + +done: + rcu_read_unlock(); return nla_nest_end(skb, nest); +admin_error: + nla_nest_cancel(skb, sched_nest); + options_error: nla_nest_cancel(skb, nest); - return -1; + +start_error: + rcu_read_unlock(); + return -ENOSPC; } static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) @@ -1001,6 +1193,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .id = "taprio", .priv_size = sizeof(struct taprio_sched), .init = taprio_init, + .change = taprio_change, .destroy = taprio_destroy, .peek = taprio_peek, .dequeue = taprio_dequeue, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1d143bc3f73d..4aa03588f87b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, } -/* Sent the next ASCONF packet currently stored in the association. - * This happens after the ASCONF_ACK was succeffully processed. - */ -static void sctp_cmd_send_asconf(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - - /* Send the next asconf chunk from the addip chunk - * queue. 
- */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - struct sctp_chunk *asconf = list_entry(entry, - struct sctp_chunk, list); - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(net, asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } -} - - /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; - case SCTP_CMD_SEND_NEXT_ASCONF: - sctp_cmd_send_asconf(asoc); - break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7dfc34b28f4f..e3f4abe6134e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); +} + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, 
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands); abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); diff --git a/net/socket.c b/net/socket.c index a180e1a9ff23..472fbefa5d9b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -90,6 +90,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/nospec.h> +#include <linux/indirect_call_wrapper.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -108,6 +109,13 @@ #include <net/busy_poll.h> #include <linux/errqueue.h> +/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */ +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__) +#endif + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sysctl_net_busy_read __read_mostly; unsigned int sysctl_net_busy_poll __read_mostly; @@ -645,10 +653,12 @@ EXPORT_SYMBOL(__sock_tx_timestamp); * Sends @msg through @sock, passing through LSM. * Returns the number of bytes sent, or an error code. */ - +INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, + size_t)); static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); + int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock, + msg, msg_data_left(msg)); BUG_ON(ret == -EIOCBQUEUED); return ret; } @@ -874,11 +884,13 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); * Receives @msg from @sock, passing through LSM. Returns the total number * of bytes received, or an error. 
*/ - +INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, + size_t , int )); static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { - return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags); + return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg, + msg_data_left(msg), flags); } int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) diff --git a/net/tipc/link.c b/net/tipc/link.c index 1c514b64a0a9..f5cd986e1e50 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1705,6 +1705,41 @@ tnl: } } +/** + * tipc_link_failover_prepare() - prepare tnl for link failover + * + * This is a special version of the precursor - tipc_link_tnl_prepare(), + * see the tipc_node_link_failover() for details + * + * @l: failover link + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited + */ +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *fdefq = &tnl->failover_deferdq; + + tipc_link_create_dummy_tnl_msg(tnl, xmitq); + + /* This failover link enpoint was never established before, + * so it has not received anything from peer. + * Otherwise, it must be a normal failover situation or the + * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes + * would have to start over from scratch instead. 
+ */ + WARN_ON(l && tipc_link_is_up(l)); + tnl->drop_point = 1; + tnl->failover_reasm_skb = NULL; + + /* Initiate the link's failover deferdq */ + if (unlikely(!skb_queue_empty(fdefq))) { + pr_warn("Link failover deferdq not empty: %d!\n", + skb_queue_len(fdefq)); + __skb_queue_purge(fdefq); + } +} + /* tipc_link_validate_msg(): validate message against current link state * Returns true if message should be accepted, otherwise false */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 8439e0ee53a8..adcad65e761c 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -90,6 +90,8 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, struct sk_buff_head *xmitq); +void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 0eb1bf850219..9e106d3ed187 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -714,7 +714,6 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); - n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -757,6 +756,45 @@ static void tipc_node_link_up(struct tipc_node *n, int bearer_id, } /** + * tipc_node_link_failover() - start failover in case "half-failover" + * + * This function is only called in a very special situation where link + * failover can be already started on peer node but not on this node. + * This can happen when e.g. + * 1. Both links <1A-2A>, <1B-2B> down + * 2. Link endpoint 2A up, but 1A still down (e.g. 
due to network + * disturbance, wrong session, etc.) + * 3. Link <1B-2B> up + * 4. Link endpoint 2A down (e.g. due to link tolerance timeout) + * 5. Node B starts failover onto link <1B-2B> + * + * ==> Node A does never start link/node failover! + * + * @n: tipc node structure + * @l: link peer endpoint failingover (- can be NULL) + * @tnl: tunnel link + * @xmitq: queue for messages to be xmited on tnl link later + */ +static void tipc_node_link_failover(struct tipc_node *n, struct tipc_link *l, + struct tipc_link *tnl, + struct sk_buff_head *xmitq) +{ + /* Avoid to be "self-failover" that can never end */ + if (!tipc_link_is_up(tnl)) + return; + + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + + n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1); + tipc_link_failover_prepare(l, tnl, xmitq); + + if (l) + tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); +} + +/** * __tipc_node_link_down - handle loss of link */ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, @@ -1675,14 +1713,16 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, and this happened before - * the tunnel link came up, FAILOVER was never sent. Ensure that - * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. + * the tunnel link came up, node failover was never started. + * Ensure that a FAILOVER_MSG is sent to get peer out of + * NODE_FAILINGOVER state, also this node must accept + * TUNNEL_MSGs from peer. 
*/ - if (n->state != NODE_FAILINGOVER && !n->failover_sent) { - tipc_link_create_dummy_tnl_msg(l, xmitq); - n->failover_sent = true; - } + if (n->state != NODE_FAILINGOVER) + tipc_node_link_failover(n, pl, l, xmitq); + /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 26f26e71ef3f..e225c81e6b35 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -580,7 +580,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); - int err = 0, offset = rxm->offset, copy, nsg; + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; struct sk_buff *skb_iter, *unused; struct scatterlist sg[1]; char *orig_buf, *buf; @@ -611,25 +611,42 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) else err = 0; - copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; - if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, data_len); - offset += copy; - buf += copy; + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy); + offset += copy; + buf += copy; + } + + pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { - copy = min_t(int, skb_iter->len, - rxm->full_len - offset + rxm->offset - - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe. 
+ */ + if (pos + skb_iter->len <= offset) + goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset); if (skb_iter->decrypted) - skb_store_bits(skb_iter, offset, buf, copy); + skb_store_bits(skb_iter, frag_pos, buf, copy); offset += copy; buf += copy; +done_with_frag: + pos += skb_iter->len; } free_buf: diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index a3ebd4b02714..c3a5fe624b4e 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -201,13 +201,14 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln) skb_put(nskb, skb->len); memcpy(nskb->data, skb->data, headln); - update_chksum(nskb, headln); nskb->destructor = skb->destructor; nskb->sk = sk; skb->destructor = NULL; skb->sk = NULL; + update_chksum(nskb, headln); + delta = nskb->truesize - skb->truesize; if (likely(delta < 0)) WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc)); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 816425ffe05a..4831ad745f91 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3769,10 +3769,9 @@ void wiphy_regulatory_register(struct wiphy *wiphy) /* * The last request may have been received before this * registration call. Call the driver notifier if - * initiator is USER and user type is CELL_BASE. + * initiator is USER. 
*/ - if (lr->initiator == NL80211_REGDOM_SET_BY_USER && - lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) reg_call_notifier(wiphy, lr); } diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 5d43aaa17027..1ec8071226b2 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -3,7 +3,7 @@ # config XFRM bool - depends on NET + depends on INET select GRO_CELLS select SKB_EXTENSIONS @@ -15,9 +15,9 @@ config XFRM_ALGO select XFRM select CRYPTO +if INET config XFRM_USER tristate "Transformation user configuration interface" - depends on INET select XFRM_ALGO ---help--- Support for Transformation(XFRM) user configuration interface @@ -56,7 +56,7 @@ config XFRM_MIGRATE config XFRM_STATISTICS bool "Transformation statistics" - depends on INET && XFRM && PROC_FS + depends on XFRM && PROC_FS ---help--- This statistics is not a SNMP/MIB specification but shows statistics about transformation error (or almost error) factor @@ -95,3 +95,5 @@ config NET_KEY_MIGRATE <draft-sugimoto-mip6-pfkey-migrate>. If unsure, say N. 
+ +endif # INET diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 2db1626557c5..b24cd86a02c3 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -23,6 +23,60 @@ #include <linux/notifier.h> #ifdef CONFIG_XFRM_OFFLOAD +static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, + unsigned int hsize) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + hsize + x->props.header_len); + + if (xo->flags & XFRM_GSO_SEGMENT) { + skb_reset_transport_header(skb); + skb->transport_header -= x->props.header_len; + } +} + +static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, + unsigned int hsize) + +{ + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo->flags & XFRM_GSO_SEGMENT) + skb->transport_header = skb->network_header + hsize; + + skb_reset_mac_len(skb); + pskb_pull(skb, skb->mac_len + x->props.header_len); +} + +/* Adjust pointers into the packet when IPsec is done at layer2 */ +static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + if (x->outer_mode.family == AF_INET) + return __xfrm_mode_tunnel_prep(x, skb, + sizeof(struct iphdr)); + if (x->outer_mode.family == AF_INET6) + return __xfrm_mode_tunnel_prep(x, skb, + sizeof(struct ipv6hdr)); + break; + case XFRM_MODE_TRANSPORT: + if (x->outer_mode.family == AF_INET) + return __xfrm_transport_prep(x, skb, + sizeof(struct iphdr)); + if (x->outer_mode.family == AF_INET6) + return __xfrm_transport_prep(x, skb, + sizeof(struct ipv6hdr)); + break; + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + case XFRM_MODE_BEET: + break; + } +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -78,7 +132,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur } if (!skb->next) { - x->outer_mode->xmit(x, skb); + 
esp_features |= skb->dev->gso_partial_features; + xfrm_outer_mode_prep(x, skb); xo->flags |= XFRM_DEV_RESUME; @@ -101,12 +156,14 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur do { struct sk_buff *nskb = skb2->next; + + esp_features |= skb->dev->gso_partial_features; skb_mark_not_on_list(skb2); xo = xfrm_offload(skb2); xo->flags |= XFRM_DEV_RESUME; - x->outer_mode->xmit(x, skb2); + xfrm_outer_mode_prep(x, skb2); err = x->type_offload->xmit(x, skb2, esp_features); if (!err) { diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h new file mode 100644 index 000000000000..c7b0318938e2 --- /dev/null +++ b/net/xfrm/xfrm_inout.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/ipv6.h> +#include <net/dsfield.h> +#include <net/xfrm.h> + +#ifndef XFRM_INOUT_H +#define XFRM_INOUT_H 1 + +static inline void xfrm6_beet_make_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + iph->version = 6; + + memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(iph->flow_lbl)); + iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; + + ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); + iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; +} + +static inline void xfrm4_beet_make_header(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + iph->ihl = 5; + iph->version = 4; + + iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; + iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + + iph->id = XFRM_MODE_SKB_CB(skb)->id; + iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off; + iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl; +} + +#endif diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b3b613660d44..314973aaa414 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -21,6 +21,8 @@ #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +#include "xfrm_inout.h" + struct xfrm_trans_tasklet { struct tasklet_struct tasklet; struct sk_buff_head queue; @@ -166,35 +168,299 @@ int xfrm_parse_spi(struct 
sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) } EXPORT_SYMBOL(xfrm_parse_spi); -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph; + int optlen = 0; + int err = -EINVAL; + + if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) { + struct ip_beet_phdr *ph; + int phlen; + + if (!pskb_may_pull(skb, sizeof(*ph))) + goto out; + + ph = (struct ip_beet_phdr *)skb->data; + + phlen = sizeof(*ph) + ph->padlen; + optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen); + if (optlen < 0 || optlen & 3 || optlen > 250) + goto out; + + XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr; + + if (!pskb_may_pull(skb, phlen)) + goto out; + __skb_pull(skb, phlen); + } + + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + xfrm4_beet_make_header(skb); + + iph = ip_hdr(skb); + + iph->ihl += optlen / 4; + iph->tot_len = htons(skb->len); + iph->daddr = x->sel.daddr.a4; + iph->saddr = x->sel.saddr.a4; + iph->check = 0; + iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); + err = 0; +out: + return err; +} + +static void ipip_ecn_decapsulate(struct sk_buff *skb) +{ + struct iphdr *inner_iph = ipip_hdr(skb); + + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) + IP_ECN_set_ce(inner_iph); +} + +static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_mode *inner_mode = x->inner_mode; + int err = -EINVAL; + + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto out; + + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipip_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip_ecn_decapsulate(skb); + + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + if (skb->mac_len) + 
eth_hdr(skb)->h_proto = skb->protocol; + + err = 0; + +out: + return err; +} + +static void ipip6_ecn_decapsulate(struct sk_buff *skb) +{ + struct ipv6hdr *inner_iph = ipipv6_hdr(skb); + + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) + IP6_ECN_set_ce(skb, inner_iph); +} + +static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = -EINVAL; + + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) + goto out; + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto out; + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), + ipipv6_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; + + err = 0; + +out: + return err; +} + +static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *ip6h; + int size = sizeof(struct ipv6hdr); int err; - err = x->outer_mode->afinfo->extract_input(x, skb); + err = skb_cow_head(skb, size + skb->mac_len); if (err) + goto out; + + __skb_push(skb, size); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + xfrm6_beet_make_header(skb); + + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(skb->len - size); + ip6h->daddr = x->sel.daddr.in6; + ip6h->saddr = x->sel.saddr.in6; + err = 0; +out: + return err; +} + +/* Remove encapsulation header. + * + * The IP header will be moved over the top of the encapsulation + * header. + * + * On entry, the transport header shall point to where the IP header + * should be and the network header shall be set to where the IP + * header currently is. skb->data shall point to the start of the + * payload. 
+ */ +static int +xfrm_inner_mode_encap_remove(struct xfrm_state *x, + const struct xfrm_mode *inner_mode, + struct sk_buff *skb) +{ + switch (inner_mode->encap) { + case XFRM_MODE_BEET: + if (inner_mode->family == AF_INET) + return xfrm4_remove_beet_encap(x, skb); + if (inner_mode->family == AF_INET6) + return xfrm6_remove_beet_encap(x, skb); + break; + case XFRM_MODE_TUNNEL: + if (inner_mode->family == AF_INET) + return xfrm4_remove_tunnel_encap(x, skb); + if (inner_mode->family == AF_INET6) + return xfrm6_remove_tunnel_encap(x, skb); + break; + } + + WARN_ON_ONCE(1); + return -EOPNOTSUPP; +} + +static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) +{ + const struct xfrm_mode *inner_mode = &x->inner_mode; + const struct xfrm_state_afinfo *afinfo; + int err = -EAFNOSUPPORT; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); + if (likely(afinfo)) + err = afinfo->extract_input(x, skb); + + if (err) { + rcu_read_unlock(); return err; + } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) + if (!inner_mode) { + rcu_read_unlock(); return -EAFNOSUPPORT; + } } - skb->protocol = inner_mode->afinfo->eth_proto; - return inner_mode->input2(x, skb); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (unlikely(!afinfo)) { + rcu_read_unlock(); + return -EAFNOSUPPORT; + } + + skb->protocol = afinfo->eth_proto; + rcu_read_unlock(); + return xfrm_inner_mode_encap_remove(x, inner_mode, skb); +} + +/* Remove encapsulation header. + * + * The IP header will be moved over the top of the encapsulation header. + * + * On entry, skb_transport_header() shall point to where the IP header + * should be and skb_network_header() shall be set to where the IP header + * currently is. skb->data shall point to the start of the payload. 
+ */ +static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) +{ + int ihl = skb->data - skb_transport_header(skb); + + if (skb->transport_header != skb->network_header) { + memmove(skb_transport_header(skb), + skb_network_header(skb), ihl); + skb->network_header = skb->transport_header; + } + ip_hdr(skb)->tot_len = htons(skb->len + ihl); + skb_reset_transport_header(skb); + return 0; +} + +static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int ihl = skb->data - skb_transport_header(skb); + + if (skb->transport_header != skb->network_header) { + memmove(skb_transport_header(skb), + skb_network_header(skb), ihl); + skb->network_header = skb->transport_header; + } + ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - + sizeof(struct ipv6hdr)); + skb_reset_transport_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + +static int xfrm_inner_mode_input(struct xfrm_state *x, + const struct xfrm_mode *inner_mode, + struct sk_buff *skb) +{ + switch (inner_mode->encap) { + case XFRM_MODE_BEET: + case XFRM_MODE_TUNNEL: + return xfrm_prepare_input(x, skb); + case XFRM_MODE_TRANSPORT: + if (inner_mode->family == AF_INET) + return xfrm4_transport_input(x, skb); + if (inner_mode->family == AF_INET6) + return xfrm6_transport_input(x, skb); + break; + case XFRM_MODE_ROUTEOPTIMIZATION: + WARN_ON_ONCE(1); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return -EOPNOTSUPP; } -EXPORT_SYMBOL(xfrm_prepare_input); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { + const struct xfrm_state_afinfo *afinfo; struct net *net = dev_net(skb->dev); + const struct xfrm_mode *inner_mode; int err; __be32 seq; __be32 seq_hi; struct xfrm_state *x = NULL; xfrm_address_t *daddr; - struct xfrm_mode *inner_mode; u32 mark = skb->mark; unsigned int family = AF_UNSPEC; int decaps = 0; @@ -216,7 +482,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, 
__be32 spi, int encap_type) goto drop; } - family = x->outer_mode->afinfo->family; + family = x->outer_mode.family; /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { @@ -400,7 +666,7 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -410,12 +676,12 @@ resume: } } - if (inner_mode->input(x, skb)) { + if (xfrm_inner_mode_input(x, inner_mode, skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } - if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { + if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; } @@ -425,7 +691,7 @@ resume: * transport mode so the outer address is identical. */ daddr = &x->id.daddr; - family = x->outer_mode->afinfo->family; + family = x->outer_mode.family; err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { @@ -453,7 +719,12 @@ resume: if (xo) xfrm_gro = xo->flags & XFRM_GRO; - err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); + err = -EAFNOSUPPORT; + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(x->inner_mode.family); + if (likely(afinfo)) + err = afinfo->transport_finish(skb, xfrm_gro || async); + rcu_read_unlock(); if (xfrm_gro) { sp = skb_sec_path(skb); if (sp) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index dbb3c1945b5c..ad3a2555c517 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) return NULL; } -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) +static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, + unsigned short family) { struct xfrmi_net *xfrmn; - int ifindex; struct xfrm_if *xi; + int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) return NULL; + switch (family) { + case AF_INET6: + ifindex = 
inet6_sdif(skb); + break; + case AF_INET: + ifindex = inet_sdif(skb); + break; + } + if (!ifindex) + ifindex = skb->dev->ifindex; + xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); - ifindex = skb->dev->ifindex; for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { if (ifindex == xi->dev->ifindex && @@ -244,8 +255,8 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { + const struct xfrm_mode *inner_mode; struct pcpu_sw_netstats *tstats; - struct xfrm_mode *inner_mode; struct net_device *dev; struct xfrm_state *x; struct xfrm_if *xi; @@ -273,7 +284,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) xnet = !net_eq(xi->net, dev_net(skb->dev)); if (xnet) { - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -285,7 +296,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) } if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, - inner_mode->afinfo->family)) + inner_mode->family)) return -EPERM; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 9333153bafda..a55510f9ff35 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -17,9 +17,13 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/inet_ecn.h> #include <net/xfrm.h> +#include "xfrm_inout.h" + static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); +static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -50,6 +54,360 @@ static struct dst_entry *skb_dst_pop(struct sk_buff *skb) return child; } +/* Add encapsulation header. + * + * The IP header will be moved forward to make space for the encapsulation + * header. 
+ */ +static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + int ihl = iph->ihl * 4; + + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + ihl; + __skb_pull(skb, ihl); + memmove(skb_network_header(skb), iph, ihl); + return 0; +} + +/* Add encapsulation header. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the encapsulation header. + */ +static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + iph = ipv6_hdr(skb); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + if (hdr_len < 0) + return hdr_len; + skb_set_mac_header(skb, + (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + +/* Add route optimization header space. + * + * The IP header and mutable extension headers will be moved forward to make + * space for the route optimization header. 
+ */ +static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph; + u8 *prevhdr; + int hdr_len; + + iph = ipv6_hdr(skb); + + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + if (hdr_len < 0) + return hdr_len; + skb_set_mac_header(skb, + (prevhdr - x->props.header_len) - skb->data); + skb_set_network_header(skb, -x->props.header_len); + skb->transport_header = skb->network_header + hdr_len; + __skb_pull(skb, hdr_len); + memmove(ipv6_hdr(skb), iph, hdr_len); + + x->lastused = ktime_get_real_seconds(); + + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + */ +static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_beet_phdr *ph; + struct iphdr *top_iph; + int hdrlen, optlen; + + hdrlen = 0; + optlen = XFRM_MODE_SKB_CB(skb)->optlen; + if (unlikely(optlen)) + hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb_set_network_header(skb, -x->props.header_len - hdrlen + + (XFRM_MODE_SKB_CB(skb)->ihl - sizeof(*top_iph))); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + xfrm4_beet_make_header(skb); + + ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); + + top_iph = ip_hdr(skb); + + if (unlikely(optlen)) { + if (WARN_ON(optlen < 0)) + return -EINVAL; + + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = optlen / 8; + ph->nexthdr = top_iph->protocol; + if (ph->padlen) + memset(ph + 1, IPOPT_NOP, ph->padlen); + + top_iph->protocol = IPPROTO_BEETPH; + top_iph->ihl = sizeof(struct iphdr) / 4; + } + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + + return 0; +} + +/* Add encapsulation header. 
+ * + * The top IP header will be constructed per RFC 2401. + */ +static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct iphdr *top_iph; + int flags; + + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*top_iph); + top_iph = ip_hdr(skb); + + top_iph->ihl = 5; + top_iph->version = 4; + + top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); + + /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + top_iph->tos = 0; + else + top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; + top_iph->tos = INET_ECN_encapsulate(top_iph->tos, + XFRM_MODE_SKB_CB(skb)->tos); + + flags = x->props.flags; + if (flags & XFRM_STATE_NOECN) + IP_ECN_clear(top_iph); + + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
+ 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); + + top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + ip_select_ident(dev_net(dst->dev), skb, NULL); + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct ipv6hdr *top_iph; + int dsfield; + + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + + skb_set_network_header(skb, -x->props.header_len); + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); + top_iph = ipv6_hdr(skb); + + top_iph->version = 6; + + memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); + + if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) + dsfield = 0; + else + dsfield = XFRM_MODE_SKB_CB(skb)->tos; + dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos); + if (x->props.flags & XFRM_STATE_NOECN) + dsfield &= ~INET_ECN_MASK; + ipv6_change_dsfield(top_iph, 0, dsfield); + top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; + return 0; +} + +static int xfrm6_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *top_iph; + struct ip_beet_phdr *ph; + int optlen, hdr_len; + + hdr_len = 0; + optlen = XFRM_MODE_SKB_CB(skb)->optlen; + if (unlikely(optlen)) + hdr_len += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb_set_network_header(skb, -x->props.header_len - hdr_len); + if (x->sel.family != AF_INET6) + skb->network_header += IPV4_BEET_PHMAXLEN; + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + 
skb->transport_header = skb->network_header + sizeof(*top_iph); + ph = __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdr_len); + + xfrm6_beet_make_header(skb); + + top_iph = ipv6_hdr(skb); + if (unlikely(optlen)) { + if (WARN_ON(optlen < 0)) + return -EINVAL; + + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = optlen / 8; + ph->nexthdr = top_iph->nexthdr; + if (ph->padlen) + memset(ph + 1, IPOPT_NOP, ph->padlen); + + top_iph->nexthdr = IPPROTO_BEETPH; + } + + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; + return 0; +} +#endif + +/* Add encapsulation header. + * + * On exit, the transport header will be set to the start of the + * encapsulation header to be filled in by x->type->output and the mac + * header will be set to the nextheader (protocol for IPv4) field of the + * extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. + * The value of the network header will always point to the top IP header + * while skb->data will point to the payload. 
+ */ +static int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm_inner_extract_output(x, skb); + if (err) + return err; + + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; + skb->protocol = htons(ETH_P_IP); + + switch (x->outer_mode.encap) { + case XFRM_MODE_BEET: + return xfrm4_beet_encap_add(x, skb); + case XFRM_MODE_TUNNEL: + return xfrm4_tunnel_encap_add(x, skb); + } + + WARN_ON_ONCE(1); + return -EOPNOTSUPP; +} + +static int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm_inner_extract_output(x, skb); + if (err) + return err; + + skb->ignore_df = 1; + skb->protocol = htons(ETH_P_IPV6); + + switch (x->outer_mode.encap) { + case XFRM_MODE_BEET: + return xfrm6_beet_encap_add(x, skb); + case XFRM_MODE_TUNNEL: + return xfrm6_tunnel_encap_add(x, skb); + default: + WARN_ON_ONCE(1); + return -EOPNOTSUPP; + } +#endif + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +} + +static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_BEET: + case XFRM_MODE_TUNNEL: + if (x->outer_mode.family == AF_INET) + return xfrm4_prepare_output(x, skb); + if (x->outer_mode.family == AF_INET6) + return xfrm6_prepare_output(x, skb); + break; + case XFRM_MODE_TRANSPORT: + if (x->outer_mode.family == AF_INET) + return xfrm4_transport_output(x, skb); + if (x->outer_mode.family == AF_INET6) + return xfrm6_transport_output(x, skb); + break; + case XFRM_MODE_ROUTEOPTIMIZATION: + if (x->outer_mode.family == AF_INET6) + return xfrm6_ro_output(x, skb); + WARN_ON_ONCE(1); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return -EOPNOTSUPP; +} + +#if IS_ENABLED(CONFIG_NET_PKTGEN) +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) +{ + return xfrm_outer_mode_output(x, skb); +} +EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output); +#endif + static int xfrm_output_one(struct sk_buff *skb, int 
err) { struct dst_entry *dst = skb_dst(skb); @@ -68,7 +426,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb->mark = xfrm_smark_get(skb->mark, x); - err = x->outer_mode->output(x, skb); + err = xfrm_outer_mode_output(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; @@ -131,7 +489,7 @@ resume: } skb_dst_set(skb, dst); x = dst->xfrm; - } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); + } while (x && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL)); return 0; @@ -258,20 +616,29 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_mode *inner_mode; + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; + int err = -EAFNOSUPPORT; + if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, xfrm_af2proto(skb_dst(skb)->ops->family)); else - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode; if (inner_mode == NULL) return -EAFNOSUPPORT; - return inner_mode->afinfo->extract_output(x, skb); + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + err = afinfo->extract_output(x, skb); + rcu_read_unlock(); + + return err; } -EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); void xfrm_local_error(struct sk_buff *skb, int mtu) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8d1a898d0ba5..410233c5681e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -27,10 +27,14 @@ #include <linux/cpu.h> #include <linux/audit.h> #include <linux/rhashtable.h> +#include <linux/if_tunnel.h> #include <net/dst.h> #include <net/flow.h> #include <net/xfrm.h> #include <net/ip.h> +#if IS_ENABLED(CONFIG_IPV6_MIP6) +#include <net/mip6.h> +#endif #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif @@ -2450,18 +2454,10 @@ xfrm_tmpl_resolve(struct 
xfrm_policy **pols, int npols, const struct flowi *fl, static int xfrm_get_tos(const struct flowi *fl, int family) { - const struct xfrm_policy_afinfo *afinfo; - int tos; + if (family == AF_INET) + return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; - afinfo = xfrm_policy_get_afinfo(family); - if (!afinfo) - return 0; - - tos = afinfo->get_tos(fl); - - rcu_read_unlock(); - - return tos; + return 0; } static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) @@ -2499,21 +2495,14 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) return xdst; } -static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) +static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) { - const struct xfrm_policy_afinfo *afinfo = - xfrm_policy_get_afinfo(dst->ops->family); - int err; - - if (!afinfo) - return -EINVAL; - - err = afinfo->init_path(path, dst, nfheader_len); - - rcu_read_unlock(); - - return err; + if (dst->ops->family == AF_INET6) { + struct rt6_info *rt = (struct rt6_info *)dst; + path->path_cookie = rt6_get_cookie(rt); + path->u.rt6.rt6i_nfheader_len = nfheader_len; + } } static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, @@ -2545,10 +2534,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, const struct flowi *fl, struct dst_entry *dst) { + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; - struct xfrm_mode *inner_mode; struct xfrm_dst *xdst_prev = NULL; struct xfrm_dst *xdst0 = NULL; int i = 0; @@ -2594,7 +2584,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } } else - inner_mode = xfrm[i]->inner_mode; + inner_mode = &xfrm[i]->inner_mode; xdst->route = dst; dst_copy_metrics(dst1, dst); @@ -2622,7 +2612,14 @@ static struct dst_entry 
*xfrm_bundle_create(struct xfrm_policy *policy, dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = inner_mode->afinfo->output; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + dst1->output = afinfo->output; + else + dst1->output = dst_discard_out; + rcu_read_unlock(); xdst_prev = xdst; @@ -3263,20 +3260,229 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star return start; } +static void +decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) +{ + const struct iphdr *iph = ip_hdr(skb); + u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; + + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + + if (!ip_is_fragment(iph)) { + switch (iph->protocol) { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; + + fl4->fl4_sport = ports[!!reverse]; + fl4->fl4_dport = ports[!reverse]; + } + break; + case IPPROTO_ICMP: + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; + + fl4->fl4_icmp_type = icmp[0]; + fl4->fl4_icmp_code = icmp[1]; + } + break; + case IPPROTO_ESP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; + + fl4->fl4_ipsec_spi = ehdr[0]; + } + break; + case IPPROTO_AH: + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = 
(__be32 *)xprth; + + fl4->fl4_ipsec_spi = ah_hdr[1]; + } + break; + case IPPROTO_COMP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; + + fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + } + break; + case IPPROTO_GRE: + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; + + if (greflags[0] & GRE_KEY) { + if (greflags[0] & GRE_CSUM) + gre_hdr++; + fl4->fl4_gre_key = gre_hdr[1]; + } + } + break; + default: + fl4->fl4_ipsec_spi = 0; + break; + } + } + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void +decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) +{ + struct flowi6 *fl6 = &fl->u.ip6; + int onlyproto = 0; + const struct ipv6hdr *hdr = ipv6_hdr(skb); + u32 offset = sizeof(*hdr); + struct ipv6_opt_hdr *exthdr; + const unsigned char *nh = skb_network_header(skb); + u16 nhoff = IP6CB(skb)->nhoff; + int oif = 0; + u8 nexthdr; + + if (!nhoff) + nhoff = offsetof(struct ipv6hdr, nexthdr); + + nexthdr = nh[nhoff]; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; + + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; + + fl6->daddr = reverse ? hdr->saddr : hdr->daddr; + fl6->saddr = reverse ? 
hdr->daddr : hdr->saddr; + + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { + nh = skb_network_header(skb); + exthdr = (struct ipv6_opt_hdr *)(nh + offset); + + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + onlyproto = 1; + /* fall through */ + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + offset += ipv6_optlen(exthdr); + nexthdr = exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(nh + offset); + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + if (!onlyproto && (nh + offset + 4 < skb->data || + pskb_may_pull(skb, nh + offset + 4 - skb->data))) { + __be16 *ports; + + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); + fl6->fl6_sport = ports[!!reverse]; + fl6->fl6_dport = ports[!reverse]; + } + fl6->flowi6_proto = nexthdr; + return; + case IPPROTO_ICMPV6: + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { + u8 *icmp; + + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); + fl6->fl6_icmp_type = icmp[0]; + fl6->fl6_icmp_code = icmp[1]; + } + fl6->flowi6_proto = nexthdr; + return; +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPPROTO_MH: + offset += ipv6_optlen(exthdr); + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { + struct ip6_mh *mh; + + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); + fl6->fl6_mh_type = mh->ip6mh_type; + } + fl6->flowi6_proto = nexthdr; + return; +#endif + /* XXX Why are there these headers? 
*/ + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: + default: + fl6->fl6_ipsec_spi = 0; + fl6->flowi6_proto = nexthdr; + return; + } + } +} +#endif + int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { - const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int err; - - if (unlikely(afinfo == NULL)) + switch (family) { + case AF_INET: + decode_session4(skb, fl, reverse); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + decode_session6(skb, fl, reverse); + break; +#endif + default: return -EAFNOSUPPORT; + } - afinfo->decode_session(skb, fl, reverse); - - err = security_xfrm_decode_session(skb, &fl->flowi_secid); - rcu_read_unlock(); - return err; + return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session); @@ -3313,7 +3519,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, ifcb = xfrm_if_get_cb(); if (ifcb) { - xi = ifcb->decode_session(skb); + xi = ifcb->decode_session(skb, family); if (xi) { if_id = xi->p.if_id; net = xi->net; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1bb971f46fc6..3edbf4b26116 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -173,7 +173,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -bool km_is_alive(const struct km_event *c); +static bool km_is_alive(const struct km_event *c); void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static DEFINE_SPINLOCK(xfrm_type_lock); @@ -330,100 +330,67 @@ static void xfrm_put_type_offload(const struct xfrm_type_offload *type) module_put(type->owner); } -static DEFINE_SPINLOCK(xfrm_mode_lock); -int xfrm_register_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= 
XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -EEXIST; - modemap = afinfo->mode_map; - spin_lock_bh(&xfrm_mode_lock); - if (modemap[mode->encap]) - goto out; - - err = -ENOENT; - if (!try_module_get(afinfo->owner)) - goto out; - - mode->afinfo = afinfo; - modemap[mode->encap] = mode; - err = 0; - -out: - spin_unlock_bh(&xfrm_mode_lock); - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(xfrm_register_mode); - -int xfrm_unregister_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -ENOENT; - modemap = afinfo->mode_map; - spin_lock_bh(&xfrm_mode_lock); - if (likely(modemap[mode->encap] == mode)) { - modemap[mode->encap] = NULL; - module_put(mode->afinfo->owner); - err = 0; - } - - spin_unlock_bh(&xfrm_mode_lock); - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_mode); - -static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode *mode; - int modload_attempted = 0; +static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { + [XFRM_MODE_BEET] = { + .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, + [XFRM_MODE_TRANSPORT] = { + .encap = XFRM_MODE_TRANSPORT, + .family = AF_INET, + }, + [XFRM_MODE_TUNNEL] = { + .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, +}; + +static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { + [XFRM_MODE_BEET] = { + .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, + [XFRM_MODE_ROUTEOPTIMIZATION] = { + .encap = XFRM_MODE_ROUTEOPTIMIZATION, + .family = AF_INET6, + }, + [XFRM_MODE_TRANSPORT] = { 
+ .encap = XFRM_MODE_TRANSPORT, + .family = AF_INET6, + }, + [XFRM_MODE_TUNNEL] = { + .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, +}; + +static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + const struct xfrm_mode *mode; if (unlikely(encap >= XFRM_MODE_MAX)) return NULL; -retry: - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - - mode = READ_ONCE(afinfo->mode_map[encap]); - if (unlikely(mode && !try_module_get(mode->owner))) - mode = NULL; - - rcu_read_unlock(); - if (!mode && !modload_attempted) { - request_module("xfrm-mode-%d-%d", family, encap); - modload_attempted = 1; - goto retry; + switch (family) { + case AF_INET: + mode = &xfrm4_mode_map[encap]; + if (mode->family == family) + return mode; + break; + case AF_INET6: + mode = &xfrm6_mode_map[encap]; + if (mode->family == family) + return mode; + break; + default: + break; } - return mode; -} - -static void xfrm_put_mode(struct xfrm_mode *mode) -{ - module_put(mode->owner); + return NULL; } void xfrm_state_free(struct xfrm_state *x) @@ -444,12 +411,6 @@ static void ___xfrm_state_destroy(struct xfrm_state *x) kfree(x->coaddr); kfree(x->replay_esn); kfree(x->preplay_esn); - if (x->inner_mode) - xfrm_put_mode(x->inner_mode); - if (x->inner_mode_iaf) - xfrm_put_mode(x->inner_mode_iaf); - if (x->outer_mode) - xfrm_put_mode(x->outer_mode); if (x->type_offload) xfrm_put_type_offload(x->type_offload); if (x->type) { @@ -590,8 +551,6 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; - x->inner_mode = NULL; - x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@ -2066,7 +2025,7 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address } EXPORT_SYMBOL(km_report); -bool km_is_alive(const struct km_event *c) +static bool km_is_alive(const struct km_event *c) { struct xfrm_mgr *km; bool 
is_alive = false; @@ -2082,7 +2041,6 @@ bool km_is_alive(const struct km_event *c) return is_alive; } -EXPORT_SYMBOL(km_is_alive); int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { @@ -2195,6 +2153,7 @@ struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family) return rcu_dereference(xfrm_state_afinfo[family]); } +EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu); struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { @@ -2242,8 +2201,9 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode *inner_mode; + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; + const struct xfrm_mode *outer_mode; int family = x->props.family; int err; @@ -2269,25 +2229,22 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) goto error; if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) { - xfrm_put_mode(inner_mode); + family != x->sel.family) goto error; - } - x->inner_mode = inner_mode; + x->inner_mode = *inner_mode; } else { - struct xfrm_mode *inner_mode_iaf; + const struct xfrm_mode *inner_mode_iaf; int iafamily = AF_INET; inner_mode = xfrm_get_mode(x->props.mode, x->props.family); if (inner_mode == NULL) goto error; - if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { - xfrm_put_mode(inner_mode); + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) goto error; - } - x->inner_mode = inner_mode; + + x->inner_mode = *inner_mode; if (x->props.family == AF_INET) iafamily = AF_INET6; @@ -2295,9 +2252,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily); if (inner_mode_iaf) { if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL) - x->inner_mode_iaf = inner_mode_iaf; - else - xfrm_put_mode(inner_mode_iaf); + x->inner_mode_iaf = 
*inner_mode_iaf; } } @@ -2311,12 +2266,13 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) if (err) goto error; - x->outer_mode = xfrm_get_mode(x->props.mode, family); - if (x->outer_mode == NULL) { + outer_mode = xfrm_get_mode(x->props.mode, family); + if (!outer_mode) { err = -EPROTONOSUPPORT; goto error; } + x->outer_mode = *outer_mode; if (init_replay) { err = xfrm_init_replay(x); if (err) @@ -2384,7 +2340,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); flush_work(&xfrm_state_gc_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d7cb16f0df5b..eb8d14389601 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL; return 0; @@ -1513,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return -EINVAL; } - switch (ut[i].id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: -#if IS_ENABLED(CONFIG_IPV6) - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: -#endif - case IPSEC_PROTO_ANY: - break; - default: + if (!xfrm_id_proto_valid(ut[i].id.proto)) return -EINVAL; - } - } return 0; |