From efd7ef1c1929d7a0329d4349252863c04d6f1729 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 11 Mar 2015 23:04:08 -0500 Subject: net: Kill hold_net release_net hold_net and release_net were an idea that turned out to be useless. The code has been disabled since 2008. Kill the code it is long past due. Signed-off-by: "Eric W. Biederman" Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 962ee9d71964..39fe369b46ad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6841,8 +6841,6 @@ void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - release_net(dev_net(dev)); - netif_free_tx_queues(dev); #ifdef CONFIG_SYSFS kvfree(dev->_rx); -- cgit v1.2.3 From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Mar 2015 20:23:15 -0600 Subject: net: add support for phys_port_name Similar to port id allow netdevices to specify port names and export the name via sysfs. Drivers can implement the netdevice operation to assist udev in having sane default names for the devices using the rule: $ cat /etc/udev/rules.d/80-net-setup-link.rules SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="", NAME="$attr{phys_port_name}" Use of phys_name versus phys_id was suggested-by Jiri Pirko. Signed-off-by: David Ahern Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. 
Miller --- Documentation/ABI/testing/sysfs-class-net | 8 ++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 1 + net/core/dev.c | 18 ++++++++++++++++++ net/core/net-sysfs.c | 23 +++++++++++++++++++++++ net/core/rtnetlink.c | 21 +++++++++++++++++++++ 6 files changed, 75 insertions(+) (limited to 'net/core/dev.c') diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index beb8ec4dabbc..5ecfd72ba684 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -188,6 +188,14 @@ Description: Indicates the interface unique physical port identifier within the NIC, as a string. +What: /sys/class/net//phys_port_name +Date: March 2015 +KernelVersion: 4.0 +Contact: netdev@vger.kernel.org +Description: + Indicates the interface physical port name within the NIC, + as a string. + What: /sys/class/net//speed Date: October 2009 KernelVersion: 2.6.33 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76c5de4978a8..ec8f9b5f6500 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1164,6 +1164,8 @@ struct net_device_ops { bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_phys_port_name)(struct net_device *dev, + char *name, size_t len); void (*ndo_add_vxlan_port)(struct net_device *dev, sa_family_t sa_family, __be16 port); @@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git 
a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 756436e1ce89..7158fd00a109 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -147,6 +147,7 @@ enum { IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, IFLA_LINK_NETNSID, + IFLA_PHYS_PORT_NAME, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 39fe369b46ad..a1f24151db5b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5911,6 +5911,24 @@ int dev_get_phys_port_id(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_id); +/** + * dev_get_phys_port_name - Get device physical port name + * @dev: device + * @name: port name + * + * Get device physical port name + */ +int dev_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_phys_port_name) + return -EOPNOTSUPP; + return ops->ndo_get_phys_port_name(dev, name, len); +} +EXPORT_SYMBOL(dev_get_phys_port_name); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7e58bd7ec232..cc5cf689809c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -418,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_port_id); +static ssize_t phys_port_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) { + char name[IFNAMSIZ]; + + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sprintf(buf, "%s\n", name); + } + rtnl_unlock(); + + return ret; +} +static DEVICE_ATTR_RO(phys_port_name); + static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -465,6 +487,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_tx_queue_len.attr, 
&dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, + &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, NULL, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 25b4b5d23485..6abe634c666c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) +{ + char name[IFNAMSIZ]; + int err; + + err = dev_get_phys_port_name(dev, name, sizeof(name)); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name)) + return -EMSGSIZE; + + return 0; +} + static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; @@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_phys_port_id_fill(skb, dev)) goto nla_put_failure; + if (rtnl_phys_port_name_fill(skb, dev)) + goto nla_put_failure; + if (rtnl_phys_switch_id_fill(skb, dev)) goto nla_put_failure; -- cgit v1.2.3 From 99c4a26a159b28fa46a3e746a9b41b297e73d261 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Mar 2015 22:52:33 -0400 Subject: net: Fix high overhead of vlan sub-device teardown. When a networking device is taken down that has a non-trivial number of VLAN devices configured under it, we eat a full synchronize_net() for every such VLAN device. This is because of the call chain: NETDEV_DOWN notifier --> vlan_device_event() --> dev_change_flags() --> __dev_change_flags() --> __dev_close() --> __dev_close_many() --> dev_deactivate_many() --> synchronize_net() This is kind of ridiculous because we already have infrastructure for batching doing operation X to a list of net devices so that we only incur one sync.
So make use of that by exporting dev_close_many() and adjusting its interface so that the caller can fully manage the batch list. Use this in vlan_device_event() and all the overhead goes away. Reported-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/8021q/vlan.c | 16 +++++++++++++--- net/core/dev.c | 10 ++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec8f9b5f6500..76951c5fbedf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2156,6 +2156,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); +int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 64c6bed4a3d3..98a30a5b8664 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -413,7 +413,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan_transfer_features(dev, vlandev); break; - case NETDEV_DOWN: + case NETDEV_DOWN: { + struct net_device *tmp; + LIST_HEAD(close_list); + if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); @@ -425,11 +428,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) - dev_change_flags(vlandev, flgs & ~IFF_UP); + list_add(&vlandev->close_list, &close_list); + } + + dev_close_many(&close_list, false); + + list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { netif_stacked_transfer_operstate(dev, vlandev); + list_del_init(&vlandev->close_list); } +
list_del(&close_list); break; - + } case NETDEV_UP: /* Put all VLANs for this dev in the up state too. */ vlan_group_for_each_dev(grp, i, vlandev) { diff --git a/net/core/dev.c b/net/core/dev.c index a1f24151db5b..5d43e010ef87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1385,7 +1385,7 @@ static int __dev_close(struct net_device *dev) return retval; } -static int dev_close_many(struct list_head *head) +int dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1399,11 +1399,13 @@ static int dev_close_many(struct list_head *head) list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL); call_netdevice_notifiers(NETDEV_DOWN, dev); - list_del_init(&dev->close_list); + if (unlink) + list_del_init(&dev->close_list); } return 0; } +EXPORT_SYMBOL(dev_close_many); /** * dev_close - shutdown an interface. @@ -1420,7 +1422,7 @@ int dev_close(struct net_device *dev) LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_close_many(&single); + dev_close_many(&single, true); list_del(&single); } return 0; @@ -5986,7 +5988,7 @@ static void rollback_registered_many(struct list_head *head) /* If device is running, close it first. */ list_for_each_entry(dev, head, unreg_list) list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head); + dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ -- cgit v1.2.3 From 08b4b8ea799d27c5dd28e8cb9188d2e88e58d294 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 20 Mar 2015 14:29:09 -0700 Subject: net: clear skb->priority when forwarding to another netns skb->priority can be set for two purposes: 1) With respect to IP TOS field, which is computed by a mask. Usually used for priority qdisc's (pfifo, prio etc.), on TX side (we only have ingress qdisc on RX side). 2) Used as a classid or flowid, works in the same way with tc classid.
What's more, this can even override the classid of tc filters. For case 1), it has been respected within its netns, I don't see any point of keeping it for another netns, especially when packets will be forwarded to Rx path (no matter from TX path or RX path). For case 2) we care, our applications run inside a netns, and we classify the packets by our own filters outside, If some application sets this priority, it could bypass our filters, therefore clear it when moving out of a netns, it makes no sense to bypass tc filters out of its netns. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5d43e010ef87..a0408d497dae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1696,6 +1696,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } skb_scrub_packet(skb, true); + skb->priority = 0; skb->protocol = eth_type_trans(skb, dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); -- cgit v1.2.3 From f5a7fb88e1f82542ca14ba93a1d4fa35471c60ca Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:11 +0900 Subject: vlan: Introduce helper functions to check if skb is tagged Separate the two checks for single vlan and multiple vlans in netif_skb_features(). This allows us to move the check for multiple vlans to another function later. Signed-off-by: Toshiaki Makita Signed-off-by: David S. 
Miller --- include/linux/if_vlan.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 24 ++++++++---------------- 2 files changed, 53 insertions(+), 16 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b11b28a30b9e..4265d440ec4d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -561,4 +561,49 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } +/** + * skb_vlan_tagged - check if skb is vlan tagged. + * @skb: skbuff to query + * + * Returns true if the skb is tagged, regardless of whether it is hardware + * accelerated or not. + */ +static inline bool skb_vlan_tagged(const struct sk_buff *skb) +{ + if (!skb_vlan_tag_present(skb) && + likely(skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD))) + return false; + + return true; +} + +/** + * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. + * @skb: skbuff to query + * + * Returns true if the skb is tagged with multiple vlan headers, regardless + * of whether it is hardware accelerated or not. 
+ */ +static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) +{ + __be16 protocol = skb->protocol; + + if (!skb_vlan_tag_present(skb)) { + struct vlan_ethhdr *veh; + + if (likely(protocol != htons(ETH_P_8021Q) && + protocol != htons(ETH_P_8021AD))) + return false; + + veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } + + if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + return false; + + return true; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/net/core/dev.c b/net/core/dev.c index a0408d497dae..04bffcd4a48d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2567,7 +2567,6 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) struct net_device *dev = skb->dev; netdev_features_t features = dev->features; u16 gso_segs = skb_shinfo(skb)->gso_segs; - __be16 protocol = skb->protocol; if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs) features &= ~NETIF_F_GSO_MASK; @@ -2579,22 +2578,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!skb_vlan_tag_present(skb)) { - if (unlikely(protocol == htons(ETH_P_8021Q) || - protocol == htons(ETH_P_8021AD))) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else { - goto finalize; - } - } - - features = netdev_intersect_features(features, - dev->vlan_features | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged(skb)) + features = netdev_intersect_features(features, + dev->vlan_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + else + goto finalize; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) + if (skb_vlan_tagged_multi(skb)) features = netdev_intersect_features(features, NETIF_F_SG | NETIF_F_HIGHDMA | -- cgit v1.2.3 From 8cb65d00086bfba22bac87ff18b751432fc74003 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 
Mar 2015 14:31:12 +0900 Subject: net: Move check for multiple vlans to drivers To allow drivers to handle the features check for multiple tags, move the check to ndo_features_check(). As no drivers currently handle multiple tagged TSO, introduce dflt_features_check() and call it if the driver does not have ndo_features_check(). Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 + include/linux/if_vlan.h | 22 ++++++++++++++++++++++ net/core/dev.c | 21 +++++++++------------ 5 files changed, 34 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 9677431c582a..039b0c1f480e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12557,6 +12557,7 @@ static netdev_features_t bnx2x_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a8339e98ad24..ebc93a101c93 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2373,6 +2373,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index a430a34a4434..367f3976df56 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ 
b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -507,6 +507,7 @@ static netdev_features_t qlcnic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { + features = vlan_features_check(skb, features); return vxlan_features_check(skb, features); } #endif diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 4265d440ec4d..920e4457ce6e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -606,4 +606,26 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) return true; } +/** + * vlan_features_check - drop unsafe features for skb with multiple tags. + * @skb: skbuff to query + * @features: features to be checked + * + * Returns features without unsafe ones if the skb has multiple tags. + */ +static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, + netdev_features_t features) +{ + if (skb_vlan_tagged_multi(skb)) + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + return features; +} + #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/net/core/dev.c b/net/core/dev.c index 04bffcd4a48d..cb46badbef5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,6 +2562,13 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +static netdev_features_t dflt_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return vlan_features_check(skb, features); +} + netdev_features_t netif_skb_features(struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -2583,22 +2590,12 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - else - goto finalize; - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - 
NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - -finalize: if (dev->netdev_ops->ndo_features_check) features &= dev->netdev_ops->ndo_features_check(skb, dev, features); + else + features &= dflt_features_check(skb, dev, features); return harmonize_features(skb, features); } -- cgit v1.2.3 From e38f30256b36700aa63aa709dc091bf6eb69c257 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 27 Mar 2015 14:31:13 +0900 Subject: net: Introduce passthru_features_check As there are a number of (especially virtual) devices that don't need the multiple vlan check, introduce passthru_features_check() for convenience. Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/dev.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 08c4ab37189f..967bb4c8caf1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3657,6 +3657,9 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) diff --git a/net/core/dev.c b/net/core/dev.c index cb46badbef5a..3a06003ecafd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2562,6 +2562,14 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, return features; } +netdev_features_t passthru_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + return features; +} +EXPORT_SYMBOL(passthru_features_check); + static netdev_features_t dflt_features_check(const struct sk_buff *skb, struct net_device *dev, netdev_features_t features) 
-- cgit v1.2.3 From fbcb21705930f2930f506149d0b8d36dfbe45107 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Mar 2015 16:56:01 +0200 Subject: net: rename dev to orig_dev in deliver_ptype_list_skb Unlike other places, this function uses name "dev" for what should be "orig_dev", which might be a bit confusing. So fix this. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3a06003ecafd..65492b0354c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1740,7 +1740,8 @@ static inline int deliver_skb(struct sk_buff *skb, static inline void deliver_ptype_list_skb(struct sk_buff *skb, struct packet_type **pt, - struct net_device *dev, __be16 type, + struct net_device *orig_dev, + __be16 type, struct list_head *ptype_list) { struct packet_type *ptype, *pt_prev = *pt; @@ -1749,7 +1750,7 @@ static inline void deliver_ptype_list_skb(struct sk_buff *skb, if (ptype->type != type) continue; if (pt_prev) - deliver_skb(skb, pt_prev, dev); + deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } *pt = pt_prev; -- cgit v1.2.3 From a54acb3a6f853e8394c4cb7b6a4d93c88f13eefd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:00 +0200 Subject: dev: introduce dev_get_iflink() The goal of this patch is to prepare the removal of the iflink field. It introduces a new ndo function, which will be implemented by virtual interfaces. There is no functional change into this patch. All readers of iflink field now call dev_get_iflink(). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- drivers/net/ipvlan/ipvlan_core.c | 2 +- include/linux/netdevice.h | 4 ++++ net/batman-adv/hard-interface.c | 5 +++-- net/bridge/br_netlink.c | 4 ++-- net/core/dev.c | 21 +++++++++++++++++++-- net/core/link_watch.c | 4 ++-- net/core/net-sysfs.c | 10 +++++++++- net/core/rtnetlink.c | 8 ++++---- net/ipv4/ipmr.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/ip6mr.c | 2 +- 11 files changed, 48 insertions(+), 18 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 2a175006028b..131bde98188d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -330,7 +330,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb) struct rtable *rt; int err, ret = NET_XMIT_DROP; struct flowi4 fl4 = { - .flowi4_oif = dev->iflink, + .flowi4_oif = dev_get_iflink(dev), .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC, .daddr = ip4h->daddr, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 967bb4c8caf1..788eb7a622ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1030,6 +1030,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific * TX queue. + * int (*ndo_get_iflink)(const struct net_device *dev); + * Called to get the iflink value of this device. 
*/ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1191,6 +1193,7 @@ struct net_device_ops { int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); + int (*ndo_get_iflink)(const struct net_device *dev); }; /** @@ -2149,6 +2152,7 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); +int dev_get_iflink(const struct net_device *dev); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index fbda6b54baff..baf1f9843f2c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,11 +83,12 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; /* no more parents..stop recursion */ - if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) + if (dev_get_iflink(net_dev) == 0 || + dev_get_iflink(net_dev) == net_dev->ifindex) return false; /* recurse over the parent device */ - parent_dev = __dev_get_by_index(&init_net, net_dev->iflink); + parent_dev = __dev_get_by_index(&init_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. 
*/ if (WARN(!parent_dev, "Cannot find parent device")) return false; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e1115a224a95..0e4ddb81610d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -305,8 +305,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, nla_put_u8(skb, IFLA_OPERSTATE, operstate) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; if (event == RTM_NEWLINK && port) { diff --git a/net/core/dev.c b/net/core/dev.c index 65492b0354c0..77172d085760 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -659,6 +659,23 @@ __setup("netdev=", netdev_boot_setup); *******************************************************************************/ +/** + * dev_get_iflink - get 'iflink' value of a interface + * @dev: targeted interface + * + * Indicates the ifindex the interface is linked to. + * Physical interfaces have the same 'ifindex' and 'iflink' values. 
+ */ + +int dev_get_iflink(const struct net_device *dev) +{ + if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) + return dev->netdev_ops->ndo_get_iflink(dev); + + return dev->iflink; +} +EXPORT_SYMBOL(dev_get_iflink); + /** * __dev_get_by_name - find a device by its name * @net: the applicable net namespace @@ -6345,7 +6362,7 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev->iflink == -1) + if (dev_get_iflink(dev) == -1) dev->iflink = dev->ifindex; /* Transfer changeable features to wanted_features and enable @@ -7061,7 +7078,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev->iflink == dev->ifindex); + int iflink = (dev_get_iflink(dev) == dev->ifindex); dev->ifindex = dev_new_index(net); if (iflink) dev->iflink = dev->ifindex; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 49a9e3e06c08..982861607f88 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(lweventlist_lock); static unsigned char default_operstate(const struct net_device *dev) { if (!netif_carrier_ok(dev)) - return (dev->ifindex != dev->iflink ? + return (dev->ifindex != dev_get_iflink(dev) ? 
IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN); if (netif_dormant(dev)) @@ -89,7 +89,7 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (!netif_running(dev)) return false; - if (dev->ifindex != dev->iflink) + if (dev->ifindex != dev_get_iflink(dev)) return true; if (dev->priv_flags & IFF_TEAM_PORT) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index cc5cf689809c..4238d6da5c60 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -109,11 +109,19 @@ NETDEVICE_SHOW_RO(dev_id, fmt_hex); NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); -NETDEVICE_SHOW_RO(iflink, fmt_dec); NETDEVICE_SHOW_RO(ifindex, fmt_dec); NETDEVICE_SHOW_RO(type, fmt_dec); NETDEVICE_SHOW_RO(link_mode, fmt_dec); +static ssize_t iflink_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *ndev = to_net_dev(dev); + + return sprintf(buf, fmt_dec, dev_get_iflink(ndev)); +} +static DEVICE_ATTR_RO(iflink); + static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) { return sprintf(buf, fmt_dec, dev->name_assign_type); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b96ac2109c82..ee0186cdd5cf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1055,8 +1055,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink)) || + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || (upper_dev && nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || @@ -2863,8 +2863,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, 
dev->dev_addr)) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b4a545d24adb..eec68b0c3bc8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -801,7 +801,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5c9e94cb1b2c..37b70e82bff8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4858,8 +4858,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; protoinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protoinfo) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index caf6b99374e6..18a5ab286420 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -992,7 +992,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & MIFF_REGISTER) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); -- cgit v1.2.3 From 7a66bbc96ce9ad8261fa5f7f6ae65370eb6866ee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:09 +0200 Subject: net: remove iflink field from struct net_device Now that all users of iflink have the ndo_get_iflink handler available, 
it's possible to remove this field. By default, dev_get_iflink() returns the ifindex of the interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- net/core/dev.c | 13 ++----------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 788eb7a622ad..846a1f5bc9db 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1538,7 +1538,7 @@ struct net_device { netdev_features_t mpls_features; int ifindex; - int iflink; + int group; struct net_device_stats stats; @@ -1741,7 +1741,6 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; - int group; struct pm_qos_request pm_qos_req; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/core/dev.c b/net/core/dev.c index 77172d085760..3be107e0bc93 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,7 +672,7 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); - return dev->iflink; + return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); @@ -6331,8 +6331,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); - dev->iflink = -1; - ret = dev_get_valid_name(net, dev, dev->name); if (ret < 0) goto out; @@ -6362,9 +6360,6 @@ int register_netdevice(struct net_device *dev) else if (__dev_get_by_index(net, dev->ifindex)) goto err_uninit; - if (dev_get_iflink(dev) == -1) - dev->iflink = dev->ifindex; - /* Transfer changeable features to wanted_features and enable * software offloads (GSO and GRO). 
*/ @@ -7077,12 +7072,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_net_set(dev, net); /* If there is an ifindex conflict assign a new one */ - if (__dev_get_by_index(net, dev->ifindex)) { - int iflink = (dev_get_iflink(dev) == dev->ifindex); + if (__dev_get_by_index(net, dev->ifindex)) dev->ifindex = dev_new_index(net); - if (iflink) - dev->iflink = dev->ifindex; - } /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); -- cgit v1.2.3 From e1622baf54df8cc958bf29d71de5ad545ea7d93c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 2 Apr 2015 17:07:10 +0200 Subject: dev: set iflink to 0 for virtual interfaces Virtual interfaces are supposed to set an iflink value different from their ifindex. It was not the case for some of them, like vxlan, bond or bridge. Let's set iflink to 0 when dev->rtnl_link_ops is set. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3be107e0bc93..26622d614f81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -672,6 +672,10 @@ int dev_get_iflink(const struct net_device *dev) if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink) return dev->netdev_ops->ndo_get_iflink(dev); + /* If dev->rtnl_link_ops is set, it's a virtual interface. */ + if (dev->rtnl_link_ops) + return 0; + return dev->ifindex; } EXPORT_SYMBOL(dev_get_iflink); -- cgit v1.2.3 From 7026b1ddb6b8d4e6ee33dc2bd06c0ca8746fa7ab Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:04 -0400 Subject: netfilter: Pass socket pointer down through okfn(). On the output paths in particular, we have to sometimes deal with two socket contexts. First, and usually skb->sk, is the local socket that generated the frame. And second, is potentially the socket used to control a tunneling socket, such as one that encapsulates using UDP.
We do not want to disassociate skb->sk when encapsulating in order to fix this, because that would break socket memory accounting. The most extreme case where this can cause huge problems is an AF_PACKET socket transmitting over a vxlan device. We hit code paths doing checks that assume they are dealing with an ipv4 socket, but are actually operating upon the AF_PACKET one. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++-- include/linux/netfilter.h | 62 +++++++++++++++++-------------- include/linux/netfilter_bridge.h | 2 +- include/net/dn_neigh.h | 6 +-- include/net/ip.h | 3 +- include/net/ip6_route.h | 3 +- include/net/ipv6.h | 2 +- include/net/xfrm.h | 8 ++-- net/bridge/br_forward.c | 13 ++++--- net/bridge/br_input.c | 16 ++++---- net/bridge/br_multicast.c | 3 +- net/bridge/br_netfilter.c | 56 +++++++++++++++------------- net/bridge/br_private.h | 6 +-- net/bridge/br_stp_bpdu.c | 5 ++- net/core/dev.c | 10 ++--- net/decnet/dn_neigh.c | 35 +++++++++-------- net/decnet/dn_nsp_in.c | 5 ++- net/decnet/dn_route.c | 26 +++++++------ net/ipv4/arp.c | 10 +++-- net/ipv4/ip_forward.c | 8 ++-- net/ipv4/ip_input.c | 10 +++-- net/ipv4/ip_output.c | 45 ++++++++++++---------- net/ipv4/ipmr.c | 7 ++-- net/ipv4/raw.c | 4 +- net/ipv4/xfrm4_input.c | 5 ++- net/ipv4/xfrm4_output.c | 12 +++--- net/ipv6/ip6_input.c | 11 +++--- net/ipv6/ip6_output.c | 33 ++++++++-------- net/ipv6/ip6mr.c | 7 ++-- net/ipv6/mcast.c | 9 +++-- net/ipv6/ndisc.c | 5 ++- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/ipv6/output_core.c | 4 +- net/ipv6/raw.c | 4 +- net/ipv6/xfrm6_input.c | 3 +- net/ipv6/xfrm6_output.c | 15 ++++---- net/netfilter/ipvs/ip_vs_xmit.c | 8 ++-- net/netfilter/nf_queue.c | 2 +- net/xfrm/xfrm_output.c | 16 ++++---- 39 files changed, 277 insertions(+), 218 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 41bf58a2b936..45823db2efb0 100644 --- a/include/linux/netdevice.h +++ 
b/include/linux/netdevice.h @@ -2165,8 +2165,12 @@ int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); -int dev_loopback_xmit(struct sk_buff *newskb); -int dev_queue_xmit(struct sk_buff *skb); +int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb); +static inline int dev_queue_xmit(struct sk_buff *skb) +{ + return dev_queue_xmit_sk(skb->sk, skb); +} int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); @@ -2927,7 +2931,11 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -int netif_receive_skb(struct sk_buff *skb); +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb); +static inline int netif_receive_skb(struct sk_buff *skb) +{ + return netif_receive_skb_sk(skb->sk, skb); +} gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f8f58fab2402..63560d0a8dfe 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,7 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; - int (*okfn)(struct sk_buff *); + int (*okfn)(struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, @@ -63,7 +63,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *indev, struct net_device *outdev, struct sock *sk, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { p->hook = hook; 
p->thresh = thresh; @@ -156,26 +156,29 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *, struct sk_buff *), + int thresh) { if (nf_hooks_active(pf, hook)) { struct nf_hook_state state; nf_hook_state_init(&state, hook, thresh, pf, - indev, outdev, NULL, okfn); + indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } return 1; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { - return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -196,35 +199,36 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), int thresh) +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, + struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); + int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh); if (ret == 1) - ret = okfn(skb); + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK_COND(uint8_t pf, unsigned int hook, struct 
sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *), bool cond) +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN)) == 1)) - ret = okfn(skb); + ((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ret = okfn(sk, skb); return ret; } static inline int -NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, +NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); + return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN); } /* Call setsockopt() */ @@ -324,19 +328,21 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) -#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) +#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) +#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sock *sk, struct sk_buff *), int thresh) { - return okfn(skb); + return okfn(sk, skb); } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, + struct sk_buff *skb, struct net_device 
*indev, + struct net_device *outdev, + int (*okfn)(struct sock *, struct sk_buff *)) { return 1; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2734977199ca..5fc0a0fe244b 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -30,7 +30,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index 0f26aa707e62..d0424269313f 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -18,11 +18,11 @@ struct dn_neigh { void dn_neigh_init(void); void dn_neigh_cleanup(void); -int dn_neigh_router_hello(struct sk_buff *skb); -int dn_neigh_endnode_hello(struct sk_buff *skb); +int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb); +int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb); void dn_neigh_pointopoint_hello(struct sk_buff *skb); int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n); -int dn_to_neigh_output(struct sk_buff *skb); +int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb); extern struct neigh_table dn_neigh_table; diff --git a/include/net/ip.h b/include/net/ip.h index 69cd9cb8400c..d14af7edd197 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -108,7 +108,8 @@ int ip_local_deliver(struct sk_buff *skb); int ip_mr_input(struct sk_buff *skb); int ip_output(struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct sock *sk, struct sk_buff *skb); -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); int ip_do_nat(struct sk_buff *skb); void ip_send_check(struct iphdr *ip); int __ip_local_out(struct sk_buff *skb); diff --git 
a/include/net/ip6_route.h b/include/net/ip6_route.h index eda131d179d9..5e192068e6cb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -170,7 +170,8 @@ static inline bool ipv6_anycast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_ANYCAST; } -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip6_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 65142e6af440..b6ae959824ff 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -769,7 +769,7 @@ static inline u8 ip6_tclass(__be32 flowinfo) int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); -int ip6_rcv_finish(struct sk_buff *skb); +int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb); /* * upper-layer output functions diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 461f83539493..36ac102c97c7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -332,7 +332,7 @@ struct xfrm_state_afinfo { int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); int (*output)(struct sock *sk, struct sk_buff *skb); - int (*output_finish)(struct sk_buff *skb); + int (*output_finish)(struct sock *sk, struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, @@ -1503,7 +1503,7 @@ int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_output_resume(struct sk_buff *skb, int err); -int xfrm_output(struct sk_buff *skb); +int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm_inner_extract_output(struct 
xfrm_state *x, struct sk_buff *skb); void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_extract_header(struct sk_buff *skb); @@ -1524,7 +1524,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct sock *sk, struct sk_buff *skb); -int xfrm4_output_finish(struct sk_buff *skb); +int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol); @@ -1549,7 +1549,7 @@ __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct sock *sk, struct sk_buff *skb); -int xfrm6_output_finish(struct sk_buff *skb); +int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 3304a5442331..e97572b5d2cc 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p, p->state == BR_STATE_FORWARDING; } -int br_dev_queue_push_xmit(struct sk_buff *skb) +int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); @@ -49,9 +49,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); -int br_forward_finish(struct sk_buff *skb) +int br_forward_finish(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, 
NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, + NULL, skb->dev, br_dev_queue_push_xmit); } @@ -75,7 +76,8 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) return; } - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, br_forward_finish); } @@ -96,7 +98,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, + indev, skb->dev, br_forward_finish); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 052c5ebbc947..f921a5dce22d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -55,8 +55,9 @@ static int br_pass_frame_up(struct sk_buff *skb) if (!skb) return NET_RX_DROP; - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, - netif_receive_skb); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, + indev, NULL, + netif_receive_skb_sk); } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, @@ -119,7 +120,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } /* note: already called with rcu_read_lock */ -int br_handle_frame_finish(struct sk_buff *skb) +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -207,7 +208,7 @@ drop: EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct sk_buff *skb) +static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -277,8 +278,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } /* Deliver packet to local host only */ - 
if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) { + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, + skb->dev, NULL, br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; @@ -302,7 +303,8 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, + skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c465876c7861..4b6722f8f179 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -814,7 +814,8 @@ static void __br_multicast_send_query(struct net_bridge *br, if (port) { skb->dev = port->dev; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 7527e94dd5dc..acd31c9f2116 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -261,7 +261,7 @@ static void nf_bridge_update_protocol(struct sk_buff *skb) /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. 
*/ -static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct rtable *rt; @@ -282,7 +282,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -293,7 +294,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) +static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct neighbour *neigh; @@ -310,7 +311,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(skb); + ret = br_handle_frame_finish(sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and @@ -387,7 +388,7 @@ static bool dnat_took_place(const struct sk_buff *skb) * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. 
*/ -static int br_nf_pre_routing_finish(struct sk_buff *skb) +static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); @@ -440,7 +441,7 @@ bridged_dnat: nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - skb, skb->dev, NULL, + sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -460,7 +461,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -596,7 +598,8 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, return NF_DROP; skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, br_nf_pre_routing_finish_ipv6); return NF_STOLEN; @@ -651,7 +654,8 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, skb->protocol = htons(ETH_P_IP); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; @@ -674,7 +678,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, } /* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sk_buff *skb) +static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *in; @@ -691,8 +695,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, - skb->dev, br_forward_finish, 1); + NF_HOOK_THRESH(NFPROTO_BRIDGE, 
NF_BR_FORWARD, sk, skb, + in, skb->dev, br_forward_finish, 1); return 0; } @@ -746,7 +750,8 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, state->in), + NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + brnf_get_logical_dev(skb, state->in), parent, br_nf_forward_finish); return NF_STOLEN; @@ -780,8 +785,8 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_ACCEPT; } *d = state->in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, state->in, - state->out, br_nf_forward_finish); + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } @@ -804,24 +809,24 @@ static bool nf_bridge_copy_header(struct sk_buff *skb) return true; } -static int br_nf_push_frag_xmit(struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) { if (!nf_bridge_copy_header(skb)) { kfree_skb(skb); return 0; } - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(sk, skb); } -static int br_nf_dev_queue_xmit(struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; int frag_max_size; unsigned int mtu_reserved; if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(sk, skb); mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! 
We should preserve the original fragment @@ -833,16 +838,16 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_nf_push_frag_xmit); + ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); } else - ret = br_dev_queue_push_xmit(skb); + ret = br_dev_queue_push_xmit(sk, skb); return ret; } #else -static int br_nf_dev_queue_xmit(struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(sk, skb); } #endif @@ -887,7 +892,8 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, + NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NULL, realoutdev, br_nf_dev_queue_xmit); return NF_STOLEN; @@ -927,7 +933,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); skb->dev = nf_bridge->physindev; - br_handle_frame_finish(skb); + br_handle_frame_finish(NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b46fa0c5b8ec..6ca0251cb478 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -410,10 +410,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -int br_dev_queue_push_xmit(struct sk_buff *skb); +int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -int br_forward_finish(struct sk_buff *skb); +int br_forward_finish(struct sock *sk, struct sk_buff *skb); void br_flood_deliver(struct net_bridge *br, struct sk_buff 
*skb, bool unicast); void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2, bool unicast); @@ -431,7 +431,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index bdb459d21ad8..534fc4cd263e 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -54,8 +54,9 @@ static void br_send_bpdu(struct net_bridge_port *p, skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - dev_queue_xmit); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, + dev_queue_xmit_sk); } static inline void br_set_ticks(unsigned char *dest, int j) diff --git a/net/core/dev.c b/net/core/dev.c index 3b3965288f52..b2775f06c710 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2879,7 +2879,7 @@ EXPORT_SYMBOL(xmit_recursion); * dev_loopback_xmit - loop back @skb * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sk_buff *skb) +int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -3017,11 +3017,11 @@ out: return rc; } -int dev_queue_xmit(struct sk_buff *skb) +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit); +EXPORT_SYMBOL(dev_queue_xmit_sk); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3853,13 +3853,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb(struct sk_buff *skb) +int 
netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb); +EXPORT_SYMBOL(netif_receive_skb_sk); /* Network device is going away, flush any packets still pending * Called with irqs disabled. diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index be1f08cdad29..4507b188fc51 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) return err; } -static int dn_neigh_output_packet(struct sk_buff *skb) +static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -206,7 +206,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb) /* * For talking to broadcast devices: Ethernet & PPP */ -static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_long_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; @@ -245,14 +246,15 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } /* * For talking to pointopoint and multidrop devices: DDCMP and X.25 */ -static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_short_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -284,8 +286,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) 
skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } /* @@ -293,7 +295,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) * Phase 3 output is the same as short output, execpt that * it clears the area bits before transmission. */ -static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -324,11 +327,11 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } -int dn_to_neigh_output(struct sk_buff *skb) +int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; @@ -347,11 +350,11 @@ int dn_to_neigh_output(struct sk_buff *skb) rcu_read_unlock(); if (dn->flags & DN_NDFLAG_P3) - return dn_phase3_output(neigh, skb); + return dn_phase3_output(neigh, sk, skb); if (use_long) - return dn_long_output(neigh, skb); + return dn_long_output(neigh, sk, skb); else - return dn_short_output(neigh, skb); + return dn_short_output(neigh, sk, skb); } /* @@ -372,7 +375,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb) /* * Ethernet router hello message received */ -int dn_neigh_router_hello(struct sk_buff *skb) +int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) { struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; @@ -434,7 +437,7 @@ int 
dn_neigh_router_hello(struct sk_buff *skb) /* * Endnode hello message received */ -int dn_neigh_endnode_hello(struct sk_buff *skb) +int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb) { struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; struct neighbour *neigh; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index fe5f01485d33..a321eac9fd0c 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -714,7 +714,7 @@ out: return ret; } -static int dn_nsp_rx_packet(struct sk_buff *skb) +static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk = NULL; @@ -814,7 +814,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb, + skb->dev, NULL, dn_nsp_rx_packet); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9ab0c4ba297f..03227ffd19ce 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb) * * Returns: result of input function if route is found, error code otherwise */ -static int dn_route_rx_packet(struct sk_buff *skb) +static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb; int err; @@ -573,7 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, + skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -600,7 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, + skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -608,7 +610,7 @@ drop_it: return 
NET_RX_DROP; } -static int dn_route_discard(struct sk_buff *skb) +static int dn_route_discard(struct sock *sk, struct sk_buff *skb) { /* * I know we drop the packet here, but thats considered success in @@ -618,7 +620,7 @@ static int dn_route_discard(struct sk_buff *skb) return NET_RX_SUCCESS; } -static int dn_route_ptp_hello(struct sk_buff *skb) +static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb) { dn_dev_hello(skb); dn_neigh_pointopoint_hello(skb); @@ -704,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch (flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_route_discard); case DN_RT_PKT_ERTH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_neigh_router_hello); case DN_RT_PKT_EEDH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_neigh_endnode_hello); } } else { @@ -768,7 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb, + NULL, dev, dn_to_neigh_output); error: @@ -816,7 +819,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb, + dev, skb->dev, dn_to_neigh_output); drop: diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c6e67aa46c32..933a92820d26 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -591,7 +591,8 @@ EXPORT_SYMBOL(arp_create); void arp_xmit(struct sk_buff *skb) { /* 
Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb, + NULL, skb->dev, dev_queue_xmit_sk); } EXPORT_SYMBOL(arp_xmit); @@ -625,7 +626,7 @@ EXPORT_SYMBOL(arp_send); * Process an arp request. */ -static int arp_process(struct sk_buff *skb) +static int arp_process(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -846,7 +847,7 @@ out: static void parp_redo(struct sk_buff *skb) { - arp_process(skb); + arp_process(NULL, skb); } @@ -879,7 +880,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, + dev, NULL, arp_process); consumeskb: consume_skb(skb); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index d9bc28ac5d1b..939992c456f3 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -57,7 +57,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) } -static int ip_forward_finish(struct sk_buff *skb) +static int ip_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -68,7 +68,7 @@ static int ip_forward_finish(struct sk_buff *skb) ip_forward_options(skb); skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } int ip_forward(struct sk_buff *skb) @@ -136,8 +136,8 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->dst.dev, ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, + skb->dev, rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2e0410ed8f16..2db4c8773c1b 100644 --- 
a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -187,7 +187,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct sk_buff *skb) +static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -253,7 +253,8 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip_local_deliver_finish); } @@ -309,7 +310,7 @@ drop: int sysctl_ip_early_demux __read_mostly = 1; EXPORT_SYMBOL(sysctl_ip_early_demux); -static int ip_rcv_finish(struct sk_buff *skb) +static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; @@ -451,7 +452,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip_rcv_finish); csum_error: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 26f6f7956168..5da4d15262fd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -91,14 +91,19 @@ void ip_send_check(struct iphdr *iph) } EXPORT_SYMBOL(ip_send_check); -int __ip_local_out(struct sk_buff *skb) +int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL, + skb_dst(skb)->dev, dst_output_sk); +} + +int __ip_local_out(struct sk_buff *skb) +{ + return __ip_local_out_sk(skb->sk, skb); } int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) @@ -163,7 +168,7 @@ int ip_build_and_send_pkt(struct 
sk_buff *skb, struct sock *sk, } EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); -static inline int ip_finish_output2(struct sk_buff *skb) +static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; @@ -211,7 +216,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip_finish_output_gso(struct sk_buff *skb) +static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) { netdev_features_t features; struct sk_buff *segs; @@ -220,7 +225,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) /* common case: locally created skb or seglen is <= mtu */ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) - return ip_finish_output2(skb); + return ip_finish_output2(sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. * @@ -243,7 +248,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) int err; segs->next = NULL; - err = ip_fragment(segs, ip_finish_output2); + err = ip_fragment(sk, segs, ip_finish_output2); if (err && ret == 0) ret = err; @@ -253,22 +258,22 @@ static int ip_finish_output_gso(struct sk_buff *skb) return ret; } -static int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sock *sk, struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif if (skb_is_gso(skb)) - return ip_finish_output_gso(skb); + return ip_finish_output_gso(sk, skb); if (skb->len > ip_skb_dst_mtu(skb)) - return ip_fragment(skb, ip_finish_output2); + return ip_fragment(sk, skb, ip_finish_output2); - return ip_finish_output2(skb); + return ip_finish_output2(sk, skb); } int ip_mc_output(struct sock *sk, struct sk_buff *skb) @@ -307,7 +312,7 @@ int 
ip_mc_output(struct sock *sk, struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } @@ -322,11 +327,11 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, skb->dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -340,7 +345,8 @@ int ip_output(struct sock *sk, struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -480,7 +486,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. 
*/ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct iphdr *iph; int ptr; @@ -593,7 +600,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip_send_check(iph); } - err = output(skb); + err = output(sk, skb); if (!err) IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); @@ -730,7 +737,7 @@ slow_path: ip_send_check(iph); - err = output(skb2); + err = output(sk, skb2); if (err) goto fail; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f17d0e78071..3a2c0162c3ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1679,7 +1679,7 @@ static void ip_encap(struct net *net, struct sk_buff *skb, nf_reset(skb); } -static inline int ipmr_forward_finish(struct sk_buff *skb) +static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -1689,7 +1689,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -1788,7 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. 
*/ - NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ipmr_forward_finish); return; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6d0fa8fb8af0..c0bb648fb2f9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -412,8 +412,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index cac7468db0a1..60b032f58ccc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -22,7 +22,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) +static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb) { if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); @@ -52,7 +52,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index dab73813cb92..2878dbfffeb7 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -77,26 +77,26 @@ int xfrm4_output_finish(struct sk_buff *skb) IPCB(skb)->flags |= 
IPSKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm4_output(struct sk_buff *skb) +static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; #ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm4_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index fb97f7f8d4ed..f2e464eba5ef 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -46,8 +46,7 @@ #include #include - -int ip6_rcv_finish(struct sk_buff *skb) +int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -183,7 +182,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. 
*/ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -198,7 +198,7 @@ drop: */ -static int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; @@ -277,7 +277,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 654f245aa930..7fde1f265c90 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include #include -static int ip6_finish_output2(struct sk_buff *skb) +static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -70,7 +70,7 @@ static int ip6_finish_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_socket(dev_net(dev), skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, @@ -82,7 +82,7 @@ static int ip6_finish_output2(struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -122,14 +122,14 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip6_finish_output(struct sk_buff *skb) +static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) { if ((skb->len > 
ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(skb, ip6_finish_output2); + return ip6_fragment(sk, skb, ip6_finish_output2); else - return ip6_finish_output2(skb); + return ip6_finish_output2(sk, skb); } int ip6_output(struct sock *sk, struct sk_buff *skb) @@ -143,7 +143,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb) return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } @@ -223,8 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - dst->dev, dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, dst_output_sk); } skb->dev = dst->dev; @@ -316,10 +317,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sk_buff *skb) +static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -511,7 +512,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dst->dev, ip6_forward_finish); error: @@ -538,7 +540,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); 
} -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -667,7 +670,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip6_copy_metadata(frag, skb); } - err = output(skb); + err = output(sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -800,7 +803,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(frag); + err = output(sk, frag); if (err) goto fail; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8493a22e74eb..74ceb73c1c9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1986,13 +1986,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -2064,7 +2064,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fac1f27e428e..083b2927fc67 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1644,8 +1644,9 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + 
net->ipv6.igmp_sk, skb, NULL, skb->dev, + dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -2007,8 +2008,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb->dev, dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 71fde6cafb35..96f153c0846b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -463,8 +463,9 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, + dst_output_sk); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e2b882056751..a45db0b4785c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4016a6ef9d61..7d1131dc29fe 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -146,8 +146,8 @@ int __ip6_local_out(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, 
dst_output); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb->sk, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } EXPORT_SYMBOL_GPL(__ip6_local_out); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 79ccdb4c1b33..8072bd4139b7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -652,8 +652,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, goto error_fault; IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f48fbe4d16f5..74bd17882a2f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,7 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 010f8bd2d577..09c76a7b474d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); @@ -128,10 +128,10 @@ int xfrm6_output_finish(struct sk_buff *skb) IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sk_buff *skb) +static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct 
xfrm_state *x = dst->xfrm; @@ -140,7 +140,7 @@ static int __xfrm6_output(struct sk_buff *skb) #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif @@ -160,14 +160,15 @@ static int __xfrm6_output(struct sk_buff *skb) if (x->props.mode == XFRM_MODE_TUNNEL && ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); + return ip6_fragment(sk, skb, + x->outer_mode->afinfo->output_finish); } - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bf02932b7188..19986ec5f21a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_update_conntrack(skb, cp, 1); if (!local) { skb_forward_csum(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; return ret; @@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); if (!local) { skb_forward_csum(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; return ret; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index c4a706678f88..3f3ac57b2998 100644 --- a/net/netfilter/nf_queue.c +++ 
b/net/netfilter/nf_queue.c @@ -202,7 +202,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) case NF_ACCEPT: case NF_STOP: local_bh_disable(); - entry->state.okfn(skb); + entry->state.okfn(entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 7c532856b398..fbcedbe33190 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -19,7 +19,7 @@ #include #include -static int xfrm_output2(struct sk_buff *skb); +static int xfrm_output2(struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -130,7 +130,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) return dst_output(skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, skb, + NF_INET_POST_ROUTING, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -144,12 +144,12 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); -static int xfrm_output2(struct sk_buff *skb) +static int xfrm_output2(struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } -static int xfrm_output_gso(struct sk_buff *skb) +static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; @@ -165,7 +165,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int err; segs->next = NULL; - err = xfrm_output2(segs); + err = xfrm_output2(sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); @@ -178,13 +178,13 @@ static int xfrm_output_gso(struct sk_buff *skb) return 0; } -int xfrm_output(struct sk_buff *skb) +int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); int err; if (skb_is_gso(skb)) - return xfrm_output_gso(skb); + return xfrm_output_gso(sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); @@ -195,7 +195,7 @@ int xfrm_output(struct sk_buff *skb) } } - return xfrm_output2(skb); + return xfrm_output2(sk, skb); } 
EXPORT_SYMBOL_GPL(xfrm_output); -- cgit v1.2.3 From 4577139b2dabf58973d59d157aae4ddd3bde863a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 10 Apr 2015 23:07:54 +0200 Subject: net: use jump label patching for ingress qdisc in __netif_receive_skb_core Even if we make use of classifier and actions from the egress path, we're going into handle_ing() executing additional code at a per-packet cost for ingress qdisc, just to realize that nothing is attached on ingress. Instead, this can just be blinded out as a no-op entirely with the use of a static key. On input fast-path, we already make use of static keys in various places, e.g. skb time stamping, in RPS, etc. It makes sense to not waste time when we're assured that no ingress qdisc is attached anywhere. Enabling/disabling of that code path is being done via two helpers, namely net_{inc,dec}_ingress_queue(), that are being invoked under RTNL mutex when an ingress qdisc is being either initialized or destructed. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 15 ++++++++++++++- net/core/dev.c | 31 ++++++++++++++++++++++++------- net/sched/sch_ingress.c | 9 +++++++++ 3 files changed, 47 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5db76a32fcab..2da5d1081ad9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -77,7 +77,20 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } -extern struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); +struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); + +#ifdef CONFIG_NET_CLS_ACT +void net_inc_ingress_queue(void); +void net_dec_ingress_queue(void); +#else +static inline void net_inc_ingress_queue(void) +{ +} + +static inline void net_dec_ingress_queue(void) +{ +} +#endif extern void rtnetlink_init(void); extern void __rtnl_unlock(void); diff --git a/net/core/dev.c b/net/core/dev.c index b2775f06c710..af4a1b0adc10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1630,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +#ifdef CONFIG_NET_CLS_ACT +static struct static_key ingress_needed __read_mostly; + +void net_inc_ingress_queue(void) +{ + static_key_slow_inc(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_ingress_queue); + +void net_dec_ingress_queue(void) +{ + static_key_slow_dec(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_ingress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -3547,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) - goto out; + return skb; if (*pt_prev) { *ret 
= deliver_skb(skb, *pt_prev, orig_dev); @@ -3561,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return NULL; } -out: - skb->tc_verd = 0; return skb; } #endif @@ -3698,12 +3712,15 @@ another_round: skip_taps: #ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto unlock; + if (static_key_false(&ingress_needed)) { + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto unlock; + } + + skb->tc_verd = 0; ncls: #endif - if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index eb5b8445fef9..4cdbfb85686a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -88,11 +88,19 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* ------------------------------------------------------------- */ +static int ingress_init(struct Qdisc *sch, struct nlattr *opt) +{ + net_inc_ingress_queue(); + + return 0; +} + static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); tcf_destroy_chain(&p->filter_list); + net_dec_ingress_queue(); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -124,6 +132,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .id = "ingress", .priv_size = sizeof(struct ingress_qdisc_data), .enqueue = ingress_enqueue, + .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, .owner = THIS_MODULE, -- cgit v1.2.3 From 8b86a61da37cbbcf4bd6e87fda494a59b1cf16c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2015 15:45:04 +0200 Subject: net: remove unused 'dev' argument from netif_needs_gso() In commit 04ffcb255f22 ("net: Add ndo_gso_check") Tom originally added the 'dev' argument to be able to call ndo_gso_check(). 
Then later, when generalizing this in commit 5f35227ea34b ("net: Generalize ndo_gso_check to ndo_features_check") Jesse removed the call to ndo_gso_check() in netif_needs_gso() by calling the new ndo_features_check() in a different place. This made the 'dev' argument unused. Remove the unused argument and go back to the code as before. Cc: Tom Herbert Cc: Jesse Gross Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 2 +- drivers/net/xen-netfront.c | 2 +- include/linux/netdevice.h | 2 +- net/core/dev.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 8362aef0c15e..58c6ba5746d5 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -313,7 +313,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) */ if (q->flags & IFF_VNET_HDR) features |= vlan->tap_features; - if (netif_needs_gso(dev, skb, features)) { + if (netif_needs_gso(skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); if (IS_ERR(segs)) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 720aaf6313d2..8dcf31063e9a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -560,7 +560,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!netif_carrier_ok(dev) || (slots > 1 && !xennet_can_sg(dev)) || - netif_needs_gso(dev, skb, netif_skb_features(skb)))) { + netif_needs_gso(skb, netif_skb_features(skb)))) { spin_unlock_irqrestore(&queue->tx_lock, flags); goto drop; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5679aed660b..bcbde799ec69 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3713,7 +3713,7 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline bool 
netif_needs_gso(struct net_device *dev, struct sk_buff *skb, +static inline bool netif_needs_gso(struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || diff --git a/net/core/dev.c b/net/core/dev.c index af4a1b0adc10..1796cef55ab5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (unlikely(!skb)) goto out_null; - if (netif_needs_gso(dev, skb, features)) { + if (netif_needs_gso(skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); -- cgit v1.2.3