diff options
Diffstat (limited to 'net')
224 files changed, 3655 insertions, 2715 deletions
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 1e0071fdcf72..78c8a495b571 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -366,7 +366,18 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, return err; } - hdr.payload_len = htons(skb->len); + switch (lowpan_priv(dev)->lltype) { + case LOWPAN_LLTYPE_IEEE802154: + if (lowpan_802154_cb(skb)->d_size) + hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size - + sizeof(struct ipv6hdr)); + else + hdr.payload_len = htons(skb->len); + break; + default: + hdr.payload_len = htons(skb->len); + break; + } pr_debug("skb headroom size = %d, data length = %d\n", skb_headroom(skb), skb->len); diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c index c6bcaeb428ae..72d0b57eb6e5 100644 --- a/net/6lowpan/nhc_udp.c +++ b/net/6lowpan/nhc_udp.c @@ -71,7 +71,18 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed) * here, we obtain the hint from the remaining size of the * frame */ - uh.len = htons(skb->len + sizeof(struct udphdr)); + switch (lowpan_priv(skb->dev)->lltype) { + case LOWPAN_LLTYPE_IEEE802154: + if (lowpan_802154_cb(skb)->d_size) + uh.len = htons(lowpan_802154_cb(skb)->d_size - + sizeof(struct ipv6hdr)); + else + uh.len = htons(skb->len + sizeof(struct udphdr)); + break; + default: + uh.len = htons(skb->len + sizeof(struct udphdr)); + break; + } pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len)); /* replace the compressed UDP head by the uncompressed UDP diff --git a/net/Kconfig b/net/Kconfig index 7021c1bf44d6..127da94ae25e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,7 @@ source "net/netlink/Kconfig" source "net/mpls/Kconfig" source "net/hsr/Kconfig" source "net/switchdev/Kconfig" +source "net/l3mdev/Kconfig" config RPS bool diff --git a/net/Makefile b/net/Makefile index 3995613e5510..a5d04098dfce 100644 --- a/net/Makefile +++ b/net/Makefile @@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ endif +ifneq ($(CONFIG_NET_L3_MASTER_DEV),) +obj-y += l3mdev/ +endif diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index adcbc74c2432..a7cdd99ec3f1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -693,7 +693,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - if (hdev->commands[6] & 0x20) { + if (hdev->commands[6] & 0x20 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { struct hci_cp_read_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); @@ -1548,7 +1549,7 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) BT_DBG("All LE pending actions cleared"); } -static int hci_dev_do_close(struct hci_dev *hdev) +int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 186041866315..8acec932123a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4719,6 +4719,27 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, struct hci_conn *conn; bool match; u32 flags; + u8 *ptr, real_len; + + /* Find the end of the data in case the report contains padded zero + * bytes at the end causing an invalid length value. + * + * When data is NULL, len is 0 so there is no need for extra ptr + * check as 'ptr < data + 0' is already false in such case. + */ + for (ptr = data; ptr < data + len && *ptr; ptr += *ptr + 1) { + if (ptr + 1 + *ptr > data + len) + break; + } + + real_len = ptr - data; + + /* Adjust for actual length */ + if (len != real_len) { + BT_ERR_RATELIMITED("%s advertising data length corrected", + hdev->name); + len = real_len; + } /* If the direct address is present, then this report is from * a LE Direct Advertising Report event. In that case it is diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f2d30d1156c9..150556345263 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -503,7 +503,16 @@ static int hci_sock_release(struct socket *sock) if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { - hci_dev_close(hdev->id); + /* When releasing an user channel exclusive access, + * call hci_dev_do_close directly instead of calling + * hci_dev_close to ensure the exclusive access will + * be released and the controller brought back down. + * + * The checking of HCI_AUTO_OFF is not needed in this + * case since it will have been cleared already when + * opening the user channel. + */ + hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); } diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index b36bc0415854..8b4cdce3f62e 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -166,3 +166,19 @@ void bt_err(const char *format, ...) va_end(args); } EXPORT_SYMBOL(bt_err); + +void bt_err_ratelimited(const char *format, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + pr_err_ratelimited("%pV", &vaf); + + va_end(args); +} +EXPORT_SYMBOL(bt_err_ratelimited); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 0510a577a7b5..25644e1bc479 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -495,7 +495,7 @@ static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16], } /* The output of the random address function ah is: - * ah(h, r) = e(k, r') mod 2^24 + * ah(k, r) = e(k, r') mod 2^24 * The output of the security function e is then truncated to 24 bits * by taking the least significant 24 bits of the output of e as the * result of ah. diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6ed2feb51e3c..bdfb9544ca03 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -56,7 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + if (!br_allowed_ingress(br, br_vlan_group(br), skb, &vid)) goto out; if (is_broadcast_ether_addr(dest)) @@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->ageing_time = 300 * HZ; + br->ageing_time = BR_DEFAULT_AGEING_TIME; br_netfilter_rtable_init(br); br_stp_timer_init(br); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875da0a4f..7826782d62ab 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -133,15 +133,12 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .u.fdb = { - .addr = f->addr.addr, - .vid = f->vlan_id, - }, + struct switchdev_obj_fdb fdb = { + .addr = f->addr.addr, + .vid = f->vlan_id, }; - switchdev_port_obj_del(f->dst->dev, &obj); + switchdev_port_obj_del(f->dst->dev, SWITCHDEV_OBJ_PORT_FDB, &fdb); } static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) @@ -163,22 +160,27 @@ static void fdb_delete_local(struct net_bridge *br, struct net_bridge_fdb_entry *f) { const unsigned char *addr = f->addr.addr; - u16 vid = f->vlan_id; + struct net_bridge_vlan_group *vg; + const struct net_bridge_vlan *v; struct net_bridge_port *op; + u16 vid = f->vlan_id; /* Maybe another port has same hw addr? */ list_for_each_entry(op, &br->port_list, list) { + vg = nbp_vlan_group(op); if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && - (!vid || nbp_vlan_find(op, vid))) { + (!vid || br_vlan_find(vg, vid))) { f->dst = op; f->added_by_user = 0; return; } } + vg = br_vlan_group(br); + v = br_vlan_find(vg, vid); /* Maybe bridge device has same hw addr? */ if (p && ether_addr_equal(br->dev->dev_addr, addr) && - (!vid || br_vlan_find(br, vid))) { + (!vid || (v && br_vlan_should_use(v)))) { f->dst = NULL; f->added_by_user = 0; return; @@ -203,14 +205,14 @@ void br_fdb_find_delete_local(struct net_bridge *br, void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { + struct net_bridge_vlan_group *vg; struct net_bridge *br = p->br; - struct net_port_vlans *pv = nbp_get_vlan_info(p); - bool no_vlan = !pv; + struct net_bridge_vlan *v; int i; - u16 vid; spin_lock_bh(&br->hash_lock); + vg = nbp_vlan_group(p); /* Search all chains since old address/hash is unknown */ for (i = 0; i < BR_HASH_SIZE; i++) { struct hlist_node *h; @@ -226,7 +228,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) * configured, we can safely be done at * this point. */ - if (no_vlan) + if (!vg || !vg->num_vlans) goto insert; } } @@ -236,15 +238,15 @@ insert: /* insert new address, may fail if invalid address or dup. */ fdb_insert(br, p, newaddr, 0); - if (no_vlan) + if (!vg || !vg->num_vlans) goto done; /* Now add entries for every VLAN configured on the port. * This function runs under RTNL so the bitmap will not change * from under us. */ - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - fdb_insert(br, p, newaddr, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) + fdb_insert(br, p, newaddr, v->vid); done: spin_unlock_bh(&br->hash_lock); @@ -252,9 +254,9 @@ done: void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { + struct net_bridge_vlan_group *vg; struct net_bridge_fdb_entry *f; - struct net_port_vlans *pv; - u16 vid = 0; + struct net_bridge_vlan *v; spin_lock_bh(&br->hash_lock); @@ -264,20 +266,18 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, 0); - + vg = br_vlan_group(br); + if (!vg || !vg->num_vlans) + goto out; /* Now remove and add entries for every VLAN configured on the * bridge. This function runs under RTNL so the bitmap will not * change from under us. */ - pv = br_get_vlan_info(br); - if (!pv) - goto out; - - for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { - f = __br_fdb_get(br, br->dev->dev_addr, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) { + f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); - fdb_insert(br, NULL, newaddr, vid); + fdb_insert(br, NULL, newaddr, v->vid); } out: spin_unlock_bh(&br->hash_lock); @@ -299,6 +299,8 @@ void br_fdb_cleanup(unsigned long _data) unsigned long this_timer; if (f->is_static) continue; + if (f->added_by_external_learn) + continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(br, f); @@ -842,9 +844,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags) { + struct net_bridge_vlan_group *vg; struct net_bridge_port *p; + struct net_bridge_vlan *v; int err = 0; - struct net_port_vlans *pv; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); @@ -863,9 +866,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - pv = nbp_get_vlan_info(p); + vg = nbp_vlan_group(p); if (vid) { - if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + v = br_vlan_find(vg, vid); + if (!v) { pr_info("bridge: RTM_NEWNEIGH with unconfigured " "vlan %d on port %s\n", vid, dev->name); return -EINVAL; @@ -875,15 +879,15 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); - if (err || !pv) + if (err || !vg || !vg->num_vlans) goto out; /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, v->vid); if (err) goto out; } @@ -925,9 +929,10 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { + struct net_bridge_vlan_group *vg; struct net_bridge_port *p; + struct net_bridge_vlan *v; int err; - struct net_port_vlans *pv; p = br_port_get_rtnl(dev); if (p == NULL) { @@ -936,9 +941,10 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - pv = nbp_get_vlan_info(p); + vg = nbp_vlan_group(p); if (vid) { - if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + v = br_vlan_find(vg, vid); + if (!v) { pr_info("bridge: RTM_DELNEIGH with unconfigured " "vlan %d on port %s\n", vid, dev->name); return -EINVAL; @@ -948,16 +954,11 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], } else { err = -ENOENT; err &= __br_fdb_delete(p, addr, 0); - if (!pv) + if (!vg || !vg->num_vlans) goto out; - /* We have vlans configured on this port and user didn't - * specify a VLAN. To be nice, add/update entry for every - * vlan on this port. - */ - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - err &= __br_fdb_delete(p, addr, vid); - } + list_for_each_entry(v, &vg->vlan_list, vlist) + err &= __br_fdb_delete(p, addr, v->vid); } out: return err; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index fa7bfced888e..6d5ed795c3e2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -30,12 +30,14 @@ static int deliver_clone(const struct net_bridge_port *prev, static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { + struct net_bridge_vlan_group *vg; + + vg = nbp_vlan_group(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && - p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; } -int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { if (!is_skb_forwardable(skb->dev, skb)) goto drop; @@ -65,10 +67,10 @@ drop: } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); -int br_forward_finish(struct sock *sk, struct sk_buff *skb) +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, - NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, + net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); } @@ -76,7 +78,10 @@ EXPORT_SYMBOL_GPL(br_forward_finish); static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { - skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + struct net_bridge_vlan_group *vg; + + vg = nbp_vlan_group(to); + skb = br_handle_vlan(to->br, vg, skb); if (!skb) return; @@ -92,13 +97,14 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) return; } - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(skb->dev), NULL, skb,NULL, skb->dev, br_forward_finish); } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { + struct net_bridge_vlan_group *vg; struct net_device *indev; if (skb_warn_if_lro(skb)) { @@ -106,7 +112,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) return; } - skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + vg = nbp_vlan_group(to); + skb = br_handle_vlan(to->br, vg, skb); if (!skb) return; @@ -114,8 +121,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, - indev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, + dev_net(indev), NULL, skb, indev, skb->dev, br_forward_finish); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 45e4757c6fd2..934cae9fa317 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -248,7 +248,6 @@ static void del_nbp(struct net_bridge_port *p) list_del_rcu(&p->list); - nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); @@ -257,6 +256,8 @@ static void del_nbp(struct net_bridge_port *p) dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); + /* use the synchronize_rcu done by netdev_rx_handler_unregister */ + nbp_vlan_flush(p); br_multicast_del_port(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f921a5dce22d..f5c5a4500e2f 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -26,38 +26,44 @@ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; EXPORT_SYMBOL(br_should_route_hook); +static int +br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return netif_receive_skb(skb); +} + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); + struct net_bridge_vlan_group *vg; struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); - struct net_port_vlans *pv; u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + vg = br_vlan_group(br); /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc modue when someone * may be running packet capture. */ - pv = br_get_vlan_info(br); if (!(brdev->flags & IFF_PROMISC) && - !br_allowed_egress(br, pv, skb)) { + !br_allowed_egress(vg, skb)) { kfree_skb(skb); return NET_RX_DROP; } indev = skb->dev; skb->dev = brdev; - skb = br_handle_vlan(br, pv, skb); + skb = br_handle_vlan(br, vg, skb); if (!skb) return NET_RX_DROP; - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, - indev, NULL, - netif_receive_skb_sk); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(indev), NULL, skb, indev, NULL, + br_netif_receive_skb); } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, @@ -120,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } /* note: already called with rcu_read_lock */ -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -134,7 +140,7 @@ int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) if (!p || p->state == BR_STATE_DISABLED) goto drop; - if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) + if (!br_allowed_ingress(p->br, nbp_vlan_group(p), skb, &vid)) goto out; /* insert into forwarding database after filtering to avoid spoofing */ @@ -208,7 +214,7 @@ drop: EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -278,8 +284,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } /* Deliver packet to local host only */ - if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, - skb->dev, NULL, br_handle_local_finish)) { + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; @@ -303,8 +310,8 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index d747275fad18..cd8deea2d074 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -464,11 +464,11 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - unsigned short vid = VLAN_N_VID; + struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; struct net_bridge_port *p; - struct net_port_vlans *pv; + struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -489,10 +489,10 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; - pv = nbp_get_vlan_info(p); - if (br_vlan_enabled(br) && pv && entry->vid == 0) { - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - entry->vid = vid; + vg = nbp_vlan_group(p); + if (br_vlan_enabled(br) && vg && entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + entry->vid = v->vid; err = __br_mdb_add(net, br, entry); if (err) break; @@ -566,11 +566,11 @@ unlock: static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - unsigned short vid = VLAN_N_VID; + struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; struct net_bridge_port *p; - struct net_port_vlans *pv; + struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -591,10 +591,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; - pv = nbp_get_vlan_info(p); - if (br_vlan_enabled(br) && pv && entry->vid == 0) { - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - entry->vid = vid; + vg = nbp_vlan_group(p); + if (br_vlan_enabled(br) && vg && entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + entry->vid = v->vid; err = __br_mdb_del(br, entry); if (!err) __br_mdb_notify(dev, entry, RTM_DELMDB); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 480b3de1a0e3..03661d97463c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -829,8 +829,8 @@ static void __br_multicast_send_query(struct net_bridge *br, if (port) { skb->dev = port->dev; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 0a6f095bb0c9..13f03671c88d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -189,10 +189,9 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) * expected format */ -static int br_validate_ipv4(struct sk_buff *skb) +static int br_validate_ipv4(struct net *net, struct sk_buff *skb) { const struct iphdr *iph; - struct net_device *dev = skb->dev; u32 len; if (!pskb_may_pull(skb, sizeof(struct iphdr))) @@ -213,13 +212,13 @@ static int br_validate_ipv4(struct sk_buff *skb) len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -232,7 +231,7 @@ static int br_validate_ipv4(struct sk_buff *skb) return 0; inhdr_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } @@ -256,7 +255,7 @@ void nf_bridge_update_protocol(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) +int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; @@ -273,7 +272,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(sk, skb); + ret = br_handle_frame_finish(net, sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and @@ -342,7 +341,7 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. */ -static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) +static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); @@ -371,7 +370,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - rt = ip_route_output(dev_net(dev), iph->daddr, 0, + rt = ip_route_output(net, iph->daddr, 0, RT_TOS(iph->tos), 0); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't @@ -393,7 +392,7 @@ bridged_dnat: nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, + net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -413,7 +412,7 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -464,7 +463,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -486,7 +485,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, state); + return br_nf_pre_routing_ipv6(priv, skb, state); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -497,7 +496,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header_rcsum(skb); - if (br_validate_ipv4(skb)) + if (br_validate_ipv4(state->net, skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -511,7 +510,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, skb->protocol = htons(ETH_P_IP); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish); @@ -526,7 +525,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, +static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -535,7 +534,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, } /* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) +static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; @@ -559,7 +558,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, in, skb->dev, br_forward_finish, 1); return 0; } @@ -570,7 +569,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_ip(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -609,13 +608,13 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, } if (pf == NFPROTO_IPV4) { - if (br_validate_ipv4(skb)) + if (br_validate_ipv4(state->net, skb)) return NF_DROP; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } if (pf == NFPROTO_IPV6) { - if (br_validate_ipv6(skb)) + if (br_validate_ipv6(state->net, skb)) return NF_DROP; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; } @@ -626,14 +625,14 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, brnf_get_logical_dev(skb, state->in), parent, br_nf_forward_finish); return NF_STOLEN; } -static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -661,14 +660,14 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_ACCEPT; } *d = state->in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; int err; @@ -690,23 +689,27 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) __skb_push(skb, data->encap_size); nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); +} +static int br_nf_push_frag_xmit_sk(struct sock *sk, struct sk_buff *skb) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + return br_nf_push_frag_xmit(net, sk, skb); } #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +static int +br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { unsigned int mtu = ip_skb_dst_mtu(skb); struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -722,7 +725,7 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; @@ -731,7 +734,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); } nf_bridge = nf_bridge_info_get(skb); @@ -743,7 +746,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; - if (br_validate_ipv4(skb)) + if (br_validate_ipv4(net, skb)) goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -760,7 +763,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); + return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit_sk); } #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) @@ -768,7 +771,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); struct brnf_frag_data *data; - if (br_validate_ipv6(skb)) + if (br_validate_ipv6(net, skb)) goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -783,21 +786,21 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) data->size); if (v6ops) - return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); + return v6ops->fragment(sk, skb, br_nf_push_frag_xmit_sk); kfree_skb(skb); return -EMSGSIZE; } #endif nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); drop: kfree_skb(skb); return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_post_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -836,7 +839,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, NULL, realoutdev, br_nf_dev_queue_xmit); @@ -846,7 +849,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, +static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -880,7 +883,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge->physoutdev = NULL; - br_handle_frame_finish(NULL, skb); + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 77383bfe7ea3..d61f56efc8dc 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -100,10 +100,9 @@ bad: return -1; } -int br_validate_ipv6(struct sk_buff *skb) +int br_validate_ipv6(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *hdr; - struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); @@ -123,12 +122,12 @@ int br_validate_ipv6(struct sk_buff *skb) if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(dev_net(dev), idev, + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(dev_net(dev), idev, + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -143,7 +142,7 @@ int br_validate_ipv6(struct sk_buff *skb) return 0; inhdr_error: - IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); drop: return -1; } @@ -161,7 +160,7 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, * for br_nf_pre_routing_finish(), same logic is used here but * equivalent IPv6 function ip6_route_input() called indirectly. */ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; @@ -189,7 +188,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, + net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -208,7 +207,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -218,13 +217,13 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables. */ -unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, +unsigned int br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; - if (br_validate_ipv6(skb)) + if (br_validate_ipv6(state->net, skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -237,7 +236,7 @@ unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ea748c93a07f..c64dcad11662 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -21,36 +21,35 @@ #include "br_private.h" #include "br_private_stp.h" -static int br_get_num_vlan_infos(const struct net_port_vlans *pv, - u32 filter_mask) +static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, + u32 filter_mask) { - u16 vid_range_start = 0, vid_range_end = 0; - u16 vid_range_flags = 0; - u16 pvid, vid, flags; + struct net_bridge_vlan *v; + u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0; + u16 flags, pvid; int num_vlans = 0; - if (filter_mask & RTEXT_FILTER_BRVLAN) - return pv->num_vlans; - if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) return 0; - /* Count number of vlan info's - */ - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + pvid = br_get_pvid(vg); + /* Count number of vlan infos */ + list_for_each_entry(v, &vg->vlan_list, vlist) { flags = 0; - if (vid == pvid) + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v)) + continue; + if (v->vid == pvid) flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_range_start == 0) { goto initvars; - } else if ((vid - vid_range_end) == 1 && + } else if ((v->vid - vid_range_end) == 1 && flags == vid_range_flags) { - vid_range_end = vid; + vid_range_end = v->vid; continue; } else { if ((vid_range_end - vid_range_start) > 0) @@ -59,8 +58,8 @@ static int br_get_num_vlan_infos(const struct net_port_vlans *pv, num_vlans += 1; } initvars: - vid_range_start = vid; - vid_range_end = vid; + vid_range_start = v->vid; + vid_range_end = v->vid; vid_range_flags = flags; } @@ -74,28 +73,37 @@ initvars: return num_vlans; } +static int br_get_num_vlan_infos(struct net_bridge_vlan_group *vg, + u32 filter_mask) +{ + if (!vg) + return 0; + + if (filter_mask & RTEXT_FILTER_BRVLAN) + return vg->num_vlans; + + return __get_num_vlan_infos(vg, filter_mask); +} + static size_t br_get_link_af_size_filtered(const struct net_device *dev, u32 filter_mask) { - struct net_port_vlans *pv; + struct net_bridge_vlan_group *vg = NULL; + struct net_bridge_port *p; + struct net_bridge *br; int num_vlan_infos; rcu_read_lock(); - if (br_port_exists(dev)) - pv = nbp_get_vlan_info(br_port_get_rcu(dev)); - else if (dev->priv_flags & IFF_EBRIDGE) - pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); - else - pv = NULL; - if (pv) - num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask); - else - num_vlan_infos = 0; + if (br_port_exists(dev)) { + p = br_port_get_rcu(dev); + vg = nbp_vlan_group(p); + } else if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + } + num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask); rcu_read_unlock(); - if (!num_vlan_infos) - return 0; - /* Each VLAN is returned in bridge_vlan_info along with flags */ return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); } @@ -185,31 +193,33 @@ nla_put_failure: } static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, - const struct net_port_vlans *pv) + struct net_bridge_vlan_group *vg) { - u16 vid_range_start = 0, vid_range_end = 0; - u16 vid_range_flags = 0; - u16 pvid, vid, flags; + struct net_bridge_vlan *v; + u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0; + u16 flags, pvid; int err = 0; /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan * and mark vlan info with begin and end flags * if vlaninfo represents a range */ - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + pvid = br_get_pvid(vg); + list_for_each_entry(v, &vg->vlan_list, vlist) { flags = 0; - if (vid == pvid) + if (!br_vlan_should_use(v)) + continue; + if (v->vid == pvid) flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_range_start == 0) { goto initvars; - } else if ((vid - vid_range_end) == 1 && + } else if ((v->vid - vid_range_end) == 1 && flags == vid_range_flags) { - vid_range_end = vid; + vid_range_end = v->vid; continue; } else { err = br_fill_ifvlaninfo_range(skb, vid_range_start, @@ -220,8 +230,8 @@ static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, } initvars: - vid_range_start = vid; - vid_range_end = vid; + vid_range_start = v->vid; + vid_range_end = v->vid; vid_range_flags = flags; } @@ -238,19 +248,23 @@ initvars: } static int br_fill_ifvlaninfo(struct sk_buff *skb, - const struct net_port_vlans *pv) + struct net_bridge_vlan_group *vg) { struct bridge_vlan_info vinfo; - u16 pvid, vid; + struct net_bridge_vlan *v; + u16 pvid; - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - vinfo.vid = vid; + pvid = br_get_pvid(vg); + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + + vinfo.vid = v->vid; vinfo.flags = 0; - if (vid == pvid) + if (v->vid == pvid) vinfo.flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, @@ -269,11 +283,11 @@ nla_put_failure: * Contains port and master info as well as carrier and bridge state. */ static int br_fill_ifinfo(struct sk_buff *skb, - const struct net_bridge_port *port, + struct net_bridge_port *port, u32 pid, u32 seq, int event, unsigned int flags, u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br; + struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; @@ -320,16 +334,16 @@ static int br_fill_ifinfo(struct sk_buff *skb, /* Check if the VID information is requested */ if ((filter_mask & RTEXT_FILTER_BRVLAN) || (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { - const struct net_port_vlans *pv; + struct net_bridge_vlan_group *vg; struct nlattr *af; int err; if (port) - pv = nbp_get_vlan_info(port); + vg = nbp_vlan_group(port); else - pv = br_get_vlan_info(br); + vg = br_vlan_group(br); - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) + if (!vg || !vg->num_vlans) goto done; af = nla_nest_start(skb, IFLA_AF_SPEC); @@ -337,9 +351,9 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) - err = br_fill_ifvlaninfo_compressed(skb, pv); + err = br_fill_ifvlaninfo_compressed(skb, vg); else - err = br_fill_ifvlaninfo(skb, pv); + err = br_fill_ifvlaninfo(skb, vg); if (err) goto nla_put_failure; nla_nest_end(skb, af); @@ -413,14 +427,14 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, switch (cmd) { case RTM_SETLINK: if (p) { + /* if the MASTER flag is set this will act on the global + * per-VLAN entry as well + */ err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); } else { + vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; err = br_vlan_add(br, vinfo->vid, vinfo->flags); } break; @@ -857,20 +871,22 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) static size_t br_get_link_af_size(const struct net_device *dev) { - struct net_port_vlans *pv; - - if (br_port_exists(dev)) - pv = nbp_get_vlan_info(br_port_get_rtnl(dev)); - else if (dev->priv_flags & IFF_EBRIDGE) - pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); - else - return 0; + struct net_bridge_port *p; + struct net_bridge *br; + int num_vlans = 0; - if (!pv) - return 0; + if (br_port_exists(dev)) { + p = br_port_get_rtnl(dev); + num_vlans = br_get_num_vlan_infos(nbp_vlan_group(p), + RTEXT_FILTER_BRVLAN); + } else if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + num_vlans = br_get_num_vlan_infos(br_vlan_group(br), + RTEXT_FILTER_BRVLAN); + } /* Each VLAN is returned in bridge_vlan_info along with flags */ - return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); + return num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); } static struct rtnl_af_ops br_af_ops __read_mostly = { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 213baf7aaa93..4ed8308db66e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -20,6 +20,7 @@ #include <net/route.h> #include <net/ip6_fib.h> #include <linux/if_vlan.h> +#include <linux/rhashtable.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -28,7 +29,6 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) -#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) #define BR_VERSION "2.3" @@ -77,17 +77,61 @@ struct bridge_mcast_querier { }; #endif -struct net_port_vlans { - u16 port_idx; - u16 pvid; +/** + * struct net_bridge_vlan - per-vlan entry + * + * @vnode: rhashtable member + * @vid: VLAN id + * @flags: bridge vlan flags + * @br: if MASTER flag set, this points to a bridge struct + * @port: if MASTER flag unset, this points to a port struct + * @refcnt: if MASTER flag set, this is bumped for each port referencing it + * @brvlan: if MASTER flag unset, this points to the global per-VLAN context + * for this VLAN entry + * @vlist: sorted list of VLAN entries + * @rcu: used for entry destruction + * + * This structure is shared between the global per-VLAN entries contained in + * the bridge rhashtable and the local per-port per-VLAN entries contained in + * the port's rhashtable. The union entries should be interpreted depending on + * the entry flags that are set. + */ +struct net_bridge_vlan { + struct rhash_head vnode; + u16 vid; + u16 flags; union { - struct net_bridge_port *port; - struct net_bridge *br; - } parent; + struct net_bridge *br; + struct net_bridge_port *port; + }; + union { + atomic_t refcnt; + struct net_bridge_vlan *brvlan; + }; + struct list_head vlist; + struct rcu_head rcu; - unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; - unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; +}; + +/** + * struct net_bridge_vlan_group + * + * @vlan_hash: VLAN entry rhashtable + * @vlan_list: sorted VLAN entry list + * @num_vlans: number of total VLAN entries + * @pvid: PVID VLAN id + * + * IMPORTANT: Be careful when checking if there're VLAN entries using list + * primitives because the bridge can have entries in its list which + * are just for global context but not for filtering, i.e. they have + * the master flag set but not the brentry flag. If you have to check + * if there're "real" entries in the bridge please test @num_vlans + */ +struct net_bridge_vlan_group { + struct rhashtable vlan_hash; + struct list_head vlan_list; u16 num_vlans; + u16 pvid; }; struct net_bridge_fdb_entry @@ -185,7 +229,7 @@ struct net_bridge_port struct netpoll *np; #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_port_vlans __rcu *vlan_info; + struct net_bridge_vlan_group *vlgrp; #endif }; @@ -293,10 +337,10 @@ struct net_bridge struct kobject *ifobj; u32 auto_cnt; #ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_bridge_vlan_group *vlgrp; u8 vlan_enabled; __be16 vlan_proto; u16 default_pvid; - struct net_port_vlans __rcu *vlan_info; #endif }; @@ -344,6 +388,31 @@ static inline int br_is_root_bridge(const struct net_bridge *br) return !memcmp(&br->bridge_id, &br->designated_root, 8); } +/* check if a VLAN entry is global */ +static inline bool br_vlan_is_master(const struct net_bridge_vlan *v) +{ + return v->flags & BRIDGE_VLAN_INFO_MASTER; +} + +/* check if a VLAN entry is used by the bridge */ +static inline bool br_vlan_is_brentry(const struct net_bridge_vlan *v) +{ + return v->flags & BRIDGE_VLAN_INFO_BRENTRY; +} + +/* check if we should use the vlan entry is usable */ +static inline bool br_vlan_should_use(const struct net_bridge_vlan *v) +{ + if (br_vlan_is_master(v)) { + if (br_vlan_is_brentry(v)) + return true; + else + return false; + } + + return true; +} + /* br_device.c */ void br_dev_setup(struct net_device *dev); void br_dev_delete(struct net_device *dev, struct list_head *list); @@ -413,10 +482,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -int br_forward_finish(struct sock *sk, struct sk_buff *skb); +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2, bool unicast); @@ -434,7 +503,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) @@ -601,18 +670,19 @@ static inline void br_mdb_uninit(void) /* br_vlan.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING -bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, - struct sk_buff *skb, u16 *vid); -bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, +bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, + u16 *vid); +bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *v, + struct net_bridge_vlan_group *vg, struct sk_buff *skb); int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); -bool br_vlan_find(struct net_bridge *br, u16 vid); +struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); @@ -623,19 +693,19 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); -bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); int nbp_vlan_init(struct net_bridge_port *port); +int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); -static inline struct net_port_vlans *br_get_vlan_info( - const struct net_bridge *br) +static inline struct net_bridge_vlan_group *br_vlan_group( + const struct net_bridge *br) { - return rcu_dereference_rtnl(br->vlan_info); + return br->vlgrp; } -static inline struct net_port_vlans *nbp_get_vlan_info( - const struct net_bridge_port *p) +static inline struct net_bridge_vlan_group *nbp_vlan_group( + const struct net_bridge_port *p) { - return rcu_dereference_rtnl(p->vlan_info); + return p->vlgrp; } /* Since bridge now depends on 8021Q module, but the time bridge sees the @@ -645,9 +715,9 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; - else { + } else { *vid = 0; err = -EINVAL; } @@ -655,13 +725,13 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) return err; } -static inline u16 br_get_pvid(const struct net_port_vlans *v) +static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { - if (!v) + if (!vg) return 0; smp_rmb(); - return v->pvid; + return vg->pvid; } static inline int br_vlan_enabled(struct net_bridge *br) @@ -669,16 +739,15 @@ static inline int br_vlan_enabled(struct net_bridge *br) return br->vlan_enabled; } #else -static inline bool br_allowed_ingress(struct net_bridge *br, - struct net_port_vlans *v, +static inline bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid) { return true; } -static inline bool br_allowed_egress(struct net_bridge *br, - const struct net_port_vlans *v, +static inline bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb) { return true; @@ -691,7 +760,7 @@ static inline bool br_should_learn(struct net_bridge_port *p, } static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *v, + struct net_bridge_vlan_group *vg, struct sk_buff *skb) { return skb; @@ -711,11 +780,6 @@ static inline void br_vlan_flush(struct net_bridge *br) { } -static inline bool br_vlan_find(struct net_bridge *br, u16 vid) -{ - return false; -} - static inline void br_recalculate_fwd_mask(struct net_bridge *br) { } @@ -739,21 +803,11 @@ static inline void nbp_vlan_flush(struct net_bridge_port *port) { } -static inline struct net_port_vlans *br_get_vlan_info( - const struct net_bridge *br) +static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, + u16 vid) { return NULL; } -static inline struct net_port_vlans *nbp_get_vlan_info( - const struct net_bridge_port *p) -{ - return NULL; -} - -static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) -{ - return false; -} static inline int nbp_vlan_init(struct net_bridge_port *port) { @@ -764,7 +818,8 @@ static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) { return 0; } -static inline u16 br_get_pvid(const struct net_port_vlans *v) + +static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { return 0; } @@ -779,6 +834,24 @@ static inline int __br_vlan_filter_toggle(struct net_bridge *br, { return -EOPNOTSUPP; } + +static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p, + u32 filter_mask) +{ + return 0; +} + +static inline struct net_bridge_vlan_group *br_vlan_group( + const struct net_bridge *br) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group( + const struct net_bridge_port *p) +{ + return NULL; +} #endif struct nf_br_ops { diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index ed74ffaa851f..3a7392e6010e 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -576,17 +576,12 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t) int br_set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); - int err = -ERANGE; - spin_lock_bh(&br->lock); - if (br->stp_enabled != BR_NO_STP && - (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) - goto unlock; + if (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY) + return -ERANGE; + spin_lock_bh(&br->lock); __br_set_forward_delay(br, t); - err = 0; - -unlock: spin_unlock_bh(&br->lock); - return err; + return 0; } diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 534fc4cd263e..5881fbc114a9 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -30,6 +30,12 @@ #define LLC_RESERVE sizeof(struct llc_pdu_un) +static int br_send_bpdu_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + static void br_send_bpdu(struct net_bridge_port *p, const unsigned char *data, int length) { @@ -54,9 +60,9 @@ static void br_send_bpdu(struct net_bridge_port *p, skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, - dev_queue_xmit_sk); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(p->dev), NULL, skb, NULL, skb->dev, + br_send_bpdu_finish); } static inline void br_set_ticks(unsigned char *dest, int j) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 5f5a02b49a99..75214a51cf0e 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -6,35 +6,67 @@ #include "br_private.h" -static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) { - if (v->pvid == vid) + const struct net_bridge_vlan *vle = ptr; + u16 vid = *(u16 *)arg->key; + + return vle->vid != vid; +} + +static const struct rhashtable_params br_vlan_rht_params = { + .head_offset = offsetof(struct net_bridge_vlan, vnode), + .key_offset = offsetof(struct net_bridge_vlan, vid), + .key_len = sizeof(u16), + .nelem_hint = 3, + .locks_mul = 1, + .max_size = VLAN_N_VID, + .obj_cmpfn = br_vlan_cmp, + .automatic_shrinking = true, +}; + +static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid) +{ + return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); +} + +static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) +{ + if (vg->pvid == vid) return; smp_wmb(); - v->pvid = vid; + vg->pvid = vid; } -static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) { - if (v->pvid != vid) + if (vg->pvid != vid) return; smp_wmb(); - v->pvid = 0; + vg->pvid = 0; } -static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) { + struct net_bridge_vlan_group *vg; + + if (br_vlan_is_master(v)) + vg = v->br->vlgrp; + else + vg = v->port->vlgrp; + if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_pvid(vg, v->vid); else - __vlan_delete_pvid(v, vid); + __vlan_delete_pvid(vg, v->vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) - set_bit(vid, v->untagged_bitmap); + v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; else - clear_bit(vid, v->untagged_bitmap); + v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, @@ -50,16 +82,13 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, if (ops->ndo_vlan_rx_add_vid) { err = vlan_vid_add(dev, br->vlan_proto, vid); } else { - struct switchdev_obj vlan_obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - .u.vlan = { - .flags = flags, - .vid_begin = vid, - .vid_end = vid, - }, + struct switchdev_obj_vlan v = { + .flags = flags, + .vid_begin = vid, + .vid_end = vid, }; - err = switchdev_port_obj_add(dev, &vlan_obj); + err = switchdev_port_obj_add(dev, SWITCHDEV_OBJ_PORT_VLAN, &v); if (err == -EOPNOTSUPP) err = 0; } @@ -67,54 +96,26 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, return err; } -static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +static void __vlan_add_list(struct net_bridge_vlan *v) { - struct net_bridge_port *p = NULL; - struct net_bridge *br; - struct net_device *dev; - int err; - - if (test_bit(vid, v->vlan_bitmap)) { - __vlan_add_flags(v, vid, flags); - return 0; - } - - if (v->port_idx) { - p = v->parent.port; - br = p->br; - dev = p->dev; - } else { - br = v->parent.br; - dev = br->dev; - } - - if (p) { - /* Add VLAN to the device filter if it is supported. - * This ensures tagged traffic enters the bridge when - * promiscuous mode is disabled by br_manage_promisc(). - */ - err = __vlan_vid_add(dev, br, vid, flags); - if (err) - return err; - } + struct list_head *headp, *hpos; + struct net_bridge_vlan *vent; - err = br_fdb_insert(br, p, dev->dev_addr, vid); - if (err) { - br_err(br, "failed insert local address into bridge " - "forwarding table\n"); - goto out_filt; + headp = br_vlan_is_master(v) ? &v->br->vlgrp->vlan_list : + &v->port->vlgrp->vlan_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct net_bridge_vlan, vlist); + if (v->vid < vent->vid) + continue; + else + break; } + list_add(&v->vlist, hpos); +} - set_bit(vid, v->vlan_bitmap); - v->num_vlans++; - __vlan_add_flags(v, vid, flags); - - return 0; - -out_filt: - if (p) - vlan_vid_del(dev, br->vlan_proto, vid); - return err; +static void __vlan_del_list(struct net_bridge_vlan *v) +{ + list_del(&v->vlist); } static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, @@ -130,15 +131,12 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, if (ops->ndo_vlan_rx_kill_vid) { vlan_vid_del(dev, br->vlan_proto, vid); } else { - struct switchdev_obj vlan_obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - .u.vlan = { - .vid_begin = vid, - .vid_end = vid, - }, + struct switchdev_obj_vlan v = { + .vid_begin = vid, + .vid_end = vid, }; - err = switchdev_port_obj_del(dev, &vlan_obj); + err = switchdev_port_obj_del(dev, SWITCHDEV_OBJ_PORT_VLAN, &v); if (err == -EOPNOTSUPP) err = 0; } @@ -146,63 +144,193 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, return err; } -static int __vlan_del(struct net_port_vlans *v, u16 vid) +/* This is the shared VLAN add function which works for both ports and bridge + * devices. There are four possible calls to this function in terms of the + * vlan entry type: + * 1. vlan is being added on a port (no master flags, global entry exists) + * 2. vlan is being added on a bridge (both master and brvlan flags) + * 3. vlan is being added on a port, but a global entry didn't exist which + * is being created right now (master flag set, brvlan flag unset), the + * global entry is used for global per-vlan features, but not for filtering + * 4. same as 3 but with both master and brvlan flags set so the entry + * will be used for filtering in both the port and the bridge + */ +static int __vlan_add(struct net_bridge_vlan *v, u16 flags) { - if (!test_bit(vid, v->vlan_bitmap)) - return -EINVAL; + struct net_bridge_vlan *masterv = NULL; + struct net_bridge_port *p = NULL; + struct rhashtable *tbl; + struct net_device *dev; + struct net_bridge *br; + int err; + + if (br_vlan_is_master(v)) { + br = v->br; + dev = br->dev; + tbl = &br->vlgrp->vlan_hash; + } else { + p = v->port; + br = p->br; + dev = p->dev; + tbl = &p->vlgrp->vlan_hash; + } + + if (p) { + u16 master_flags = flags; + + /* Add VLAN to the device filter if it is supported. + * This ensures tagged traffic enters the bridge when + * promiscuous mode is disabled by br_manage_promisc(). + */ + err = __vlan_vid_add(dev, br, v->vid, flags); + if (err) + goto out; + + /* need to work on the master vlan too */ + if (flags & BRIDGE_VLAN_INFO_MASTER) { + master_flags |= BRIDGE_VLAN_INFO_BRENTRY; + err = br_vlan_add(br, v->vid, master_flags); + if (err) + goto out_filt; + } + + masterv = br_vlan_find(br->vlgrp, v->vid); + if (!masterv) { + /* missing global ctx, create it now */ + err = br_vlan_add(br, v->vid, 0); + if (err) + goto out_filt; + masterv = br_vlan_find(br->vlgrp, v->vid); + WARN_ON(!masterv); + } + atomic_inc(&masterv->refcnt); + v->brvlan = masterv; + } + + /* Add the dev mac only if it's a usable vlan */ + if (br_vlan_should_use(v)) { + err = br_fdb_insert(br, p, dev->dev_addr, v->vid); + if (err) { + br_err(br, "failed insert local address into bridge forwarding table\n"); + goto out_filt; + } + } + + err = rhashtable_lookup_insert_fast(tbl, &v->vnode, br_vlan_rht_params); + if (err) + goto out_fdb_insert; + + __vlan_add_list(v); + __vlan_add_flags(v, flags); + if (br_vlan_is_master(v)) { + if (br_vlan_is_brentry(v)) + br->vlgrp->num_vlans++; + } else { + p->vlgrp->num_vlans++; + } +out: + return err; + +out_fdb_insert: + br_fdb_find_delete_local(br, p, br->dev->dev_addr, v->vid); + +out_filt: + if (p) { + __vlan_vid_del(dev, br, v->vid); + if (masterv) { + atomic_dec(&masterv->refcnt); + v->brvlan = NULL; + } + } + + goto out; +} - __vlan_delete_pvid(v, vid); - clear_bit(vid, v->untagged_bitmap); +static int __vlan_del(struct net_bridge_vlan *v) +{ + struct net_bridge_vlan *masterv = v; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_bridge *br; + int err = 0; - if (v->port_idx) { - struct net_bridge_port *p = v->parent.port; - int err; + if (br_vlan_is_master(v)) { + br = v->br; + vg = v->br->vlgrp; + } else { + p = v->port; + br = p->br; + vg = v->port->vlgrp; + masterv = v->brvlan; + } - err = __vlan_vid_del(p->dev, p->br, vid); + __vlan_delete_pvid(vg, v->vid); + if (p) { + err = __vlan_vid_del(p->dev, p->br, v->vid); if (err) - return err; + goto out; } - clear_bit(vid, v->vlan_bitmap); - v->num_vlans--; - if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { - if (v->port_idx) - RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); - else - RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); + if (br_vlan_is_master(v)) { + if (br_vlan_is_brentry(v)) { + v->flags &= ~BRIDGE_VLAN_INFO_BRENTRY; + br->vlgrp->num_vlans--; + } + } else { + p->vlgrp->num_vlans--; + } + + if (masterv != v) { + rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, + br_vlan_rht_params); + __vlan_del_list(v); kfree_rcu(v, rcu); } - return 0; + + if (atomic_dec_and_test(&masterv->refcnt)) { + rhashtable_remove_fast(&masterv->br->vlgrp->vlan_hash, + &masterv->vnode, br_vlan_rht_params); + __vlan_del_list(masterv); + kfree_rcu(masterv, rcu); + } +out: + return err; } -static void __vlan_flush(struct net_port_vlans *v) +static void __vlan_flush(struct net_bridge_vlan_group *vlgrp) { - smp_wmb(); - v->pvid = 0; - bitmap_zero(v->vlan_bitmap, VLAN_N_VID); - if (v->port_idx) - RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); - else - RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); - kfree_rcu(v, rcu); + struct net_bridge_vlan *vlan, *tmp; + + __vlan_delete_pvid(vlgrp, vlgrp->pvid); + list_for_each_entry_safe(vlan, tmp, &vlgrp->vlan_list, vlist) + __vlan_del(vlan); + rhashtable_destroy(&vlgrp->vlan_hash); + kfree(vlgrp); } struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *pv, + struct net_bridge_vlan_group *vg, struct sk_buff *skb) { + struct net_bridge_vlan *v; u16 vid; /* If this packet was not filtered at input, let it pass */ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) goto out; - /* Vlan filter table must be configured at this point. The + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id has untagged flag set, + * send untagged; otherwise, send tagged. + */ + br_vlan_get_tag(skb, &vid); + v = br_vlan_find(vg, vid); + /* Vlan entry must be configured at this point. The * only exception is the bridge is set in promisc mode and the * packet is destined for the bridge device. In this case * pass the packet as is. */ - if (!pv) { + if (!v || !br_vlan_should_use(v)) { if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { goto out; } else { @@ -210,13 +338,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, return NULL; } } - - /* At this point, we know that the frame was filtered and contains - * a valid vlan id. If the vlan id is set in the untagged bitmap, - * send untagged; otherwise, send tagged. - */ - br_vlan_get_tag(skb, &vid); - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) skb->vlan_tci = 0; out: @@ -224,29 +346,13 @@ out: } /* Called under RCU */ -bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, - struct sk_buff *skb, u16 *vid) +static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, + struct sk_buff *skb, u16 *vid) { + const struct net_bridge_vlan *v; bool tagged; - __be16 proto; - - /* If VLAN filtering is disabled on the bridge, all packets are - * permitted. - */ - if (!br->vlan_enabled) { - BR_INPUT_SKB_CB(skb)->vlan_filtered = false; - return true; - } - - /* If there are no vlan in the permitted list, all packets are - * rejected. - */ - if (!v) - goto drop; BR_INPUT_SKB_CB(skb)->vlan_filtered = true; - proto = br->vlan_proto; - /* If vlan tx offload is disabled on bridge device and frame was * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. @@ -281,7 +387,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, } if (!*vid) { - u16 pvid = br_get_pvid(v); + u16 pvid = br_get_pvid(vg); /* Frame had a tag with VID 0 or did not have a tag. * See if pvid is set on this port. That tells us which @@ -309,29 +415,43 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, } /* Frame had a valid vlan tag. See if vlan is allowed */ - if (test_bit(*vid, v->vlan_bitmap)) + v = br_vlan_find(vg, *vid); + if (v && br_vlan_should_use(v)) return true; drop: kfree_skb(skb); return false; } +bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, + u16 *vid) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; + return true; + } + + return __allowed_ingress(vg, br->vlan_proto, skb, vid); +} + /* Called under RCU. */ -bool br_allowed_egress(struct net_bridge *br, - const struct net_port_vlans *v, +bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb) { + const struct net_bridge_vlan *v; u16 vid; /* If this packet was not filtered at input, let it pass */ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; - if (!v) - return false; - br_vlan_get_tag(skb, &vid); - if (test_bit(vid, v->vlan_bitmap)) + v = br_vlan_find(vg, vid); + if (v && br_vlan_should_use(v)) return true; return false; @@ -340,29 +460,29 @@ bool br_allowed_egress(struct net_bridge *br, /* Called under RCU */ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) { + struct net_bridge_vlan_group *vg; struct net_bridge *br = p->br; - struct net_port_vlans *v; /* If filtering was disabled at input, let it pass. */ if (!br->vlan_enabled) return true; - v = rcu_dereference(p->vlan_info); - if (!v) + vg = p->vlgrp; + if (!vg || !vg->num_vlans) return false; if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) *vid = 0; if (!*vid) { - *vid = br_get_pvid(v); + *vid = br_get_pvid(vg); if (!*vid) return false; return true; } - if (test_bit(*vid, v->vlan_bitmap)) + if (br_vlan_find(vg, *vid)) return true; return false; @@ -373,31 +493,47 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) */ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { - struct net_port_vlans *pv = NULL; - int err; + struct net_bridge_vlan *vlan; + int ret; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); - if (pv) - return __vlan_add(pv, vid, flags); + vlan = br_vlan_find(br->vlgrp, vid); + if (vlan) { + if (!br_vlan_is_brentry(vlan)) { + /* Trying to change flags of non-existent bridge vlan */ + if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) + return -EINVAL; + /* It was only kept for port vlans, now make it real */ + ret = br_fdb_insert(br, NULL, br->dev->dev_addr, + vlan->vid); + if (ret) { + br_err(br, "failed insert local address into bridge forwarding table\n"); + return ret; + } + atomic_inc(&vlan->refcnt); + vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; + br->vlgrp->num_vlans++; + } + __vlan_add_flags(vlan, flags); + return 0; + } - /* Create port vlan infomration - */ - pv = kzalloc(sizeof(*pv), GFP_KERNEL); - if (!pv) + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) return -ENOMEM; - pv->parent.br = br; - err = __vlan_add(pv, vid, flags); - if (err) - goto out; + vlan->vid = vid; + vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER; + vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; + vlan->br = br; + if (flags & BRIDGE_VLAN_INFO_BRENTRY) + atomic_set(&vlan->refcnt, 1); + ret = __vlan_add(vlan, flags); + if (ret) + kfree(vlan); - rcu_assign_pointer(br->vlan_info, pv); - return 0; -out: - kfree(pv); - return err; + return ret; } /* Must be protected by RTNL. @@ -405,49 +541,32 @@ out: */ int br_vlan_delete(struct net_bridge *br, u16 vid) { - struct net_port_vlans *pv; + struct net_bridge_vlan *v; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); - if (!pv) - return -EINVAL; + v = br_vlan_find(br->vlgrp, vid); + if (!v || !br_vlan_is_brentry(v)) + return -ENOENT; br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); - __vlan_del(pv, vid); - return 0; + return __vlan_del(v); } void br_vlan_flush(struct net_bridge *br) { - struct net_port_vlans *pv; - ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); - if (!pv) - return; - __vlan_flush(pv); + __vlan_flush(br_vlan_group(br)); } -bool br_vlan_find(struct net_bridge *br, u16 vid) +struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid) { - struct net_port_vlans *pv; - bool found = false; - - rcu_read_lock(); - pv = rcu_dereference(br->vlan_info); - - if (!pv) - goto out; + if (!vg) + return NULL; - if (test_bit(vid, pv->vlan_bitmap)) - found = true; - -out: - rcu_read_unlock(); - return found; + return br_vlan_lookup(&vg->vlan_hash, vid); } /* Must be protected by RTNL. */ @@ -505,21 +624,16 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; struct net_bridge_port *p; - struct net_port_vlans *pv; + struct net_bridge_vlan *vlan; __be16 oldproto; - u16 vid, errvid; if (br->vlan_proto == proto) return 0; /* Add VLANs for the new proto to the device filter. */ list_for_each_entry(p, &br->port_list, list) { - pv = rtnl_dereference(p->vlan_info); - if (!pv) - continue; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - err = vlan_vid_add(p->dev, proto, vid); + list_for_each_entry(vlan, &p->vlgrp->vlan_list, vlist) { + err = vlan_vid_add(p->dev, proto, vlan->vid); if (err) goto err_filt; } @@ -532,30 +646,19 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) br_recalculate_fwd_mask(br); /* Delete VLANs for the old proto from the device filter. */ - list_for_each_entry(p, &br->port_list, list) { - pv = rtnl_dereference(p->vlan_info); - if (!pv) - continue; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(p->dev, oldproto, vid); - } + list_for_each_entry(p, &br->port_list, list) + list_for_each_entry(vlan, &p->vlgrp->vlan_list, vlist) + vlan_vid_del(p->dev, oldproto, vlan->vid); return 0; err_filt: - errvid = vid; - for_each_set_bit(vid, pv->vlan_bitmap, errvid) - vlan_vid_del(p->dev, proto, vid); - - list_for_each_entry_continue_reverse(p, &br->port_list, list) { - pv = rtnl_dereference(p->vlan_info); - if (!pv) - continue; + list_for_each_entry_continue_reverse(vlan, &p->vlgrp->vlan_list, vlist) + vlan_vid_del(p->dev, proto, vlan->vid); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(p->dev, proto, vid); - } + list_for_each_entry_continue_reverse(p, &br->port_list, list) + list_for_each_entry(vlan, &p->vlgrp->vlan_list, vlist) + vlan_vid_del(p->dev, proto, vlan->vid); return err; } @@ -576,9 +679,19 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val) return err; } -static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid) +static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) { - return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap); + struct net_bridge_vlan *v; + + if (vid != vg->pvid) + return false; + + v = br_vlan_lookup(&vg->vlan_hash, vid); + if (v && br_vlan_should_use(v) && + (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)) + return true; + + return false; } static void br_vlan_disable_default_pvid(struct net_bridge *br) @@ -589,11 +702,11 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br) /* Disable default_pvid on all ports where it is still * configured. */ - if (vlan_default_pvid(br_get_vlan_info(br), pvid)) + if (vlan_default_pvid(br->vlgrp, pvid)) br_vlan_delete(br, pvid); list_for_each_entry(p, &br->port_list, list) { - if (vlan_default_pvid(nbp_get_vlan_info(p), pvid)) + if (vlan_default_pvid(p->vlgrp, pvid)) nbp_vlan_delete(p, pvid); } @@ -602,6 +715,7 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br) static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) { + const struct net_bridge_vlan *pvent; struct net_bridge_port *p; u16 old_pvid; int err = 0; @@ -617,11 +731,13 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) /* Update default_pvid config only if we do not conflict with * user configuration. */ - if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) && - !br_vlan_find(br, pvid)) { + pvent = br_vlan_find(br->vlgrp, pvid); + if ((!old_pvid || vlan_default_pvid(br->vlgrp, old_pvid)) && + (!pvent || !br_vlan_should_use(pvent))) { err = br_vlan_add(br, pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY); if (err) goto out; br_vlan_delete(br, old_pvid); @@ -633,8 +749,8 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) * user configuration. */ if ((old_pvid && - !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) || - nbp_vlan_find(p, pvid)) + !vlan_default_pvid(p->vlgrp, old_pvid)) || + br_vlan_find(p->vlgrp, pvid)) continue; err = nbp_vlan_add(p, pvid, @@ -668,7 +784,8 @@ err_port: if (old_pvid) br_vlan_add(br, old_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY); br_vlan_delete(br, pvid); } goto out; @@ -707,10 +824,66 @@ unlock: int br_vlan_init(struct net_bridge *br) { + int ret = -ENOMEM; + + br->vlgrp = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL); + if (!br->vlgrp) + goto out; + ret = rhashtable_init(&br->vlgrp->vlan_hash, &br_vlan_rht_params); + if (ret) + goto err_rhtbl; + INIT_LIST_HEAD(&br->vlgrp->vlan_list); br->vlan_proto = htons(ETH_P_8021Q); br->default_pvid = 1; - return br_vlan_add(br, 1, - BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED); + ret = br_vlan_add(br, 1, + BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY); + if (ret) + goto err_vlan_add; + +out: + return ret; + +err_vlan_add: + rhashtable_destroy(&br->vlgrp->vlan_hash); +err_rhtbl: + kfree(br->vlgrp); + + goto out; +} + +int nbp_vlan_init(struct net_bridge_port *p) +{ + struct net_bridge_vlan_group *vg; + int ret = -ENOMEM; + + vg = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL); + if (!vg) + goto out; + + ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); + if (ret) + goto err_rhtbl; + INIT_LIST_HEAD(&vg->vlan_list); + /* Make sure everything's committed before publishing vg */ + smp_wmb(); + p->vlgrp = vg; + if (p->br->default_pvid) { + ret = nbp_vlan_add(p, p->br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + if (ret) + goto err_vlan_add; + } +out: + return ret; + +err_vlan_add: + rhashtable_destroy(&vg->vlan_hash); +err_rhtbl: + kfree(vg); + + goto out; } /* Must be protected by RTNL. @@ -718,35 +891,28 @@ int br_vlan_init(struct net_bridge *br) */ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { - struct net_port_vlans *pv = NULL; - int err; + struct net_bridge_vlan *vlan; + int ret; ASSERT_RTNL(); - pv = rtnl_dereference(port->vlan_info); - if (pv) - return __vlan_add(pv, vid, flags); - - /* Create port vlan infomration - */ - pv = kzalloc(sizeof(*pv), GFP_KERNEL); - if (!pv) { - err = -ENOMEM; - goto clean_up; + vlan = br_vlan_find(port->vlgrp, vid); + if (vlan) { + __vlan_add_flags(vlan, flags); + return 0; } - pv->port_idx = port->port_no; - pv->parent.port = port; - err = __vlan_add(pv, vid, flags); - if (err) - goto clean_up; + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; - rcu_assign_pointer(port->vlan_info, pv); - return 0; + vlan->vid = vid; + vlan->port = port; + ret = __vlan_add(vlan, flags); + if (ret) + kfree(vlan); -clean_up: - kfree(pv); - return err; + return ret; } /* Must be protected by RTNL. @@ -754,61 +920,27 @@ clean_up: */ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) { - struct net_port_vlans *pv; + struct net_bridge_vlan *v; ASSERT_RTNL(); - pv = rtnl_dereference(port->vlan_info); - if (!pv) - return -EINVAL; - + v = br_vlan_find(port->vlgrp, vid); + if (!v) + return -ENOENT; br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); br_fdb_delete_by_port(port->br, port, vid, 0); - return __vlan_del(pv, vid); + return __vlan_del(v); } void nbp_vlan_flush(struct net_bridge_port *port) { - struct net_port_vlans *pv; - u16 vid; + struct net_bridge_vlan *vlan; ASSERT_RTNL(); - pv = rtnl_dereference(port->vlan_info); - if (!pv) - return; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(port->dev, port->br->vlan_proto, vid); + list_for_each_entry(vlan, &port->vlgrp->vlan_list, vlist) + vlan_vid_del(port->dev, port->br->vlan_proto, vlan->vid); - __vlan_flush(pv); -} - -bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) -{ - struct net_port_vlans *pv; - bool found = false; - - rcu_read_lock(); - pv = rcu_dereference(port->vlan_info); - - if (!pv) - goto out; - - if (test_bit(vid, pv->vlan_bitmap)) - found = true; - -out: - rcu_read_unlock(); - return found; -} - -int nbp_vlan_init(struct net_bridge_port *p) -{ - return p->br->default_pvid ? - nbp_vlan_add(p, p->br->default_pvid, - BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED) : - 0; + __vlan_flush(nbp_vlan_group(port)); } diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 17f2e4bc2a29..0ad639a96142 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -180,7 +180,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 59ac7952010d..54816150608e 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,7 +24,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d2cdf5d6e98c..ec94c6f1ae88 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -50,10 +50,14 @@ static const struct ebt_table broute_table = { static int ebt_broute(struct sk_buff *skb) { + struct nf_hook_state state; int ret; - ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - dev_net(skb->dev)->xt.broute_table); + nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + + ret = ebt_do_table(skb, &state, state.net->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 8a3f63b2e807..f9242dffa65e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -57,19 +57,17 @@ static const struct ebt_table frame_filter = { }; static unsigned int -ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->in)->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static unsigned int -ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->out)->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index c5ef5b1ab678..4bbefe03ab58 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -57,19 +57,17 @@ static struct ebt_table frame_nat = { }; static unsigned int -ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->in)->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static unsigned int -ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->out)->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 48b6b01295de..f46ca417bf2d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -183,10 +183,11 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table) +unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table) { + unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; @@ -199,8 +200,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; - acpar.in = in; - acpar.out = out; + acpar.net = state->net; + acpar.in = state->in; + acpar.out = state->out; acpar.hotdrop = false; acpar.hooknum = hook; @@ -220,7 +222,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, skb, in, out)) + if (ebt_basic_match(point, skb, state->in, state->out)) goto letscontinue; if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a343e62442b1..62f6b1b19589 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -65,31 +65,29 @@ int nft_bridge_ip6hdr_validate(struct sk_buff *skb) EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, ops, skb, state); + nft_set_pktinfo_ipv4(pkt, skb, state); else - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0) + nft_set_pktinfo_ipv6(pkt, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); } static unsigned int -nft_do_chain_bridge(const struct nf_hook_ops *ops, +nft_do_chain_bridge(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -97,17 +95,17 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_bridge __read_mostly = { diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 858d848564ee..fdba3d9fbff3 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -261,7 +261,6 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; if (is_broadcast_ether_addr(dest) || @@ -273,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, nft_reject_icmp_code(priv->icmp_code)); break; } @@ -290,17 +289,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->ops->hooknum, + nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, - pkt->ops->hooknum); + nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, + pkt->in, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->ops->hooknum, + nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, nft_reject_icmpv6_code(priv->icmp_code)); break; } diff --git a/net/core/dev.c b/net/core/dev.c index 6bb6470f5b7b..323c04edd779 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2915,9 +2915,11 @@ EXPORT_SYMBOL(xmit_recursion); /** * dev_loopback_xmit - loop back @skb + * @net: network namespace this loopback is happening in + * @sk: sk needed to be a netfilter okfn * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -3143,11 +3145,11 @@ out: return rc; } -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) +int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit_sk); +EXPORT_SYMBOL(dev_queue_xmit); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3668,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + case TC_ACT_REDIRECT: + /* skb_mac_header check was done by cls/act_bpf, so + * we can safely push the L2 header back before + * redirecting to another netdev + */ + __skb_push(skb, skb->mac_len); + skb_do_redirect(skb); + return NULL; default: break; } @@ -3982,13 +3992,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) +int netif_receive_skb(struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb_sk); +EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending * Called with irqs disabled. @@ -4857,8 +4867,7 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, - struct net_device *adj_dev, +static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -4884,7 +4893,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); + return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -5146,7 +5155,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, struct netdev_adjacent *adj; int ret; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { adj->ref_nr++; @@ -5202,7 +5211,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, { struct netdev_adjacent *adj; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { pr_err("tried to remove device %s from %s\n", @@ -5323,10 +5332,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) + if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5604,7 +5613,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev, if (!lower_dev) return NULL; - lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower); if (!lower) return NULL; diff --git a/net/core/filter.c b/net/core/filter.c index 05a04ea87172..60e3fe7c59c0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1404,9 +1404,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - if (unlikely(!(dev->flags & IFF_UP))) - return -EINVAL; - skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) return -ENOMEM; @@ -1427,6 +1424,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; +struct redirect_info { + u32 ifindex; + u32 flags; +}; + +static DEFINE_PER_CPU(struct redirect_info, redirect_info); +static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + ri->ifindex = ifindex; + ri->flags = flags; + return TC_ACT_REDIRECT; +} + +int skb_do_redirect(struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + return dev_forward_skb(dev, skb); + + skb->dev = dev; + return dev_queue_xmit(skb); +} + +const struct bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return task_get_classid((struct sk_buff *) (unsigned long) r1); @@ -1607,6 +1646,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_key_proto(); + case BPF_FUNC_redirect: + return &bpf_redirect_proto; default: return sk_filter_func_proto(func_id); } @@ -1632,6 +1673,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... @@ -1648,6 +1692,9 @@ static bool sk_filter_is_valid_access(int off, int size, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return type == BPF_WRITE ? true : false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, mark): @@ -1760,6 +1807,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, tc_classid): + ctx_off -= offsetof(struct __sk_buff, tc_classid); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + WARN_ON(type != BPF_WRITE); + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2b515ba7e94f..8c57fdf4d68e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2235,14 +2235,42 @@ static void neigh_update_notify(struct neighbour *neigh) __neigh_notify(neigh, RTM_NEWNEIGH, 0); } +static bool neigh_master_filtered(struct net_device *dev, int master_idx) +{ + struct net_device *master; + + if (!master_idx) + return false; + + master = netdev_master_upper_dev_get(dev); + if (!master || master->ifindex != master_idx) + return true; + + return false; +} + static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + const struct nlmsghdr *nlh = cb->nlh; + struct nlattr *tb[NDA_MAX + 1]; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; + int filter_master_idx = 0; + unsigned int flags = NLM_F_MULTI; + int err; + + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); + if (!err) { + if (tb[NDA_MASTER]) + filter_master_idx = nla_get_u32(tb[NDA_MASTER]); + + if (filter_master_idx) + flags |= NLM_F_DUMP_FILTERED; + } rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); @@ -2255,12 +2283,14 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, n = rcu_dereference_bh(n->next)) { if (!net_eq(dev_net(n->dev), net)) continue; + if (neigh_master_filtered(n->dev, filter_master_idx)) + continue; if (idx < s_idx) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) < 0) { + flags) < 0) { rc = -1; goto out; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 830f8a7c1cb1..410c6e42bf1f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - int i; - - for (i = 0; i < dev->num_tx_queues; i++) - if (queue == &dev->_tx[i]) - break; + unsigned int i; + i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8bdada242a7d..94acfc89ad97 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work) * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. */ -static int poll_one_napi(struct napi_struct *napi, int budget) +static void poll_one_napi(struct napi_struct *napi) { int work = 0; @@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget) * holding the napi->poll_lock. */ if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return budget; + return; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) - goto out; + return; - work = napi->poll(napi, budget); - WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); + /* We explicilty pass the polling call a budget of 0 to + * indicate that we are clearing the Tx path only. + */ + work = napi->poll(napi, 0); + WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll); trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); - -out: - return budget - work; } -static void poll_napi(struct net_device *dev, int budget) +static void poll_napi(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(napi, budget); + poll_one_napi(napi); spin_unlock(&napi->poll_lock); } } @@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev) /* Process pending work on NIC */ ops->ndo_poll_controller(dev); - poll_napi(dev, budget); + poll_napi(dev); up(&ni->dev_lock); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index b42f0e26f89e..e22cfa4ed25f 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -59,6 +59,13 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); spin_lock_init(&queue->syn_wait_lock); + + spin_lock_init(&queue->fastopenq.lock); + queue->fastopenq.rskq_rst_head = NULL; + queue->fastopenq.rskq_rst_tail = NULL; + queue->fastopenq.qlen = 0; + queue->fastopenq.max_qlen = 0; + queue->rskq_accept_head = NULL; lopt->nr_table_entries = nr_table_entries; lopt->max_qlen_log = ilog2(nr_table_entries); @@ -174,7 +181,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, struct sock *lsk = req->rsk_listener; struct fastopen_queue *fastopenq; - fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; + fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0ec48403ed68..474a6da3b51a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1272,7 +1272,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (!(af = nla_nest_start(skb, af_ops->family))) goto nla_put_failure; - err = af_ops->fill_link_af(skb, dev); + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); /* * Caller may return ENODATA to indicate that there diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index bebc735f5afc..e1f823451565 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -229,7 +229,7 @@ void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); int dccp_retransmit_skb(struct sock *sk); void dccp_send_ack(struct sock *sk); -void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *rsk); void dccp_send_sync(struct sock *sk, const u64 seq, @@ -270,13 +270,13 @@ int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); -struct sock *dccp_create_openreq_child(struct sock *sk, +struct sock *dccp_create_openreq_child(const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb); int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, +struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, @@ -293,7 +293,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); void dccp_destroy_sock(struct sock *sk); void dccp_close(struct sock *sk, long timeout); -struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, +struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst, struct request_sock *req); int dccp_connect(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ccf4c5629b3c..5b7818c63cec 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -390,7 +390,8 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) * * This is the equivalent of TCP's tcp_v4_syn_recv_sock */ -struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, +struct sock *dccp_v4_request_recv_sock(const struct sock *sk, + struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { @@ -498,7 +499,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -527,7 +528,7 @@ out: return err; } -static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) { int err; const struct iphdr *rxiph; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5165571f397a..e8753aa3b7a4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -181,7 +181,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -234,7 +234,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet_rsk(req)->pktopts); } -static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; @@ -408,13 +408,14 @@ drop: return -1; } -static struct sock *dccp_v6_request_recv_sock(struct sock *sk, +static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_pinfo *newnp; + const struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -462,22 +463,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (dst == NULL) { - struct in6_addr *final_p, final; + if (!dst) { struct flowi6 fl6; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = ireq->ir_rmt_port; - fl6.fl6_sport = htons(ireq->ir_num); - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - if (IS_ERR(dst)) + dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP); + if (!dst) goto out; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 838f524cf11a..d10aace43672 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -72,7 +72,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) dccp_done(sk); } -struct sock *dccp_create_openreq_child(struct sock *sk, +struct sock *dccp_create_openreq_child(const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb) { @@ -236,7 +236,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, EXPORT_SYMBOL_GPL(dccp_child_process); -void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *rsk) { DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); diff --git a/net/dccp/output.c b/net/dccp/output.c index 0248e8a3460c..4ce912e691d0 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -390,7 +390,7 @@ int dccp_retransmit_skb(struct sock *sk) return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); } -struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, +struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; @@ -398,13 +398,18 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, const u32 dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); - struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, - GFP_ATOMIC); - if (skb == NULL) + struct sk_buff *skb; + + /* sk is marked const to clearly express we dont hold socket lock. + * sock_wmalloc() will atomically change sk->sk_wmem_alloc, + * it is safe to promote sk to non const. + */ + skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1, + GFP_ATOMIC); + if (!skb) return NULL; - /* Reserve space for headers. */ - skb_reserve(skb, sk->sk_prot->max_header); + skb_reserve(skb, MAX_DCCP_HEADER); skb_dst_set(skb, dst_clone(dst)); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 4507b188fc51..482730cd8a56 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) return err; } -static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb) +static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -246,8 +246,9 @@ static int dn_long_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } /* @@ -286,8 +287,9 @@ static int dn_short_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } /* @@ -327,11 +329,12 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } -int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) +int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; @@ -375,7 +378,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb) /* * Ethernet router hello message received */ -int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) +int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; @@ -437,7 +440,7 @@ int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) /* * Endnode hello message received */ -int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb) +int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; struct neighbour *neigh; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index a321eac9fd0c..7ac086d5c0c0 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -714,7 +714,8 @@ out: return ret; } -static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb) +static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, + struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk = NULL; @@ -814,8 +815,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, + &init_net, NULL, skb, skb->dev, NULL, dn_nsp_rx_packet); } diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 1aaa51ebbda6..4b02dd300f50 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -85,7 +85,7 @@ static void dn_nsp_send(struct sk_buff *skb) if (dst) { try_again: skb_dst_set(skb, dst); - dst_output(skb); + dst_output(skb->sk, skb); return; } @@ -582,7 +582,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, * associations. */ skb_dst_set(skb, dst_clone(dst)); - dst_output(skb); + dst_output(skb->sk, skb); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 03227ffd19ce..e930321e2c1d 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb) * * Returns: result of input function if route is found, error code otherwise */ -static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb) +static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb; int err; @@ -573,8 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, + &init_net, NULL, skb, skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -601,8 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, + &init_net, NULL, skb, skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -610,7 +610,7 @@ drop_it: return NET_RX_DROP; } -static int dn_route_discard(struct sock *sk, struct sk_buff *skb) +static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb) { /* * I know we drop the packet here, but thats considered success in @@ -620,7 +620,7 @@ static int dn_route_discard(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; } -static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb) +static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { dn_dev_hello(skb); dn_neigh_pointopoint_hello(skb); @@ -706,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch (flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_route_discard); case DN_RT_PKT_ERTH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_neigh_router_hello); case DN_RT_PKT_EEDH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_neigh_endnode_hello); } } else { @@ -770,8 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb, - NULL, dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, + &init_net, sk, skb, NULL, dev, dn_to_neigh_output); error: @@ -819,8 +819,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb, - dev, skb->dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, + &init_net, NULL, skb, dev, skb->dev, dn_to_neigh_output); drop: diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index af34fc9bdf69..85f2fdc360c2 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, +static unsigned int dnrmg_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7d91f4612ac0..7b1d9ec74e09 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -242,16 +242,15 @@ static int dsa_bridge_check_vlan_range(struct dsa_switch *ds, } static int dsa_slave_port_vlan_add(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_vlan *vlan, + struct switchdev_trans *trans) { - struct switchdev_obj_vlan *vlan = &obj->u.vlan; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; u16 vid; int err; - switch (obj->trans) { - case SWITCHDEV_TRANS_PREPARE: + if (switchdev_trans_ph_prepare(trans)) { if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set) return -EOPNOTSUPP; @@ -263,8 +262,7 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, vlan->vid_end); if (err) return err; - break; - case SWITCHDEV_TRANS_COMMIT: + } else { for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { err = ds->drv->port_vlan_add(ds, p->port, vid, vlan->flags & @@ -274,18 +272,14 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, if (err) return err; } - break; - default: - return -EOPNOTSUPP; } return 0; } static int dsa_slave_port_vlan_del(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_vlan *vlan) { - struct switchdev_obj_vlan *vlan = &obj->u.vlan; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; u16 vid; @@ -304,9 +298,9 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, } static int dsa_slave_port_vlan_dump(struct net_device *dev, - struct switchdev_obj *obj) + struct switchdev_obj_vlan *vlan, + int (*cb)(void *obj)) { - struct switchdev_obj_vlan *vlan = &obj->u.vlan; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; DECLARE_BITMAP(members, DSA_MAX_PORTS); @@ -338,7 +332,7 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, if (test_bit(p->port, untagged)) vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; - err = obj->cb(dev, obj); + err = cb(vlan); if (err) break; } @@ -347,25 +341,24 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, } static int dsa_slave_port_fdb_add(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_fdb *fdb, + struct switchdev_trans *trans) { - struct switchdev_obj_fdb *fdb = &obj->u.fdb; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (obj->trans == SWITCHDEV_TRANS_PREPARE) + if (switchdev_trans_ph_prepare(trans)) ret = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP; - else if (obj->trans == SWITCHDEV_TRANS_COMMIT) + else ret = ds->drv->port_fdb_add(ds, p->port, fdb->addr, fdb->vid); return ret; } static int dsa_slave_port_fdb_del(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_fdb *fdb) { - struct switchdev_obj_fdb *fdb = &obj->u.fdb; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; @@ -377,7 +370,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev, } static int dsa_slave_port_fdb_dump(struct net_device *dev, - struct switchdev_obj *obj) + struct switchdev_obj_fdb *fdb, + int (*cb)(void *obj)) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; @@ -396,11 +390,11 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, if (ret < 0) break; - obj->u.fdb.addr = addr; - obj->u.fdb.vid = vid; - obj->u.fdb.ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + fdb->addr = addr; + fdb->vid = vid; + fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; - ret = obj->cb(dev, obj); + ret = cb(fdb); if (ret < 0) break; } @@ -456,7 +450,8 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) } static int dsa_slave_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr) + struct switchdev_attr *attr, + struct switchdev_trans *trans) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; @@ -464,7 +459,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: - if (attr->trans == SWITCHDEV_TRANS_PREPARE) + if (switchdev_trans_ph_prepare(trans)) ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; else ret = ds->drv->port_stp_update(ds, p->port, @@ -479,7 +474,8 @@ static int dsa_slave_port_attr_set(struct net_device *dev, } static int dsa_slave_port_obj_add(struct net_device *dev, - struct switchdev_obj *obj) + enum switchdev_obj_id id, const void *obj, + struct switchdev_trans *trans) { int err; @@ -488,12 +484,12 @@ static int dsa_slave_port_obj_add(struct net_device *dev, * supported, return -EOPNOTSUPP. */ - switch (obj->id) { + switch (id) { case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_add(dev, obj); + err = dsa_slave_port_fdb_add(dev, obj, trans); break; case SWITCHDEV_OBJ_PORT_VLAN: - err = dsa_slave_port_vlan_add(dev, obj); + err = dsa_slave_port_vlan_add(dev, obj, trans); break; default: err = -EOPNOTSUPP; @@ -504,11 +500,11 @@ static int dsa_slave_port_obj_add(struct net_device *dev, } static int dsa_slave_port_obj_del(struct net_device *dev, - struct switchdev_obj *obj) + enum switchdev_obj_id id, const void *obj) { int err; - switch (obj->id) { + switch (id) { case SWITCHDEV_OBJ_PORT_FDB: err = dsa_slave_port_fdb_del(dev, obj); break; @@ -524,16 +520,17 @@ static int dsa_slave_port_obj_del(struct net_device *dev, } static int dsa_slave_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj) + enum switchdev_obj_id id, void *obj, + int (*cb)(void *obj)) { int err; - switch (obj->id) { + switch (id) { case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_dump(dev, obj); + err = dsa_slave_port_fdb_dump(dev, obj, cb); break; case SWITCHDEV_OBJ_PORT_VLAN: - err = dsa_slave_port_vlan_dump(dev, obj); + err = dsa_slave_port_vlan_dump(dev, obj, cb); break; default: err = -EOPNOTSUPP; @@ -967,6 +964,10 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_obj_dump = dsa_slave_port_obj_dump, }; +static struct device_type dsa_type = { + .name = "dsa", +}; + static void dsa_slave_adjust_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -1155,6 +1156,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; + SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, NULL); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d850fdc828f9..9e63f252a89e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -127,7 +127,7 @@ u32 eth_get_headlen(void *data, unsigned int len) struct flow_keys keys; /* this should never happen, but better safe than sorry */ - if (len < sizeof(*eth)) + if (unlikely(len < sizeof(*eth))) return len; /* parse any remaining L2/L3 headers, check for L4 */ diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index ea339fa94c27..b4e17a7c0df0 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -7,6 +7,15 @@ #include <net/inet_frag.h> #include <net/6lowpan.h> +typedef unsigned __bitwise__ lowpan_rx_result; +#define RX_CONTINUE ((__force lowpan_rx_result) 0u) +#define RX_DROP_UNUSABLE ((__force lowpan_rx_result) 1u) +#define RX_DROP ((__force lowpan_rx_result) 2u) +#define RX_QUEUED ((__force lowpan_rx_result) 3u) + +#define LOWPAN_DISPATCH_FRAG1 0xc0 +#define LOWPAN_DISPATCH_FRAGN 0xe0 + struct lowpan_create_arg { u16 tag; u16 d_size; @@ -40,7 +49,7 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) /* private device info */ struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ + struct net_device *wdev; /* wpan device ptr */ u16 fragment_tag; }; @@ -62,4 +71,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, const void *_saddr, unsigned int len); netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev); +int lowpan_iphc_decompress(struct sk_buff *skb); +lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb); + #endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 953b1c49f5d1..9f0cfa598e3a 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -61,7 +61,7 @@ static struct header_ops lowpan_header_ops = { static struct lock_class_key lowpan_tx_busylock; static struct lock_class_key lowpan_netdev_xmit_lock_key; -static void lowpan_set_lockdep_class_one(struct net_device *dev, +static void lowpan_set_lockdep_class_one(struct net_device *ldev, struct netdev_queue *txq, void *_unused) { @@ -69,35 +69,52 @@ static void lowpan_set_lockdep_class_one(struct net_device *dev, &lowpan_netdev_xmit_lock_key); } -static int lowpan_dev_init(struct net_device *dev) +static int lowpan_dev_init(struct net_device *ldev) { - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; + netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL); + ldev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static int lowpan_open(struct net_device *dev) +{ + if (!open_count) + lowpan_rx_init(); + open_count++; + return 0; +} + +static int lowpan_stop(struct net_device *dev) +{ + open_count--; + if (!open_count) + lowpan_rx_exit(); return 0; } static const struct net_device_ops lowpan_netdev_ops = { .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, + .ndo_open = lowpan_open, + .ndo_stop = lowpan_stop, }; -static void lowpan_setup(struct net_device *dev) +static void lowpan_setup(struct net_device *ldev) { - dev->addr_len = IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_6LOWPAN; + ldev->addr_len = IEEE802154_ADDR_LEN; + memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN); + ldev->type = ARPHRD_6LOWPAN; /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->priv_flags |= IFF_NO_QUEUE; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->destructor = free_netdev; - dev->features |= NETIF_F_NETNS_LOCAL; + ldev->hard_header_len = 2 + 1 + 20 + 14; + ldev->needed_tailroom = 2; /* FCS */ + ldev->mtu = IPV6_MIN_MTU; + ldev->priv_flags |= IFF_NO_QUEUE; + ldev->flags = IFF_BROADCAST | IFF_MULTICAST; + + ldev->netdev_ops = &lowpan_netdev_ops; + ldev->header_ops = &lowpan_header_ops; + ldev->destructor = free_netdev; + ldev->features |= NETIF_F_NETNS_LOCAL; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -109,10 +126,10 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int lowpan_newlink(struct net *src_net, struct net_device *dev, +static int lowpan_newlink(struct net *src_net, struct net_device *ldev, struct nlattr *tb[], struct nlattr *data[]) { - struct net_device *real_dev; + struct net_device *wdev; int ret; ASSERT_RTNL(); @@ -120,58 +137,47 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, pr_debug("adding new link\n"); if (!tb[IFLA_LINK] || - !net_eq(dev_net(dev), &init_net)) + !net_eq(dev_net(ldev), &init_net)) return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) + /* find and hold wpan device */ + wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK])); + if (!wdev) return -ENODEV; - if (real_dev->type != ARPHRD_IEEE802154) { - dev_put(real_dev); + if (wdev->type != ARPHRD_IEEE802154) { + dev_put(wdev); return -EINVAL; } - if (real_dev->ieee802154_ptr->lowpan_dev) { - dev_put(real_dev); + if (wdev->ieee802154_ptr->lowpan_dev) { + dev_put(wdev); return -EBUSY; } - lowpan_dev_info(dev)->real_dev = real_dev; + lowpan_dev_info(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); + memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); - lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154); + lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154); - ret = register_netdevice(dev); + ret = register_netdevice(ldev); if (ret < 0) { - dev_put(real_dev); + dev_put(wdev); return ret; } - real_dev->ieee802154_ptr->lowpan_dev = dev; - if (!open_count) - lowpan_rx_init(); - - open_count++; - + wdev->ieee802154_ptr->lowpan_dev = ldev; return 0; } -static void lowpan_dellink(struct net_device *dev, struct list_head *head) +static void lowpan_dellink(struct net_device *ldev, struct list_head *head) { - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; + struct net_device *wdev = lowpan_dev_info(ldev)->wdev; ASSERT_RTNL(); - open_count--; - - if (!open_count) - lowpan_rx_exit(); - - real_dev->ieee802154_ptr->lowpan_dev = NULL; - unregister_netdevice(dev); - dev_put(real_dev); + wdev->ieee802154_ptr->lowpan_dev = NULL; + unregister_netdevice(ldev); + dev_put(wdev); } static struct rtnl_link_ops lowpan_link_ops __read_mostly = { @@ -196,9 +202,9 @@ static inline void lowpan_netlink_fini(void) static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_device *wdev = netdev_notifier_info_to_dev(ptr); - if (dev->type != ARPHRD_IEEE802154) + if (wdev->type != ARPHRD_IEEE802154) goto out; switch (event) { @@ -207,8 +213,8 @@ static int lowpan_device_event(struct notifier_block *unused, * also delete possible lowpan interfaces which belongs * to the wpan interface. */ - if (dev->ieee802154_ptr && dev->ieee802154_ptr->lowpan_dev) - lowpan_dellink(dev->ieee802154_ptr->lowpan_dev, NULL); + if (wdev->ieee802154_ptr->lowpan_dev) + lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); break; default: break; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 214d44aef35b..12e8cf4bda9f 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -32,21 +32,10 @@ static const char lowpan_frags_cache_name[] = "lowpan-frags"; -struct lowpan_frag_info { - u16 d_tag; - u16 d_size; - u8 d_offset; -}; - -static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) -{ - return (struct lowpan_frag_info *)skb->cb; -} - static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, - struct sk_buff *prev, struct net_device *dev); + struct sk_buff *prev, struct net_device *ldev); static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, const struct ieee802154_addr *saddr, @@ -111,7 +100,7 @@ out: } static inline struct lowpan_frag_queue * -fq_find(struct net *net, const struct lowpan_frag_info *frag_info, +fq_find(struct net *net, const struct lowpan_802154_cb *cb, const struct ieee802154_addr *src, const struct ieee802154_addr *dst) { @@ -121,12 +110,12 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - arg.tag = frag_info->d_tag; - arg.d_size = frag_info->d_size; + arg.tag = cb->d_tag; + arg.d_size = cb->d_size; arg.src = src; arg.dst = dst; - hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); + hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst); q = inet_frag_find(&ieee802154_lowpan->frags, &lowpan_frags, &arg, hash); @@ -138,17 +127,17 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, } static int lowpan_frag_queue(struct lowpan_frag_queue *fq, - struct sk_buff *skb, const u8 frag_type) + struct sk_buff *skb, u8 frag_type) { struct sk_buff *prev, *next; - struct net_device *dev; + struct net_device *ldev; int end, offset; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; - offset = lowpan_cb(skb)->d_offset << 3; - end = lowpan_cb(skb)->d_size; + offset = lowpan_802154_cb(skb)->d_offset << 3; + end = lowpan_802154_cb(skb)->d_size; /* Is this the final fragment? */ if (offset + skb->len == end) { @@ -174,13 +163,16 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, * this fragment, right? */ prev = fq->q.fragments_tail; - if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) { + if (!prev || + lowpan_802154_cb(prev)->d_offset < + lowpan_802154_cb(skb)->d_offset) { next = NULL; goto found; } prev = NULL; for (next = fq->q.fragments; next != NULL; next = next->next) { - if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset) + if (lowpan_802154_cb(next)->d_offset >= + lowpan_802154_cb(skb)->d_offset) break; /* bingo! */ prev = next; } @@ -195,18 +187,15 @@ found: else fq->q.fragments = skb; - dev = skb->dev; - if (dev) + ldev = skb->dev; + if (ldev) skb->dev = NULL; fq->q.stamp = skb->tstamp; - if (frag_type == LOWPAN_DISPATCH_FRAG1) { - /* Calculate uncomp. 6lowpan header to estimate full size */ - fq->q.meat += lowpan_uncompress_size(skb, NULL); + if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; - } else { - fq->q.meat += skb->len; - } + + fq->q.meat += skb->len; add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && @@ -215,7 +204,7 @@ found: unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = lowpan_frag_reasm(fq, prev, dev); + res = lowpan_frag_reasm(fq, prev, ldev); skb->_skb_refdst = orefdst; return res; } @@ -235,7 +224,7 @@ err: * the last and the first frames arrived and all the bits are here. */ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) + struct net_device *ldev) { struct sk_buff *fp, *head = fq->q.fragments; int sum_truesize; @@ -313,7 +302,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; - head->dev = dev; + head->dev = ldev; head->tstamp = fq->q.stamp; fq->q.fragments = NULL; @@ -325,24 +314,87 @@ out_oom: return -1; } -static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, - struct lowpan_frag_info *frag_info) +static int lowpan_frag_rx_handlers_result(struct sk_buff *skb, + lowpan_rx_result res) +{ + switch (res) { + case RX_QUEUED: + return NET_RX_SUCCESS; + case RX_CONTINUE: + /* nobody cared about this packet */ + net_warn_ratelimited("%s: received unknown dispatch\n", + __func__); + + /* fall-through */ + default: + /* all others failure */ + return NET_RX_DROP; + } +} + +static lowpan_rx_result lowpan_frag_rx_h_iphc(struct sk_buff *skb) +{ + int ret; + + if (!lowpan_is_iphc(*skb_network_header(skb))) + return RX_CONTINUE; + + ret = lowpan_iphc_decompress(skb); + if (ret < 0) + return RX_DROP; + + return RX_QUEUED; +} + +static int lowpan_invoke_frag_rx_handlers(struct sk_buff *skb) +{ + lowpan_rx_result res; + +#define CALL_RXH(rxh) \ + do { \ + res = rxh(skb); \ + if (res != RX_CONTINUE) \ + goto rxh_next; \ + } while (0) + + /* likely at first */ + CALL_RXH(lowpan_frag_rx_h_iphc); + CALL_RXH(lowpan_rx_h_ipv6); + +rxh_next: + return lowpan_frag_rx_handlers_result(skb, res); +#undef CALL_RXH +} + +#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK 0x07 +#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT 8 + +static int lowpan_get_cb(struct sk_buff *skb, u8 frag_type, + struct lowpan_802154_cb *cb) { bool fail; - u8 pattern = 0, low = 0; + u8 high = 0, low = 0; __be16 d_tag = 0; - fail = lowpan_fetch_skb(skb, &pattern, 1); + fail = lowpan_fetch_skb(skb, &high, 1); fail |= lowpan_fetch_skb(skb, &low, 1); - frag_info->d_size = (pattern & 7) << 8 | low; + /* remove the dispatch value and use first three bits as high value + * for the datagram size + */ + cb->d_size = (high & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) << + LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | low; fail |= lowpan_fetch_skb(skb, &d_tag, 2); - frag_info->d_tag = ntohs(d_tag); + cb->d_tag = ntohs(d_tag); if (frag_type == LOWPAN_DISPATCH_FRAGN) { - fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1); + fail |= lowpan_fetch_skb(skb, &cb->d_offset, 1); } else { skb_reset_network_header(skb); - frag_info->d_offset = 0; + cb->d_offset = 0; + /* check if datagram_size has ipv6hdr on FRAG1 */ + fail |= cb->d_size < sizeof(struct ipv6hdr); + /* check if we can dereference the dispatch value */ + fail |= !skb->len; } if (unlikely(fail)) @@ -351,27 +403,33 @@ static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, return 0; } -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) +int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) { struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); - struct lowpan_frag_info *frag_info = lowpan_cb(skb); - struct ieee802154_addr source, dest; + struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); + struct ieee802154_hdr hdr; int err; - source = mac_cb(skb)->source; - dest = mac_cb(skb)->dest; + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto err; - err = lowpan_get_frag_info(skb, frag_type, frag_info); + err = lowpan_get_cb(skb, frag_type, cb); if (err < 0) goto err; - if (frag_info->d_size > IPV6_MIN_MTU) { + if (frag_type == LOWPAN_DISPATCH_FRAG1) { + err = lowpan_invoke_frag_rx_handlers(skb); + if (err == NET_RX_DROP) + goto err; + } + + if (cb->d_size > IPV6_MIN_MTU) { net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); goto err; } - fq = fq_find(net, frag_info, &source, &dest); + fq = fq_find(net, cb, &hdr.source, &hdr.dest); if (fq != NULL) { int ret; @@ -387,7 +445,6 @@ err: kfree_skb(skb); return -1; } -EXPORT_SYMBOL(lowpan_frag_rcv); #ifdef CONFIG_SYSCTL static int zero; diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c index 12e10201d263..b1fd47d2802b 100644 --- a/net/ieee802154/6lowpan/rx.c +++ b/net/ieee802154/6lowpan/rx.c @@ -11,40 +11,99 @@ #include <linux/if_arp.h> #include <net/6lowpan.h> +#include <net/mac802154.h> #include <net/ieee802154_netdev.h> #include "6lowpan_i.h" -static int lowpan_give_skb_to_device(struct sk_buff *skb, - struct net_device *dev) +#define LOWPAN_DISPATCH_FIRST 0xc0 +#define LOWPAN_DISPATCH_FRAG_MASK 0xf8 + +#define LOWPAN_DISPATCH_NALP 0x00 +#define LOWPAN_DISPATCH_ESC 0x40 +#define LOWPAN_DISPATCH_HC1 0x42 +#define LOWPAN_DISPATCH_DFF 0x43 +#define LOWPAN_DISPATCH_BC0 0x50 +#define LOWPAN_DISPATCH_MESH 0x80 + +static int lowpan_give_skb_to_device(struct sk_buff *skb) { - skb->dev = dev->ieee802154_ptr->lowpan_dev; skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; return netif_rx(skb); } -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +static int lowpan_rx_handlers_result(struct sk_buff *skb, lowpan_rx_result res) +{ + switch (res) { + case RX_CONTINUE: + /* nobody cared about this packet */ + net_warn_ratelimited("%s: received unknown dispatch\n", + __func__); + + /* fall-through */ + case RX_DROP_UNUSABLE: + kfree_skb(skb); + + /* fall-through */ + case RX_DROP: + return NET_RX_DROP; + case RX_QUEUED: + return lowpan_give_skb_to_device(skb); + default: + break; + } + + return NET_RX_DROP; +} + +static inline bool lowpan_is_frag1(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAG1; +} + +static inline bool lowpan_is_fragn(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAGN; +} + +static lowpan_rx_result lowpan_rx_h_frag(struct sk_buff *skb) +{ + int ret; + + if (!(lowpan_is_frag1(*skb_network_header(skb)) || + lowpan_is_fragn(*skb_network_header(skb)))) + return RX_CONTINUE; + + ret = lowpan_frag_rcv(skb, *skb_network_header(skb) & + LOWPAN_DISPATCH_FRAG_MASK); + if (ret == 1) + return RX_QUEUED; + + /* Packet is freed by lowpan_frag_rcv on error or put into the frag + * bucket. + */ + return RX_DROP; +} + +int lowpan_iphc_decompress(struct sk_buff *skb) { - u8 iphc0, iphc1; struct ieee802154_addr_sa sa, da; + struct ieee802154_hdr hdr; + u8 iphc0, iphc1; void *sap, *dap; - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) return -EINVAL; - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; + raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - if (lowpan_fetch_skb_u8(skb, &iphc1)) + if (lowpan_fetch_skb_u8(skb, &iphc0) || + lowpan_fetch_skb_u8(skb, &iphc1)) return -EINVAL; - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); + ieee802154_addr_to_sa(&sa, &hdr.source); + ieee802154_addr_to_sa(&da, &hdr.dest); if (sa.addr_type == IEEE802154_ADDR_SHORT) sap = &sa.short_addr; @@ -61,77 +120,216 @@ iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) IEEE802154_ADDR_LEN, iphc0, iphc1); } -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static lowpan_rx_result lowpan_rx_h_iphc(struct sk_buff *skb) { - struct ieee802154_hdr hdr; int ret; - if (dev->type != ARPHRD_IEEE802154 || - !dev->ieee802154_ptr->lowpan_dev) - goto drop; + if (!lowpan_is_iphc(*skb_network_header(skb))) + return RX_CONTINUE; - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - goto drop; + /* Setting datagram_offset to zero indicates non frag handling + * while doing lowpan_header_decompress. + */ + lowpan_802154_cb(skb)->d_size = 0; - if (!netif_running(dev)) - goto drop_skb; + ret = lowpan_iphc_decompress(skb); + if (ret < 0) + return RX_DROP_UNUSABLE; - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; + return RX_QUEUED; +} - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. */ - skb_pull(skb, 1); - return lowpan_give_skb_to_device(skb, dev); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_device(skb, dev); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_device(skb, dev); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_device(skb, dev); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } +lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb) +{ + if (!lowpan_is_ipv6(*skb_network_header(skb))) + return RX_CONTINUE; + + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(skb, 1); + return RX_QUEUED; +} + +static inline bool lowpan_is_esc(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_ESC; +} + +static lowpan_rx_result lowpan_rx_h_esc(struct sk_buff *skb) +{ + if (!lowpan_is_esc(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN ESC not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_hc1(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_HC1; +} + +static lowpan_rx_result lowpan_rx_h_hc1(struct sk_buff *skb) +{ + if (!lowpan_is_hc1(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN HC1 not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_dff(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_DFF; +} + +static lowpan_rx_result lowpan_rx_h_dff(struct sk_buff *skb) +{ + if (!lowpan_is_dff(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN DFF not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_bc0(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_BC0; +} + +static lowpan_rx_result lowpan_rx_h_bc0(struct sk_buff *skb) +{ + if (!lowpan_is_bc0(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN BC0 not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_mesh(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_MESH; +} + +static lowpan_rx_result lowpan_rx_h_mesh(struct sk_buff *skb) +{ + if (!lowpan_is_mesh(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN MESH not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static int lowpan_invoke_rx_handlers(struct sk_buff *skb) +{ + lowpan_rx_result res; + +#define CALL_RXH(rxh) \ + do { \ + res = rxh(skb); \ + if (res != RX_CONTINUE) \ + goto rxh_next; \ + } while (0) + + /* likely at first */ + CALL_RXH(lowpan_rx_h_iphc); + CALL_RXH(lowpan_rx_h_frag); + CALL_RXH(lowpan_rx_h_ipv6); + CALL_RXH(lowpan_rx_h_esc); + CALL_RXH(lowpan_rx_h_hc1); + CALL_RXH(lowpan_rx_h_dff); + CALL_RXH(lowpan_rx_h_bc0); + CALL_RXH(lowpan_rx_h_mesh); + +rxh_next: + return lowpan_rx_handlers_result(skb, res); +#undef CALL_RXH +} + +static inline bool lowpan_is_nalp(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_NALP; +} + +/* Lookup for reserved dispatch values at: + * https://www.iana.org/assignments/_6lowpan-parameters/_6lowpan-parameters.xhtml#_6lowpan-parameters-1 + * + * Last Updated: 2015-01-22 + */ +static inline bool lowpan_is_reserved(u8 dispatch) +{ + return ((dispatch >= 0x44 && dispatch <= 0x4F) || + (dispatch >= 0x51 && dispatch <= 0x5F) || + (dispatch >= 0xc8 && dispatch <= 0xdf) || + (dispatch >= 0xe8 && dispatch <= 0xff)); +} + +/* lowpan_rx_h_check checks on generic 6LoWPAN requirements + * in MAC and 6LoWPAN header. + * + * Don't manipulate the skb here, it could be shared buffer. + */ +static inline bool lowpan_rx_h_check(struct sk_buff *skb) +{ + __le16 fc = ieee802154_get_fc_from_skb(skb); + + /* check on ieee802154 conform 6LoWPAN header */ + if (!ieee802154_is_data(fc) || + !ieee802154_is_intra_pan(fc)) + return false; + + /* check if we can dereference the dispatch */ + if (unlikely(!skb->len)) + return false; + + if (lowpan_is_nalp(*skb_network_header(skb)) || + lowpan_is_reserved(*skb_network_header(skb))) + return false; + + return true; +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev, + struct packet_type *pt, struct net_device *orig_wdev) +{ + struct net_device *ldev; + + if (wdev->type != ARPHRD_IEEE802154 || + skb->pkt_type == PACKET_OTHERHOST || + !lowpan_rx_h_check(skb)) + return NET_RX_DROP; + + ldev = wdev->ieee802154_ptr->lowpan_dev; + if (!ldev || !netif_running(ldev)) + return NET_RX_DROP; + + /* Replacing skb->dev and followed rx handlers will manipulate skb. */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; + skb->dev = ldev; + + /* When receive frag1 it's likely that we manipulate the buffer. + * When recevie iphc we manipulate the data buffer. So we need + * to unshare the buffer. + */ + if (lowpan_is_frag1(*skb_network_header(skb)) || + lowpan_is_iphc(*skb_network_header(skb))) { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return NET_RX_DROP; } -drop_skb: - kfree_skb(skb); -drop: - return NET_RX_DROP; + return lowpan_invoke_rx_handlers(skb); } static struct packet_type lowpan_packet_type = { diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index f6263fc12340..54939d031ea5 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -36,7 +36,7 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) sizeof(struct lowpan_addr_info)); } -int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, +int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { @@ -51,7 +51,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, return 0; if (!saddr) - saddr = dev->dev_addr; + saddr = ldev->dev_addr; raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); @@ -73,22 +73,21 @@ static struct sk_buff* lowpan_alloc_frag(struct sk_buff *skb, int size, const struct ieee802154_hdr *master_hdr) { - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev; struct sk_buff *frag; int rc; - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, + frag = alloc_skb(wdev->hard_header_len + wdev->needed_tailroom + size, GFP_ATOMIC); if (likely(frag)) { - frag->dev = real_dev; + frag->dev = wdev; frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); + skb_reserve(frag, wdev->hard_header_len); skb_reset_network_header(frag); *mac_cb(frag) = *mac_cb(skb); - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, + rc = dev_hard_header(frag, wdev, 0, &master_hdr->dest, &master_hdr->source, size); if (rc < 0) { kfree_skb(frag); @@ -123,19 +122,17 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, } static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, - const struct ieee802154_hdr *wpan_hdr) +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev, + const struct ieee802154_hdr *wpan_hdr, u16 dgram_size, + u16 dgram_offset) { - u16 dgram_size, dgram_offset; __be16 frag_tag; u8 frag_hdr[5]; int frag_cap, frag_len, payload_cap, rc; int skb_unprocessed, skb_offset; - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; + frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag); + lowpan_dev_info(ldev)->fragment_tag++; frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); frag_hdr[1] = dgram_size & 0xff; @@ -188,9 +185,10 @@ err: return rc; } -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, + u16 *dgram_size, u16 *dgram_offset) { - struct wpan_dev *wpan_dev = lowpan_dev_info(dev)->real_dev->ieee802154_ptr; + struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr; struct ieee802154_addr sa, da; struct ieee802154_mac_cb *cb = mac_cb_init(skb); struct lowpan_addr_info info; @@ -202,7 +200,10 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) daddr = &info.daddr.u.extended_addr; saddr = &info.saddr.u.extended_addr; - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + *dgram_size = skb->len; + lowpan_header_compress(skb, ldev, ETH_P_IPV6, daddr, saddr, skb->len); + /* dgram_offset = (saved bytes after compression) + lowpan header len */ + *dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb); cb->type = IEEE802154_FC_TYPE_DATA; @@ -227,14 +228,15 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) cb->ackreq = wpan_dev->ackreq; } - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); + return dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, ETH_P_IPV6, + (void *)&da, (void *)&sa, 0); } -netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) { struct ieee802154_hdr wpan_hdr; int max_single, ret; + u16 dgram_size, dgram_offset; pr_debug("package xmit\n"); @@ -245,7 +247,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb) return NET_XMIT_DROP; - ret = lowpan_header(skb, dev); + ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset); if (ret < 0) { kfree_skb(skb); return NET_XMIT_DROP; @@ -259,13 +261,14 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) max_single = ieee802154_max_payload(&wpan_hdr); if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; + skb->dev = lowpan_dev_info(ldev)->wdev; return dev_queue_xmit(skb); } else { netdev_tx_t rc; pr_debug("frame is too big, fragmentation is needed\n"); - rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + rc = lowpan_xmit_fragmented(skb, ldev, &wpan_hdr, dgram_size, + dgram_offset); return rc < 0 ? NET_XMIT_DROP : rc; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1d0c3adb6f34..11c4ca13ec3b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,7 +119,7 @@ #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif -#include <net/vrf.h> +#include <net/l3mdev.h> /* The inetsw table contains everything that inet_create needs to @@ -219,17 +219,13 @@ int inet_listen(struct socket *sock, int backlog) * shutdown() (rather than close()). */ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && - !inet_csk(sk)->icsk_accept_queue.fastopenq) { + !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - err = fastopen_init_queue(sk, backlog); + fastopen_queue_tune(sk, backlog); else if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT2) != 0) - err = fastopen_init_queue(sk, + fastopen_queue_tune(sk, ((uint)sysctl_tcp_fastopen) >> 16); - else - err = 0; - if (err) - goto out; tcp_fastopen_init_key_once(true); } @@ -450,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id; + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too @@ -1043,22 +1039,16 @@ void inet_register_protosw(struct inet_protosw *p) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ - answer = NULL; last_perm = &inetsw[p->type]; list_for_each(lh, &inetsw[p->type]) { answer = list_entry(lh, struct inet_protosw, list); - /* Check only the non-wild match. */ - if (INET_PROTOSW_PERMANENT & answer->flags) { - if (protocol == answer->protocol) - break; - last_perm = lh; - } - - answer = NULL; + if ((INET_PROTOSW_PERMANENT & answer->flags) == 0) + break; + if (protocol == answer->protocol) + goto out_permanent; + last_perm = lh; } - if (answer) - goto out_permanent; /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f03db8b7abee..01308e6e6127 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -624,14 +624,20 @@ out: } EXPORT_SYMBOL(arp_create); +static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + /* * Send an arp packet. */ void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb, - NULL, skb->dev, dev_queue_xmit_sk); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, + dev_net(skb->dev), NULL, skb, NULL, skb->dev, + arp_xmit_finish); } EXPORT_SYMBOL(arp_xmit); @@ -639,7 +645,7 @@ EXPORT_SYMBOL(arp_xmit); * Process an arp request. */ -static int arp_process(struct sock *sk, struct sk_buff *skb) +static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -651,7 +657,6 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) u16 dev_type = dev->type; int addr_type; struct neighbour *n; - struct net *net = dev_net(dev); struct dst_entry *reply_dst = NULL; bool is_garp = false; @@ -870,7 +875,7 @@ out: static void parp_redo(struct sk_buff *skb) { - arp_process(NULL, skb); + arp_process(dev_net(skb->dev), NULL, skb); } @@ -903,8 +908,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, - dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, + dev_net(dev), NULL, skb, dev, NULL, + arp_process); consumeskb: consume_skb(skb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2d9cb1748f81..735008472844 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1654,7 +1654,8 @@ static size_t inet_get_link_af_size(const struct net_device *dev) return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ } -static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, + u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); struct nlattr *nla; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 690bcbc59f26..d7c2bb0c4f65 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -45,7 +45,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/xfrm.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #include <trace/events/fib.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -255,7 +255,7 @@ EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, dev, addr, rt_table); } @@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr) { - u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, NULL, addr, rt_table); } @@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = vrf_master_ifindex_rcu(dev); + fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); if (!fl4.flowi4_iif) fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; @@ -367,7 +367,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (nh->nh_dev == dev) { dev_match = true; break; - } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { + } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { dev_match = true; break; } @@ -804,7 +804,7 @@ out: static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { struct net *net = dev_net(ifa->ifa_dev->dev); - u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev); + u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev); struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5eb8ac4089d..6b96dee2800b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -96,7 +96,7 @@ #include <net/xfrm.h> #include <net/inet_common.h> #include <net/ip_fib.h> -#include <net/vrf.h> +#include <net/l3mdev.h> /* * Build xmit assembly blocks @@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, rc = false; if (icmp_global_allow()) { - int vif = vrf_master_ifindex(dst->dev); + int vif = l3mdev_master_ifindex(dst->dev); struct inet_peer *peer; peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; - fl4.flowi4_oif = vrf_master_ifindex(skb->dev); + fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -461,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key(net, fl4); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d38b8b61eaee..de6d4c8ba600 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2569,7 +2569,7 @@ void ip_mc_drop_socket(struct sock *sk) } /* called with rcu_read_lock() */ -int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u8 proto) { struct ip_mc_list *im; struct ip_mc_list __rcu **mc_hash; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7bb9c39e0a4d..e1527882a578 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -335,9 +335,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) sk_acceptq_removed(sk); if (sk->sk_protocol == IPPROTO_TCP && - tcp_rsk(req)->tfo_listener && - queue->fastopenq) { - spin_lock_bh(&queue->fastopenq->lock); + tcp_rsk(req)->tfo_listener) { + spin_lock_bh(&queue->fastopenq.lock); if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to @@ -348,7 +347,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) req->sk = NULL; req = NULL; } - spin_unlock_bh(&queue->fastopenq->lock); + spin_unlock_bh(&queue->fastopenq.lock); } out: release_sock(sk); @@ -408,7 +407,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) } EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); -struct dst_entry *inet_csk_route_req(struct sock *sk, +struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) { @@ -439,7 +438,7 @@ no_route: } EXPORT_SYMBOL_GPL(inet_csk_route_req); -struct dst_entry *inet_csk_route_child_sock(struct sock *sk, +struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, const struct request_sock *req) { @@ -563,7 +562,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, req->num_timeout >= rskq_defer_accept - 1; } -int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) +int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) { int err = req->rsk_ops->rtx_syn_ack(parent, req); @@ -886,12 +885,12 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); reqsk_put(req); } - if (queue->fastopenq) { + if (queue->fastopenq.rskq_rst_head) { /* Free all the reqs queued in rskq_rst_head. */ - spin_lock_bh(&queue->fastopenq->lock); - acc_req = queue->fastopenq->rskq_rst_head; - queue->fastopenq->rskq_rst_head = NULL; - spin_unlock_bh(&queue->fastopenq->lock); + spin_lock_bh(&queue->fastopenq.lock); + acc_req = queue->fastopenq.rskq_rst_head; + queue->fastopenq.rskq_rst_head = NULL; + spin_unlock_bh(&queue->fastopenq.lock); while ((req = acc_req) != NULL) { acc_req = req->dl_next; reqsk_put(req); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 89120196a949..56742e995dd3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -126,7 +126,7 @@ void inet_put_port(struct sock *sk) } EXPORT_SYMBOL(inet_put_port); -int __inet_inherit_port(struct sock *sk, struct sock *child) +int __inet_inherit_port(const struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; unsigned short port = inet_sk(child)->inet_num; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 2d3aa408fbdc..d66cfb35ba74 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -61,18 +61,18 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) } -static int ip_forward_finish(struct sock *sk, struct sk_buff *skb) +static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); skb_sender_cpu_clear(skb); - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } int ip_forward(struct sk_buff *skb) @@ -81,6 +81,7 @@ int ip_forward(struct sk_buff *skb) struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); + struct net *net; /* that should never happen */ if (skb->pkt_type != PACKET_HOST) @@ -99,6 +100,7 @@ int ip_forward(struct sk_buff *skb) return NET_RX_SUCCESS; skb_forward_csum(skb); + net = dev_net(skb->dev); /* * According to the RFC, we must first decrease the TTL field. If @@ -119,7 +121,7 @@ int ip_forward(struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { - IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto drop; @@ -143,8 +145,9 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, - skb->dev, rt->dst.dev, ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, + net, NULL, skb, skb->dev, rt->dst.dev, + ip_forward_finish); sr_failed: /* @@ -155,7 +158,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... */ - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fa7f15305f9a..9772b789adf3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -48,7 +48,7 @@ #include <linux/inet.h> #include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> -#include <net/vrf.h> +#include <net/l3mdev.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c @@ -78,7 +78,7 @@ struct ipq { u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; - int vif; /* VRF device index */ + int vif; /* L3 master device index */ unsigned int rid; struct inet_peer *peer; }; @@ -657,7 +657,7 @@ out_fail: int ip_defrag(struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; - int vif = vrf_master_ifindex_rcu(dev); + int vif = l3mdev_master_ifindex_rcu(dev); struct net *net = dev_net(dev); struct ipq *qp; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f4fc8a77aaa7..7cc9f7bb7fb7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -188,10 +188,8 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) +static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); - __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); @@ -254,8 +252,8 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip_local_deliver_finish); } @@ -311,7 +309,7 @@ drop: int sysctl_ip_early_demux __read_mostly = 1; EXPORT_SYMBOL(sysctl_ip_early_demux); -static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) +static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; @@ -337,8 +335,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) - NET_INC_STATS_BH(dev_net(skb->dev), - LINUX_MIB_IPRPFILTER); + NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER); goto drop; } } @@ -359,11 +356,9 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, - skb->len); + IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, - skb->len); + IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); @@ -378,6 +373,7 @@ drop: int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct iphdr *iph; + struct net *net; u32 len; /* When the interface is in promisc. mode, drop all the crap @@ -387,11 +383,12 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; - IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); + net = dev_net(dev); + IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); goto out; } @@ -417,7 +414,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); - IP_ADD_STATS_BH(dev_net(dev), + IP_ADD_STATS_BH(net, IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); @@ -431,7 +428,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -441,7 +438,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -453,14 +450,14 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, - dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, ip_rcv_finish); csum_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0138fada0951..aff6766922e8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -97,12 +97,14 @@ EXPORT_SYMBOL(ip_send_check); static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); struct iphdr *iph = ip_hdr(skb); iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL, - skb_dst(skb)->dev, dst_output_sk); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst(skb)->dev, + dst_output_okfn); } int __ip_local_out(struct sk_buff *skb) @@ -116,7 +118,7 @@ int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) err = __ip_local_out(skb); if (likely(err == 1)) - err = dst_output_sk(sk, skb); + err = dst_output(sk, skb); return err; } @@ -135,7 +137,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * Add an ip header to a skbuff and send it out. * */ -int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, +int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) { struct inet_sock *inet = inet_sk(sk); @@ -149,15 +151,17 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - if (ip_dont_fragment(sk, &rt->dst)) - iph->frag_off = htons(IP_DF); - else - iph->frag_off = 0; iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(sock_net(sk), skb, sk); + if (ip_dont_fragment(sk, &rt->dst)) { + iph->frag_off = htons(IP_DF); + iph->id = 0; + } else { + iph->frag_off = 0; + __ip_select_ident(sock_net(sk), iph, 1); + } if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -177,14 +181,15 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; + struct net *net = dev_net(dev); unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -263,7 +268,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, return ret; } -static int ip_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; @@ -271,7 +276,7 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } #endif mtu = ip_skb_dst_mtu(skb); @@ -288,11 +293,12 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; + struct net *net = dev_net(dev); /* * If the indicated interface is up and running, send the packet. */ - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -320,7 +326,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } @@ -335,26 +341,29 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb, - NULL, newskb->dev, dev_loopback_xmit); + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, newskb, NULL, newskb->dev, + dev_loopback_xmit); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, - skb->dev, ip_finish_output, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(dev); - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -498,10 +507,9 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb, if (unlikely(!skb->ignore_df || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; + struct net *net = dev_net(skb_rtable(skb)->dst.dev); - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); kfree_skb(skb); @@ -529,9 +537,11 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, int offset; __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); + struct net *net; int err = 0; dev = rt->dst.dev; + net = dev_net(dev); /* * Point into the IP datagram header. @@ -624,7 +634,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, err = output(sk, skb); if (!err) - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -634,7 +644,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, } if (err == 0) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); return 0; } @@ -643,7 +653,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, kfree_skb(frag); frag = skb; } - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; slow_path_clean: @@ -765,15 +775,15 @@ slow_path: if (err) goto fail; - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); } consume_skb(skb); - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); return err; fail: kfree_skb(skb); - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; } EXPORT_SYMBOL(ip_do_fragment); @@ -1561,7 +1571,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_vrf(net, skb->skb_iif)) + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; flowi4_init_output(&fl4, oif, diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0c152087ca15..3b87ec5178f9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -197,7 +197,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; - err = dst_output(skb); + err = dst_output(skb->sk, skb); if (net_xmit_eval(err) == 0) err = skb->len; iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 866ee89f5254..cfcb996ec51b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1678,17 +1678,18 @@ static void ip_encap(struct net *net, struct sk_buff *skb, nf_reset(skb); } -static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ipmr_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } /* @@ -1745,7 +1746,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * to blackhole. */ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } @@ -1787,8 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, - skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dev, ipmr_forward_finish); return; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 61eafc9b4545..c3776ff6749f 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,9 +17,8 @@ #include <net/netfilter/nf_queue.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; @@ -104,7 +103,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, } } -static int nf_ip_reroute(struct sk_buff *skb, +static int nf_ip_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -116,7 +115,7 @@ static int nf_ip_reroute(struct sk_buff *skb, skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(skb, RTN_UNSPEC); + return ip_route_me_harder(net, skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8f87fc38ccde..2dad3e1c5f11 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -247,10 +247,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) } unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; @@ -285,6 +285,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.hooknum = hook; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 93876d03120c..1897ee160920 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,13 +27,10 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +arptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? state->in : state->out); - - return arpt_do_table(skb, ops->hooknum, state, - net->ipv4.arptable_filter); + return arpt_do_table(skb, state, state->net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b0a86e73451c..42d0946956db 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -246,7 +246,8 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, return 0; } -static void trace_packet(const struct sk_buff *skb, +static void trace_packet(struct net *net, + const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -258,7 +259,6 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; - struct net *net = dev_net(in ? in : out); root = get_entry(private->entries, private->hook_entry[hook]); @@ -285,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ @@ -315,6 +315,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV4; @@ -378,8 +379,8 @@ ipt_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, state->in, state->out, - table->name, private, e); + trace_packet(state->net, skb, hook, state->in, + state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 45cb16a6a4a3..3f32c03e8b2e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -507,14 +507,14 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(const struct nf_hook_ops *ops, +arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; - struct net *net = dev_net(state->in ? state->in : state->out); + struct net *net = state->net; /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 87907d4bd259..1d16c0f28df0 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(skb, hook); + nf_send_reset(par->net, skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 95ea633e8356..6a6e762ab27f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -39,11 +39,14 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) } static void -synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, +synproxy_send_tcp(const struct synproxy_net *snet, + const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { + struct net *net = nf_ct_net(snet->tmpl); + nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; @@ -51,7 +54,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { @@ -68,7 +71,8 @@ free_nskb: } static void -synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, +synproxy_send_client_synack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; @@ -104,7 +108,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } @@ -148,7 +152,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } @@ -188,7 +192,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void @@ -226,7 +230,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } @@ -258,7 +262,7 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_net *snet = synproxy_pernet(par->net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -287,7 +291,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(skb, th, &opts); + synproxy_send_client_synack(snet, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { @@ -299,11 +303,11 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); + struct synproxy_net *snet = synproxy_pernet(nhs->net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 8618fd150c96..74dd6671b66d 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -32,12 +32,11 @@ static __be32 rpfilter_get_saddr(__be32 addr) return addr; } -static bool rpfilter_lookup_reverse(struct flowi4 *fl4, +static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, const struct net_device *dev, u8 flags) { struct fib_result res; bool dev_match; - struct net *net = dev_net(dev); int ret __maybe_unused; if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) @@ -98,7 +97,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; - return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index a0f3beca52d2..397ef2dd133e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,19 +33,16 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net; - - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter); + return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 62cbb8c5f4a8..ba5d392a13c4 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -39,7 +39,6 @@ static const struct xt_table packet_mangler = { static unsigned int ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { - struct net_device *out = state->out; unsigned int ret; const struct iphdr *iph; u_int8_t tos; @@ -59,8 +58,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, - dev_net(out)->ipv4.iptable_mangle); + ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); @@ -69,7 +67,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } @@ -80,18 +78,17 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. */ static unsigned int -iptable_mangle_hook(const struct nf_hook_ops *ops, +iptable_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, state, - dev_net(state->out)->ipv4.iptable_mangle); + if (state->hook == NF_INET_POST_ROUTING) + return ipt_do_table(skb, state, + state->net->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, state, - dev_net(state->in)->ipv4.iptable_mangle); + return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 0d4d9cdf98a4..3a2e4d830a0b 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -28,42 +28,40 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; -static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - - return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table); + return ipt_do_table(skb, state, state->net->ipv4.nat_table); } -static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 0356e6da4bb7..1ba02811acb0 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,19 +20,16 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net; - - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw); + return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 4bce3980ccd9..f534e2f05bad 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,20 +37,16 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net; - - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state, - net->ipv4.iptable_security); + return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8a2caaf3940b..752fb40adcf8 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(const struct nf_hook_ops *ops, +static unsigned int ipv4_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -119,7 +119,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops, ct, ctinfo); } -static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -143,14 +143,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } -static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -158,7 +158,7 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index cdde3ec496e9..c567e1b5d799 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -30,7 +30,7 @@ static inline struct nf_icmp_net *icmp_pernet(struct net *net) } static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmphdr *hp; struct icmphdr _hdr; @@ -144,7 +144,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb) + ip_hdrlen(skb) + sizeof(struct icmphdr), - PF_INET, &origtuple)) { + PF_INET, net, &origtuple)) { pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 9306ec4fab41..b246346ee849 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -61,7 +61,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -83,7 +83,7 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, /* Gather fragments. */ if (ip_is_fragment(ip_hdr(skb))) { enum ip_defrag_users user = - nf_ct_defrag_user(ops->hooknum, skb); + nf_ct_defrag_user(state->hook, skb); if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index 2d79e6e8d934..ce2a59e5c665 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -23,25 +23,10 @@ #include <net/netfilter/nf_conntrack.h> #endif -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, - int oif) +static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb, + const struct in_addr *gw, int oif) { const struct iphdr *iph = ip_hdr(skb); - struct net *net = pick_net(skb); struct rtable *rt; struct flowi4 fl4; @@ -65,7 +50,7 @@ static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, return true; } -void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in_addr *gw, int oif) { struct iphdr *iph; @@ -105,7 +90,7 @@ void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, --iph->ttl; ip_send_check(iph); - if (nf_dup_ipv4_route(skb, gw, oif)) { + if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); ip_local_out(skb); __this_cpu_write(nf_skb_duplicated, false); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 22f4579b0c2a..5075b7ecd26d 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,9 +255,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); unsigned int -nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -266,7 +266,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. @@ -295,7 +295,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - ops->hooknum)) + state->hook)) return NF_DROP; else return NF_ACCEPT; @@ -308,21 +308,21 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; - if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) break; - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + ret = nf_nat_alloc_null_binding(ct, state->hook); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } @@ -332,11 +332,11 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nf_nat_packet(ct, ctinfo, state->hook, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -345,9 +345,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); unsigned int -nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -355,7 +355,7 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -365,9 +365,9 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); unsigned int -nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -384,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -396,7 +396,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET); + err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } @@ -407,9 +407,9 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); unsigned int -nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -424,14 +424,14 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } @@ -440,7 +440,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET); + err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 3262e41ff76f..2f5e925d3264 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -99,7 +99,7 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); /* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; const struct iphdr *oiph; @@ -129,7 +129,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; /* "Never happens" */ diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 8412268bbad1..9d09d4f59545 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -15,15 +15,15 @@ #include <net/netfilter/nf_tables.h> static unsigned int -nft_do_chain_arp(const struct nf_hook_ops *ops, +nft_do_chain_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_arp __read_mostly = { diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index aa180d3a69a5..ca9dc3c46c4f 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -18,18 +18,18 @@ #include <net/ip.h> #include <net/netfilter/nf_tables_ipv4.h> -static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -41,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv4(ops, skb, state); + return nft_do_chain_ipv4(priv, skb, state); } struct nft_af_info nft_af_ipv4 __read_mostly = { diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index bf5c30ae14e4..f5c66a7a4bf2 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -26,44 +26,44 @@ #include <net/netfilter/nf_nat_l3proto.h> #include <net/ip.h> -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv4 = { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index e335b0afdaf3..2375b0a8be46 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -21,7 +21,7 @@ #include <net/route.h> #include <net/ip.h> -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -37,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); mark = skb->mark; iph = ip_hdr(skb); @@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -53,7 +53,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) + if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) ret = NF_DROP; } return ret; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index b45932d43b69..bf855e64fc45 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, }; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif); + nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } static int nft_dup_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 40e414c4ca56..b72ffc58e255 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,7 +26,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, &range, pkt->out); } diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index d8d795df9c13..c09d4381427e 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -36,7 +36,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, mr.range[0].flags |= priv->flags; regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, - pkt->ops->hooknum); + pkt->hook); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index b07e58b51158..c24f41c816b3 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,11 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nf_send_reset(pkt->net, pkt->skb, pkt->hook); break; default: break; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 561cd4b8fc6e..28ef8a913130 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -411,8 +411,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, - NULL, rt->dst.dev, dst_output_sk); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, rt->dst.dev, + dst_output_okfn); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c81deb85acb4..76ca4e75f785 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,7 +112,7 @@ #endif #include <net/secure_seq.h> #include <net/ip_tunnels.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) @@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) return; } log_martians = IN_DEV_LOG_MARTIANS(in_dev); - vif = vrf_master_ifindex_rcu(rt->dst.dev); + vif = l3mdev_master_ifindex_rcu(rt->dst.dev); rcu_read_unlock(); net = dev_net(rt->dst.dev); @@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb) } peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - vrf_master_ifindex(skb->dev), 1); + l3mdev_master_ifindex(skb->dev), 1); send = true; if (peer) { @@ -1438,12 +1438,34 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, } static struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, bool nopolicy, bool noxfrm, bool will_cache) { - return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | - (nopolicy ? DST_NOPOLICY : 0) | - (noxfrm ? DST_NOXFRM : 0)); + struct rtable *rt; + + rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? DST_NOXFRM : 0)); + + if (rt) { + rt->rt_genid = rt_genid_ipv4(dev_net(dev)); + rt->rt_flags = flags; + rt->rt_type = type; + rt->rt_is_input = 0; + rt->rt_iif = 0; + rt->rt_pmtu = 0; + rt->rt_gateway = 0; + rt->rt_uses_gateway = 0; + rt->rt_table_id = 0; + INIT_LIST_HEAD(&rt->rt_uncached); + + rt->dst.output = ip_output; + if (flags & RTCF_LOCAL) + rt->dst.input = ip_local_deliver; + } + + return rt; } /* called in rcu_read_lock() section */ @@ -1452,6 +1474,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct rtable *rth; struct in_device *in_dev = __in_dev_get_rcu(dev); + unsigned int flags = RTCF_MULTICAST; u32 itag = 0; int err; @@ -1464,9 +1487,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb->protocol != htons(ETH_P_IP)) goto e_inval; - if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(saddr)) - goto e_inval; + if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) + goto e_inval; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) @@ -1477,7 +1499,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (err < 0) goto e_err; } - rth = rt_dst_alloc(dev_net(dev)->loopback_dev, + if (our) + flags |= RTCF_LOCAL; + + rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) goto e_nobufs; @@ -1486,20 +1511,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->dst.tclassid = itag; #endif rth->dst.output = ip_rt_bug; - - rth->rt_genid = rt_genid_ipv4(dev_net(dev)); - rth->rt_flags = RTCF_MULTICAST; - rth->rt_type = RTN_MULTICAST; rth->rt_is_input= 1; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); - if (our) { - rth->dst.input= ip_local_deliver; - rth->rt_flags |= RTCF_LOCAL; - } #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) @@ -1608,7 +1620,7 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(out_dev->dev, + rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); if (!rth) { @@ -1616,19 +1628,12 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev)); - rth->rt_flags = 0; - rth->rt_type = res->type; rth->rt_is_input = 1; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); + if (res->table) + rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; - rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); if (lwtunnel_output_redirect(rth->dst.lwtstate)) { @@ -1706,6 +1711,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_source; res.fi = NULL; + res.table = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; @@ -1733,7 +1739,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex; + fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -1754,7 +1760,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &itag); if (err < 0) - goto martian_source_keep_err; + goto martian_source; goto local_input; } @@ -1776,7 +1782,7 @@ brd_input: err = fib_validate_source(skb, saddr, 0, tos, 0, dev, in_dev, &itag); if (err < 0) - goto martian_source_keep_err; + goto martian_source; } flags |= RTCF_BROADCAST; res.type = RTN_BROADCAST; @@ -1796,26 +1802,18 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, + rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; - rth->dst.input= ip_local_deliver; rth->dst.output= ip_rt_bug; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif - - rth->rt_genid = rt_genid_ipv4(net); - rth->rt_flags = flags|RTCF_LOCAL; - rth->rt_type = res.type; rth->rt_is_input = 1; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); + if (res.table) + rth->rt_table_id = res.table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); if (res.type == RTN_UNREACHABLE) { @@ -1837,6 +1835,7 @@ no_route: RT_CACHE_STAT_INC(in_no_route); res.type = RTN_UNREACHABLE; res.fi = NULL; + res.table = NULL; goto local_input; /* @@ -1859,8 +1858,6 @@ e_nobufs: goto out; martian_source: - err = -EINVAL; -martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); goto out; } @@ -1988,28 +1985,19 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } add: - rth = rt_dst_alloc(dev_out, + rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), do_cache); if (!rth) return ERR_PTR(-ENOBUFS); - rth->dst.output = ip_output; - - rth->rt_genid = rt_genid_ipv4(dev_net(dev_out)); - rth->rt_flags = flags; - rth->rt_type = type; - rth->rt_is_input = 0; rth->rt_iif = orig_oif ? : 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); + if (res->table) + rth->rt_table_id = res->table->tb_id; + RT_CACHE_STAT_INC(out_slow_tot); - if (flags & RTCF_LOCAL) - rth->dst.input = ip_local_deliver; if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { @@ -2137,11 +2125,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - if (netif_is_vrf(dev_out) && - !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { - rth = vrf_dev_get_rth(dev_out); + + rth = l3mdev_get_rtable(dev_out, fl4); + if (rth) goto out; - } } if (!fl4->daddr) { @@ -2303,7 +2290,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or } struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, - struct sock *sk) + const struct sock *sk) { struct rtable *rt = __ip_route_output_key(net, flp4); @@ -2319,7 +2306,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, __be32 dst, __be32 src, +static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { @@ -2339,8 +2326,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = RT_TABLE_MAIN; - if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) + r->rtm_table = table_id; + if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; @@ -2445,6 +2432,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int err; int mark; struct sk_buff *skb; + u32 table_id = RT_TABLE_MAIN; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2514,7 +2502,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, dst, src, &fl4, skb, + if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) + table_id = rt->rt_table_id; + + err = rt_fill_info(net, dst, src, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err < 0) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d70b1f603692..6b97b5f6457c 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -192,15 +192,11 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); -__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, - __u16 *mssp) +__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); - tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return __cookie_v4_init_sequence(iph, th, mssp); } @@ -345,7 +341,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->snt_synack.v64 = 0; treq->tfo_listener = false; ireq->ir_iif = sk->sk_bound_dev_if; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b8b8fa184f75..3c96fa87ff9e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2253,13 +2253,6 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); -void tcp_sock_destruct(struct sock *sk) -{ - inet_sock_destruct(sk); - - kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); -} - static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && @@ -2581,7 +2574,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, TCPF_LISTEN))) { tcp_fastopen_init_key_once(true); - err = fastopen_init_queue(sk, val); + fastopen_queue_tune(sk, val); } else { err = -EINVAL; } @@ -2849,10 +2842,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (icsk->icsk_accept_queue.fastopenq) - val = icsk->icsk_accept_queue.fastopenq->max_qlen; - else - val = 0; + val = icsk->icsk_accept_queue.fastopenq.max_qlen; break; case TCP_TIMESTAMP: diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 93c4dc3ab23f..882caa4e72bc 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -173,6 +173,10 @@ out: */ if (ca->get_info) memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); } void tcp_init_congestion_control(struct sock *sk) @@ -181,6 +185,10 @@ void tcp_init_congestion_control(struct sock *sk) if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); + if (tcp_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); } static void tcp_reinit_congestion_control(struct sock *sk, @@ -192,8 +200,8 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); + if (sk->sk_state != TCP_CLOSE) + tcp_init_congestion_control(sk); } /* Manage refcounts on socket close. */ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f9c0fb84e435..f69f436fcbcc 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -124,10 +124,10 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, return false; } -static bool tcp_fastopen_create_child(struct sock *sk, - struct sk_buff *skb, - struct dst_entry *dst, - struct request_sock *req) +static struct sock *tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct dst_entry *dst, + struct request_sock *req) { struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -140,11 +140,11 @@ static bool tcp_fastopen_create_child(struct sock *sk, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); if (!child) - return false; + return NULL; - spin_lock(&queue->fastopenq->lock); - queue->fastopenq->qlen++; - spin_unlock(&queue->fastopenq->lock); + spin_lock(&queue->fastopenq.lock); + queue->fastopenq.qlen++; + spin_unlock(&queue->fastopenq.lock); /* Initialize the child socket. Have to fix some values to take * into account the child is a Fast Open socket and is created @@ -216,9 +216,11 @@ static bool tcp_fastopen_create_child(struct sock *sk, tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq; sk->sk_data_ready(sk); bh_unlock_sock(child); - sock_put(child); + /* Note: sock_put(child) will be done by tcp_conn_request() + * after SYNACK packet is sent. + */ WARN_ON(!req->sk); - return true; + return child; } static bool tcp_fastopen_queue_check(struct sock *sk) @@ -235,8 +237,8 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * between qlen overflow causing Fast Open to be disabled * temporarily vs a server not supporting Fast Open at all. */ - fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - if (!fastopenq || fastopenq->max_qlen == 0) + fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; + if (fastopenq->max_qlen == 0) return false; if (fastopenq->qlen >= fastopenq->max_qlen) { @@ -261,13 +263,14 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). */ -bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct dst_entry *dst) +struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst) { struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + struct sock *child; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); @@ -276,7 +279,7 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; - return false; + return NULL; } if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) @@ -296,11 +299,12 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, * data in SYN_RECV state. */ fastopen: - if (tcp_fastopen_create_child(sk, skb, dst, req)) { + child = tcp_fastopen_create_child(sk, skb, dst, req); + if (child) { foc->len = -1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); - return true; + return child; } NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else if (foc->len > 0) /* Client presents an invalid cookie */ @@ -308,6 +312,5 @@ fastopen: valid_foc.exp = foc->exp; *foc = valid_foc; - return false; + return NULL; } -EXPORT_SYMBOL(tcp_try_fastopen); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8f515bb19c4..e58cbcd2f07e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2953,21 +2953,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) +void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - long seq_rtt_us = -1L; + long rtt_us = -1L; - if (synack_stamp && !tp->total_retrans) - seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); + if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) { + struct skb_mstamp now; - /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets - * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() - */ - if (!tp->srtt_us) - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); + skb_mstamp_get(&now); + rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); + } + + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L); } + static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -5472,7 +5472,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, } static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -5698,15 +5698,14 @@ reset_and_undo: * address independent. */ -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) +int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcphdr *th = tcp_hdr(skb); struct request_sock *req; int queued = 0; bool acceptable; - u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5750,7 +5749,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; case TCP_SYN_SENT: - queued = tcp_rcv_synsent_state_process(sk, skb, th, len); + queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5785,15 +5784,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (!acceptable) return 1; + if (!tp->srtt_us) + tcp_synack_rtt_meas(sk, req); + /* Once we leave TCP_SYN_RECV, we no longer need req * so release it. */ if (req) { - synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { - synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5816,7 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -6027,7 +6026,7 @@ static void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = tcp_time_stamp; + skb_mstamp_get(&tcp_rsk(req)->snt_synack); tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; @@ -6065,7 +6064,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(struct sock *sk, +static bool tcp_syn_flood_action(const struct sock *sk, const struct sk_buff *skb, const char *proto) { @@ -6083,11 +6082,12 @@ static bool tcp_syn_flood_action(struct sock *sk, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; + if (!lopt->synflood_warned && + sysctl_tcp_syncookies != 2 && + xchg(&lopt->synflood_warned, 1) == 0) pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); - } + return want_cookie; } @@ -6112,14 +6112,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) { + struct tcp_fastopen_cookie foc = { .len = -1 }; + __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; - struct request_sock *req; struct tcp_sock *tp = tcp_sk(sk); + struct sock *fastopen_sk = NULL; struct dst_entry *dst = NULL; - __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; - bool want_cookie = false, fastopen; + struct request_sock *req; + bool want_cookie = false; struct flowi fl; - struct tcp_fastopen_cookie foc = { .len = -1 }; int err; @@ -6228,12 +6229,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = af_ops->send_synack(sk, dst, &fl, req, + if (!want_cookie) + fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); + err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req, skb_get_queue_mapping(skb), &foc); - if (!fastopen) { + if (fastopen_sk) { + sock_put(fastopen_sk); + } else { if (err || want_cookie) goto drop_and_free; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 93898e093d4e..64ece718d66c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -576,7 +576,7 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -795,7 +795,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV @@ -818,7 +818,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, +static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, u16 queue_mapping, @@ -865,7 +865,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ /* Find the Key structure for an address. */ -struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, +struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, const union tcp_md5_addr *addr, int family) { @@ -877,7 +877,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + lockdep_is_held((spinlock_t *)&sk->sk_lock.slock)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) @@ -894,7 +894,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, } EXPORT_SYMBOL(tcp_md5_do_lookup); -struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, +struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { const union tcp_md5_addr *addr; @@ -1168,7 +1168,8 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, } #endif -static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, +static void tcp_v4_init_req(struct request_sock *req, + const struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); @@ -1179,7 +1180,8 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, ireq->opt = tcp_v4_save_options(skb); } -static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, +static struct dst_entry *tcp_v4_route_req(const struct sock *sk, + struct flowi *fl, const struct request_sock *req, bool *strict) { @@ -1241,7 +1243,7 @@ EXPORT_SYMBOL(tcp_v4_conn_request); * The three way handshake has completed - we got a valid synack - * now create the new socket. */ -struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { @@ -1277,7 +1279,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; - sk_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; @@ -1420,7 +1421,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { + if (tcp_rcv_state_process(sk, skb)) { rsk = sk; goto reset; } @@ -2185,7 +2186,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_sock *inet = inet_sk(sk); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index def765911ff8..897e34273ba3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -361,27 +361,35 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +/* Warning : This function is called without sk_listener being locked. + * Be sure to read socket fields once, as their value could change under us. + */ void tcp_openreq_init_rwin(struct request_sock *req, - struct sock *sk, struct dst_entry *dst) + const struct sock *sk_listener, + const struct dst_entry *dst) { struct inet_request_sock *ireq = inet_rsk(req); - struct tcp_sock *tp = tcp_sk(sk); - __u8 rcv_wscale; + const struct tcp_sock *tp = tcp_sk(sk_listener); + u16 user_mss = READ_ONCE(tp->rx_opt.user_mss); + int full_space = tcp_full_space(sk_listener); int mss = dst_metric_advmss(dst); + u32 window_clamp; + __u8 rcv_wscale; - if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) - mss = tp->rx_opt.user_mss; + if (user_mss && user_mss < mss) + mss = user_mss; + window_clamp = READ_ONCE(tp->window_clamp); /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + req->window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* limit the window selection if the user enforce a smaller rx buffer */ - if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && - (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) - req->window_clamp = tcp_full_space(sk); + if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->window_clamp > full_space || req->window_clamp == 0)) + req->window_clamp = full_space; /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), + tcp_select_initial_window(full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rcv_wnd, &req->window_clamp, @@ -433,7 +441,9 @@ EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); * Actually, we could lots of memory writes here. tp of listening * socket contains all necessary default parameters. */ -struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) +struct sock *tcp_create_openreq_child(const struct sock *sk, + struct request_sock *req, + struct sk_buff *skb) { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); @@ -469,7 +479,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; - newtp->lsndtime = treq->snt_synack; + newtp->lsndtime = treq->snt_synack.stamp_jiffies; + newsk->sk_txhash = treq->txhash; newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; @@ -759,6 +770,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + tcp_synack_rtt_meas(child, req); inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); /* Warning: caller must not call reqsk_put(req); @@ -811,8 +823,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int state = child->sk_state; if (!sock_owned_by_user(child)) { - ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), - skb->len); + ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1100ffe4a722..09bb082ca1a7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -357,14 +357,10 @@ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) } static void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, - struct sock *sk) +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) { - if (inet_rsk(req)->ecn_ok) { + if (inet_rsk(req)->ecn_ok) th->ece = 1; - if (tcp_ca_needs_ecn(sk)) - INET_ECN_xmit(sk); - } } /* Set up ECN state for a packet on a ESTABLISHED socket that is about to @@ -612,12 +608,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } /* Set up TCP options for SYN-ACKs. */ -static unsigned int tcp_synack_options(struct sock *sk, - struct request_sock *req, - unsigned int mss, struct sk_buff *skb, - struct tcp_out_options *opts, - const struct tcp_md5sig_key *md5, - struct tcp_fastopen_cookie *foc) +static unsigned int tcp_synack_options(struct request_sock *req, + unsigned int mss, struct sk_buff *skb, + struct tcp_out_options *opts, + const struct tcp_md5sig_key *md5, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -1827,7 +1822,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, /* Ok, it looks like it is advisable to defer. */ - if (cong_win < send_win && cong_win < skb->len) + if (cong_win < send_win && cong_win <= skb->len) *is_cwnd_limited = true; return true; @@ -2060,7 +2055,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { - is_cwnd_limited = true; if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; @@ -2142,6 +2136,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); tcp_cwnd_validate(sk, is_cwnd_limited); return false; } @@ -2165,7 +2160,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ - if (sk->sk_state == TCP_SYN_RECV) + if (tp->fastopen_rsk) return false; /* TLP is only scheduled when next timer event is RTO. */ @@ -2175,7 +2170,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out || + if (sysctl_tcp_early_retrans < 3 || !tp->packets_out || !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) return false; @@ -2184,9 +2179,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) return false; /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account - * for delayed ack when there's one outstanding packet. + * for delayed ack when there's one outstanding packet. If no RTT + * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1; + timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; if (tp->packets_out == 1) timeout = max_t(u32, timeout, (rtt + (rtt >> 1) + TCP_DELACK_MAX)); @@ -2949,20 +2945,25 @@ int tcp_send_synack(struct sock *sk) * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. */ -struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, +struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc) { - struct tcp_out_options opts; struct inet_request_sock *ireq = inet_rsk(req); - struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th; - struct sk_buff *skb; + const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *md5 = NULL; + struct tcp_out_options opts; + struct sk_buff *skb; int tcp_header_size; + struct tcphdr *th; + u16 user_mss; int mss; - skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); + /* sk is a const pointer, because we want to express multiple cpus + * might call us concurrently. + * sock_wmalloc() will change sk->sk_wmem_alloc in an atomic way. + */ + skb = sock_wmalloc((struct sock *)sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2973,8 +2974,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_dst_set(skb, dst); mss = dst_metric_advmss(dst); - if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) - mss = tp->rx_opt.user_mss; + user_mss = READ_ONCE(tp->rx_opt.user_mss); + if (user_mss && user_mss < mss) + mss = user_mss; memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES @@ -2988,8 +2990,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, rcu_read_lock(); md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc) + sizeof(*th); + skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); + tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2998,7 +3001,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - tcp_ecn_make_synack(req, th, sk); + tcp_ecn_make_synack(req, th); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; /* Setting of flags are superfluous here for callers (and ECE is @@ -3013,7 +3016,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); - tcp_options_write((__be32 *)(th + 1), tp, &opts); + tcp_options_write((__be32 *)(th + 1), NULL, &opts); th->doff = (tcp_header_size >> 2); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); @@ -3500,12 +3503,13 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } -int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) { const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; struct flowi fl; int res; + tcp_rsk(req)->txhash = net_tx_rndhash(); res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f7d1d5e19e95..156ba75b6000 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,7 +1021,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) * device lookup source address from VRF table. This mimics * behavior of ip_route_connect{_init}. */ - if (netif_index_is_vrf(net, ipc.oif)) { + if (netif_index_is_l3_master(net, ipc.oif)) { flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, (flow_flags | FLOWI_FLAG_VRFSRC | diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 60b032f58ccc..62e1e72db461 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -22,7 +22,8 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb) +static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); @@ -52,8 +53,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 2878dbfffeb7..cd6be736e19f 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -80,14 +80,14 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } -static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; #ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } #endif @@ -96,8 +96,11 @@ static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, - NULL, skb_dst(skb)->dev, __xfrm4_output, + struct net *net = dev_net(skb_dst(skb)->dev); + + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c10a9ee68433..f2606b9056bb 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,7 @@ #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> -#include <net/vrf.h> +#include <net/l3mdev.h> static struct xfrm_policy_afinfo xfrm4_policy_afinfo; @@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; @@ -110,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? - : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 900113376d4e..c8380f1876f1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3625,7 +3625,7 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL); + ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL); out: in6_ifa_put(ifp); rtnl_unlock(); @@ -4729,7 +4729,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } -static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) +static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, + u32 ext_filter_mask) { struct nlattr *nla; struct ifla_cacheinfo ci; @@ -4749,6 +4750,9 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) /* XXX - MC not implemented */ + if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS) + return 0; + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); if (!nla) goto nla_put_failure; @@ -4784,14 +4788,15 @@ static size_t inet6_get_link_af_size(const struct net_device *dev) return inet6_ifla6_size(); } -static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev, + u32 ext_filter_mask) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev) return -ENODATA; - if (inet6_fill_ifla6_attrs(skb, idev) < 0) + if (inet6_fill_ifla6_attrs(skb, idev, ext_filter_mask) < 0) return -EMSGSIZE; return 0; @@ -4946,7 +4951,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, if (!protoinfo) goto nla_put_failure; - if (inet6_fill_ifla6_attrs(skb, idev) < 0) + if (inet6_fill_ifla6_attrs(skb, idev, 0) < 0) goto nla_put_failure; nla_nest_end(skb, protoinfo); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9aadd57808a5..d70b0238f468 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -263,7 +263,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 6927f3fb5597..163bfef3e5db 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -65,17 +65,18 @@ int inet6_csk_bind_conflict(const struct sock *sk, } EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); -struct dst_entry *inet6_csk_route_req(struct sock *sk, +struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, - const struct request_sock *req) + const struct request_sock *req, + u8 proto) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); - fl6->flowi6_proto = IPPROTO_TCP; + fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; @@ -91,6 +92,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, return dst; } +EXPORT_SYMBOL(inet6_csk_route_req); /* * request_sock (formerly open request) hash tables. diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index adba03ac7ce9..9075acf081dd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,7 +47,7 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> -int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) +int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -109,7 +109,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; - IP6_ADD_STATS_BH(dev_net(dev), idev, + IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_NOECTPKTS + (ipv6_get_dsfield(hdr) & INET_ECN_MASK), max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); @@ -183,8 +183,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, - dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -199,9 +199,8 @@ drop: */ -static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) +static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; @@ -278,8 +277,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 92b1aa38f121..a598fe2c0849 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,6 +60,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct net *net = dev_net(dev); struct neighbour *neigh; struct in6_addr *nexthop; int ret; @@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && - ((mroute6_socket(dev_net(dev), skb) && + ((mroute6_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { @@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { - IP6_INC_STATS(dev_net(dev), idev, + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, - skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && @@ -116,13 +116,12 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || @@ -136,28 +135,31 @@ int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + struct net *net = dev_net(dev); + if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(dev), idev, - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* - * xmit an sk_buff (used by TCP, SCTP and DCCP) + * xmit an sk_buff (used by TCP, SCTP and DCCP) + * Note : socket lock is not held for SYNACK packets, but might be modified + * by calls to skb_set_owner_w() and ipv6_local_error(), + * which are using proper atomic operations or spinlocks. */ - -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, +int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; @@ -186,7 +188,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, } consume_skb(skb); skb = skb2; - skb_set_owner_w(skb, sk); + /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically, + * it is safe to call in our context (socket lock not held) + */ + skb_set_owner_w(skb, (struct sock *)sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -224,12 +229,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, dst_output_sk); + /* hooks should never assume socket lock is held. + * we promote our socket to non const + */ + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, (struct sock *)sk, skb, NULL, dst->dev, + dst_output_okfn); } skb->dev = dst->dev; - ipv6_local_error(sk, EMSGSIZE, fl6, mtu); + /* ipv6_local_error() does not require socket lock, + * we promote our socket to non const + */ + ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -317,10 +330,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -512,8 +526,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: @@ -883,7 +897,7 @@ out: return dst; } -static int ip6_dst_lookup_tail(struct net *net, struct sock *sk, +static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -1014,7 +1028,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * It returns a valid dst pointer on success, or a pointer encoded * error code. */ -struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, +struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0224c032dca5..f96f1c19b4a8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -482,7 +482,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) return -EMSGSIZE; } - err = dst_output(skb); + err = dst_output(skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0e004cc42a22..5e5d16e7ce85 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1985,13 +1985,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } /* @@ -2063,8 +2063,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 083b2927fc67..a8bf57ca74d3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1645,8 +1645,8 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net->ipv6.igmp_sk, skb, NULL, skb->dev, - dst_output_sk); + net, net->ipv6.igmp_sk, skb, NULL, skb->dev, + dst_output_okfn); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -2008,8 +2008,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, skb->dev, dst_output_sk); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb->dev, + dst_output_okfn); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 64a71354b069..7089c305245c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -463,9 +463,9 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, - dst_output_sk); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, dst->dev, + dst_output_okfn); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); @@ -474,8 +474,7 @@ static void ndisc_send_skb(struct sk_buff *skb, rcu_read_unlock(); } -void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *daddr, +void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt) { @@ -541,7 +540,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr, + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); @@ -551,8 +550,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) in6_dev_put(idev); } -void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *solicit, +void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, struct sk_buff *oskb) { @@ -679,12 +677,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, neigh, target, target, saddr, skb); + ndisc_send_ns(dev, target, target, saddr, skb); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb); + ndisc_send_ns(dev, target, &mcaddr, saddr, skb); } } @@ -828,7 +826,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) is_router = idev->cnf.forwarding; if (dad) { - ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, !!is_router, false, (ifp != NULL), true); goto out; } @@ -849,8 +847,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->header_ops) { - ndisc_send_na(dev, neigh, saddr, &msg->target, - !!is_router, + ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b4de08a83e0b..d11c46833d61 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,9 +18,8 @@ #include <net/ip6_checksum.h> #include <net/netfilter/nf_queue.h> -int ip6_route_me_harder(struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); unsigned int hh_len; struct dst_entry *dst; @@ -93,7 +92,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, } } -static int nf_ip6_reroute(struct sk_buff *skb, +static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(skb); + return ip6_route_me_harder(net, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0771991ed812..80e3bd72b715 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -275,7 +275,8 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, return 0; } -static void trace_packet(const struct sk_buff *skb, +static void trace_packet(struct net *net, + const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -287,7 +288,6 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; - struct net *net = dev_net(in ? in : out); root = get_entry(private->entries, private->hook_entry[hook]); @@ -314,10 +314,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; @@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV6; @@ -401,8 +402,8 @@ ip6t_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, state->in, state->out, - table->name, private, e); + trace_packet(state->net, skb, hook, state->in, + state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0ed841a3fa33..db29bbf41b59 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,7 +39,7 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = dev_net((par->in != NULL) ? par->in : par->out); + struct net *net = par->net; switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 1e4bf99ed16e..c2356602158a 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -275,7 +275,7 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_net *snet = synproxy_pernet(par->net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -316,11 +316,11 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); + struct synproxy_net *snet = synproxy_pernet(nhs->net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 790e0c6b19e1..1ee1b25df096 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -26,7 +26,7 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr) return addr_type & IPV6_ADDR_UNICAST; } -static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, +static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { struct rt6_info *rt; @@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, lookup_flags |= RT6_LOOKUP_F_IFACE; } - rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags); + rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); if (rt->dst.error) goto out; @@ -93,7 +93,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 5c33d8abc077..8b277b983ca5 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,12 +32,10 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? state->in : state->out); - - return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b551f5b79fe2..abe278b07932 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, - dev_net(state->out)->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -66,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } @@ -76,17 +75,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, state, - dev_net(state->out)->ipv6.ip6table_mangle); + if (state->hook == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, state, + state->net->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, state, - dev_net(state->in)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index c3a7f7af0ed4..abea175d5853 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -30,42 +30,40 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - - return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); } -static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 0b33caad2b69..9021963565c3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,12 +19,10 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? state->in : state->out); - - return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index fcef83c25f7b..0d856fedfeb0 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,13 +36,10 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? state->in : state->out); - - return ip6t_do_table(skb, ops->hooknum, state, - net->ipv6.ip6table_security); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 7302900c321a..dd83ad42f8f6 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(const struct nf_hook_ops *ops, +static unsigned int ipv6_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv6_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -165,14 +165,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } -static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -181,7 +181,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0e6fae103d33..d3b797446cea 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -36,6 +36,7 @@ static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; @@ -159,7 +160,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, skb_network_offset(skb) + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr), - PF_INET6, &origtuple)) { + PF_INET6, net, &origtuple)) { pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 6d9c0b3d5b8c..a99baf63eccf 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -51,7 +51,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -63,7 +63,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(state->hook, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, + NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index c8ab626556a0..ee0d9a5b16c3 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -19,25 +19,10 @@ #include <net/netfilter/nf_conntrack.h> #endif -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, - int oif) +static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, + const struct in6_addr *gw, int oif) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = pick_net(skb); struct dst_entry *dst; struct flowi6 fl6; @@ -61,7 +46,7 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, return true; } -void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif) { if (this_cpu_read(nf_skb_duplicated)) @@ -81,7 +66,7 @@ void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, struct ipv6hdr *iph = ipv6_hdr(skb); --iph->hop_limit; } - if (nf_dup_ipv6_route(skb, gw, oif)) { + if (nf_dup_ipv6_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); ip6_local_out(skb); __this_cpu_write(nf_skb_duplicated, false); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 70fbaed49edb..238e70c3f7b7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); unsigned int -nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -272,7 +272,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -303,7 +303,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - ops->hooknum, + state->hook, hdrlen)) return NF_DROP; else @@ -317,21 +317,21 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; - if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) break; - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + ret = nf_nat_alloc_null_binding(ct, state->hook); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } break; @@ -340,11 +340,11 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nf_nat_packet(ct, ctinfo, state->hook, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -353,9 +353,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); unsigned int -nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); unsigned int -nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -403,7 +403,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET6); + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } @@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); unsigned int -nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -430,14 +430,14 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } @@ -446,7 +446,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET6); + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 7745609665cd..31ba7ca19757 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -34,7 +34,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY)); - if (ipv6_dev_get_saddr(dev_net(out), out, + if (ipv6_dev_get_saddr(nf_ct_net(ct), out, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index c8148ba76d1a..120ea9131be0 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,20 +16,20 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_ipv6.h> -static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) + if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv6_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, state); + return nft_do_chain_ipv6(priv, skb, state); } struct nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 951bb458b7bd..443cd306c0b0 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -24,44 +24,44 @@ #include <net/netfilter/nf_nat_l3proto.h> #include <net/ipv6.h> -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 0dafdaac5e17..9df75bd7c94a 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -22,7 +22,7 @@ #include <net/netfilter/nf_tables_ipv6.h> #include <net/route.h> -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, u32 mark, flowlabel; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) + if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; /* save source/dest address, mark, hoplimit, flowlabel, priority */ @@ -45,14 +45,14 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP; return ret; } diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 0eaa4f65fdea..8bfd470cbe72 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif); + nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index effd393bd517..aca44e89a881 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -35,8 +35,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, range.flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, - pkt->ops->hooknum); + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index d0d1540ecf87..533cd5719c59 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -24,15 +24,14 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nf_send_reset6(pkt->net, pkt->skb, pkt->hook); break; default: break; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 928a0fb0b744..e77102c4f804 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -140,6 +140,7 @@ EXPORT_SYMBOL(ip6_dst_hoplimit); static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); int len; len = skb->len - sizeof(struct ipv6hdr); @@ -148,8 +149,9 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst(skb)->dev, + dst_output_okfn); } int __ip6_local_out(struct sk_buff *skb) @@ -164,7 +166,7 @@ int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) err = __ip6_local_out_sk(sk, skb); if (likely(err == 1)) - err = dst_output_sk(sk, skb); + err = dst_output(sk, skb); return err; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fdbada1569a3..fec0151522a2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -614,6 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; @@ -652,9 +653,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; - IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, rt->dst.dev, dst_output_sk); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, rt->dst.dev, dst_output_okfn); if (err > 0) err = net_xmit_errno(err); if (err) @@ -666,7 +667,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); if (err == -ENOBUFS && !np->recverr) err = 0; return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cb32ce250db0..d3d946773a3e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -421,31 +421,7 @@ static bool rt6_check_expired(const struct rt6_info *rt) static int rt6_info_hash_nhsfn(unsigned int candidate_count, const struct flowi6 *fl6) { - unsigned int val = fl6->flowi6_proto; - - val ^= ipv6_addr_hash(&fl6->daddr); - val ^= ipv6_addr_hash(&fl6->saddr); - - /* Work only if this not encapsulated */ - switch (fl6->flowi6_proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_SCTP: - val ^= (__force u16)fl6->fl6_sport; - val ^= (__force u16)fl6->fl6_dport; - break; - - case IPPROTO_ICMPV6: - val ^= (__force u16)fl6->fl6_icmp_type; - val ^= (__force u16)fl6->fl6_icmp_code; - break; - } - /* RFC6438 recommands to use flowlabel */ - val ^= (__force u32)fl6->flowlabel; - - /* Perhaps, we need to tune, this function? */ - val = val ^ (val >> 7) ^ (val >> 12); - return val % candidate_count; + return get_hash_from_flowi6(fl6) % candidate_count; } static struct rt6_info *rt6_multipath_select(struct rt6_info *match, @@ -498,10 +474,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (dev->flags & IFF_LOOPBACK) { if (!sprt->rt6i_idev || sprt->rt6i_idev->dev->ifindex != oif) { - if (flags & RT6_LOOKUP_F_IFACE && oif) + if (flags & RT6_LOOKUP_F_IFACE) continue; - if (local && (!oif || - local->rt6i_idev->dev->ifindex == oif)) + if (local && + local->rt6i_idev->dev->ifindex == oif) continue; } local = sprt; @@ -538,7 +514,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); + ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL); dev_put(work->dev); kfree(work); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0909f4e0d53c..7606eba83e7b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -114,14 +114,11 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); -__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); - tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return __cookie_v6_init_sequence(iph, th, mssp); } @@ -210,7 +207,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 97d9314ea361..2ae95e1d03e1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -70,8 +70,8 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); +static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); @@ -82,7 +82,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr) { return NULL; @@ -434,7 +434,7 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, +static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, u16 queue_mapping, @@ -447,7 +447,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, int err = -ENOMEM; /* First, grab a route. */ - if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) + if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, + IPPROTO_TCP)) == NULL) goto done; skb = tcp_make_synack(sk, dst, req, foc); @@ -476,13 +477,13 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req) } #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr) { return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); } -static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); @@ -663,22 +664,23 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } #endif -static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, +static void tcp_v6_init_req(struct request_sock *req, + const struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && + if (!sk_listener->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); if (!TCP_SKB_CB(skb)->tcp_tw_isn && - (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || + (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || np->repflow)) { @@ -687,13 +689,14 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, } } -static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, +static struct dst_entry *tcp_v6_route_req(const struct sock *sk, + struct flowi *fl, const struct request_sock *req, bool *strict) { if (strict) *strict = true; - return inet6_csk_route_req(sk, &fl->u.ip6, req); + return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { @@ -723,7 +726,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; -static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, +static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, u8 tclass, u32 label) @@ -822,7 +825,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, kfree_skb(buff); } -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; @@ -893,7 +896,7 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, +static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) @@ -916,7 +919,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV @@ -984,12 +987,13 @@ drop: return 0; /* don't send reset */ } -static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { struct inet_request_sock *ireq; - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_pinfo *newnp; + const struct ipv6_pinfo *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; @@ -1057,7 +1061,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (!dst) { - dst = inet6_csk_route_req(sk, &fl6, req); + dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) goto out; } @@ -1090,8 +1094,6 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; - sk_set_txhash(newsk); - /* Now IPv6 options... First: no IPv4 options. @@ -1273,7 +1275,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) + if (tcp_rcv_state_process(sk, skb)) goto reset; if (opt_skb) goto ipv6_pktoptions; @@ -1670,7 +1672,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1714,7 +1716,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, sp->sk_state == TCP_LISTEN ? - (fastopenq ? fastopenq->max_qlen : 0) : + fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 74bd17882a2f..0eaab1fa6be5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,8 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 09c76a7b474d..0c3e9ffcf231 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -131,7 +131,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) +static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; @@ -140,7 +140,7 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output_sk(sk, skb); + return dst_output(sk, skb); } #endif @@ -168,7 +168,10 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, skb_dst(skb)->dev, __xfrm6_output, + struct net *net = dev_net(skb_dst(skb)->dev); + + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa289c5db..69cee4e0d728 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -132,10 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? - : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 918151c11348..fcb2752419c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -95,11 +95,10 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); -static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); -static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], - u8 ipuser[16]); -static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); -static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]); +static void iucv_callback_connack(struct iucv_path *, u8 *); +static int iucv_callback_connreq(struct iucv_path *, u8 *, u8 *); +static void iucv_callback_connrej(struct iucv_path *, u8 *); +static void iucv_callback_shutdown(struct iucv_path *, u8 *); static struct iucv_sock_list iucv_sk_list = { .lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock), diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 2a6a1fdd62c0..7eaa000c9258 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -713,7 +713,7 @@ static struct notifier_block __refdata iucv_cpu_notifier = { * * Sever an iucv path to free up the pathid. Used internally. */ -static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) +static int iucv_sever_pathid(u16 pathid, u8 *userdata) { union iucv_param *parm; @@ -876,7 +876,7 @@ static struct notifier_block iucv_reboot_notifier = { * Returns the result of the CP IUCV call. */ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, - u8 userdata[16], void *private) + u8 *userdata, void *private) { union iucv_param *parm; int rc; @@ -923,7 +923,7 @@ EXPORT_SYMBOL(iucv_path_accept); * Returns the result of the CP IUCV call. */ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, - u8 userid[8], u8 system[8], u8 userdata[16], + u8 *userid, u8 *system, u8 *userdata, void *private) { union iucv_param *parm; @@ -985,7 +985,7 @@ EXPORT_SYMBOL(iucv_path_connect); * * Returns the result from the CP IUCV call. */ -int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) +int iucv_path_quiesce(struct iucv_path *path, u8 *userdata) { union iucv_param *parm; int rc; @@ -1017,7 +1017,7 @@ EXPORT_SYMBOL(iucv_path_quiesce); * * Returns the result from the CP IUCV call. */ -int iucv_path_resume(struct iucv_path *path, u8 userdata[16]) +int iucv_path_resume(struct iucv_path *path, u8 *userdata) { union iucv_param *parm; int rc; @@ -1047,7 +1047,7 @@ out: * * Returns the result from the CP IUCV call. */ -int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) +int iucv_path_sever(struct iucv_path *path, u8 *userdata) { int rc; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 68aa9ffd4ae4..5871537af387 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -321,4 +321,7 @@ do { \ #define l2tp_dbg(ptr, type, fmt, ...) \ l2tp_printk(ptr, type, pr_debug, fmt, ##__VA_ARGS__) +#define MODULE_ALIAS_L2TP_PWTYPE(type) \ + MODULE_ALIAS("net-l2tp-type-" __stringify(type)) + #endif /* _L2TP_CORE_H_ */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 4b552873b556..e253c26f31ac 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -358,3 +358,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP ethernet pseudowire driver"); MODULE_VERSION("1.0"); +MODULE_ALIAS_L2TP_PWTYPE(5); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 79649937ec71..ec22078b0914 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -655,3 +655,4 @@ MODULE_VERSION("1.0"); * enums */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d1ded3777815..aca38d8aed8e 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -801,3 +801,4 @@ MODULE_VERSION("1.0"); * enums */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 9e13c2ff8789..f93c5be612a7 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -576,6 +576,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); +#ifdef CONFIG_MODULES + if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) { + genl_unlock(); + request_module("net-l2tp-type-%u", cfg.pw_type); + genl_lock(); + } +#endif if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { ret = -EPROTONOSUPPORT; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f56c9f69e9f2..1ad18c55064c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1863,3 +1863,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); +MODULE_ALIAS_L2TP_PWTYPE(11); diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig new file mode 100644 index 000000000000..5d47325037bc --- /dev/null +++ b/net/l3mdev/Kconfig @@ -0,0 +1,10 @@ +# +# Configuration for L3 master device support +# + +config NET_L3_MASTER_DEV + bool "L3 Master device support" + depends on INET || IPV6 + ---help--- + This module provides glue between core networking code and device + drivers to support L3 master devices like VRF. diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile new file mode 100644 index 000000000000..84a53a6f609a --- /dev/null +++ b/net/l3mdev/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the L3 device API +# + +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c new file mode 100644 index 000000000000..ddf75ad41713 --- /dev/null +++ b/net/l3mdev/l3mdev.c @@ -0,0 +1,92 @@ +/* + * net/l3mdev/l3mdev.c - L3 master device implementation + * Copyright (c) 2015 Cumulus Networks + * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/netdevice.h> +#include <net/l3mdev.h> + +/** + * l3mdev_master_ifindex - get index of L3 master device + * @dev: targeted interface + */ + +int l3mdev_master_ifindex_rcu(struct net_device *dev) +{ + int ifindex = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + ifindex = dev->ifindex; + } else if (dev->flags & IFF_SLAVE) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && netif_is_l3_master(master)) + ifindex = master->ifindex; + } + + return ifindex; +} +EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); + +/** + * l3mdev_fib_table - get FIB table id associated with an L3 + * master interface + * @dev: targeted interface + */ + +u32 l3mdev_fib_table_rcu(const struct net_device *dev) +{ + u32 tb_id = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + if (dev->l3mdev_ops->l3mdev_fib_table) + tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); + } else if (dev->flags & IFF_SLAVE) { + /* Users of netdev_master_upper_dev_get_rcu need non-const, + * but current inet_*type functions take a const + */ + struct net_device *_dev = (struct net_device *) dev; + const struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(_dev); + if (master && netif_is_l3_master(master) && + master->l3mdev_ops->l3mdev_fib_table) + tb_id = master->l3mdev_ops->l3mdev_fib_table(master); + } + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu); + +u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + u32 tb_id = 0; + + if (!ifindex) + return 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + tb_id = l3mdev_fib_table_rcu(dev); + + rcu_read_unlock(); + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8e47f8113495..2e907335ee81 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -269,7 +269,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, state); + verdict = (*elemp)->hook((*elemp)->priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 338b4047776f..69ab9c2634e1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -519,8 +519,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret = 0; BUG_ON(!set); @@ -558,8 +557,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret; BUG_ON(!set); @@ -581,8 +579,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index dfd7b65b3d2a..0328f7250693 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head) * Allocate/initialize app incarnation and register it in proto apps. */ static int -ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, +ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { struct ip_vs_protocol *pp; @@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, } } - ret = pp->register_app(net, inc); + ret = pp->register_app(ipvs, inc); if (ret) goto out; @@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, * Release app incarnation */ static void -ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) +ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(net, inc); + pp->unregister_app(ipvs, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, +register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { int result; mutex_lock(&__ip_vs_app_mutex); - result = ip_vs_app_inc_new(net, app, proto, port); + result = ip_vs_app_inc_new(ipvs, app, proto, port); mutex_unlock(&__ip_vs_app_mutex); @@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, /* Register application for netns */ -struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) +struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *a; int err = 0; - if (!ipvs) - return ERR_PTR(-ENOENT); - mutex_lock(&__ip_vs_app_mutex); list_for_each_entry(a, &ipvs->app_list, a_list) { @@ -230,21 +226,17 @@ out_unlock: * We are sure there are no app incarnations attached to services * Caller should use synchronize_rcu() or rcu_barrier() */ -void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) +void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *a, *anxt, *inc, *nxt; - if (!ipvs) - return; - mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { if (app && strcmp(app->name, a->name)) continue; list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); + ip_vs_app_inc_release(ipvs, inc); } list_del(&a->a_list); @@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __net_init ip_vs_app_net_init(struct net *net) +int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; INIT_LIST_HEAD(&ipvs->app_list); proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); return 0; } -void __net_exit ip_vs_app_net_cleanup(struct net *net) +void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { - unregister_ip_vs_app(net, NULL /* all */); + struct net *net = ipvs->net; + + unregister_ip_vs_app(ipvs, NULL /* all */); remove_proc_entry("ip_vs_app", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b0f7b626b56d..d1d168c7fc68 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto, +static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { @@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro if (af == AF_INET6) return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)net>>8)) & ip_vs_conn_tab_mask; + ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; #endif return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)net>>8)) & ip_vs_conn_tab_mask; + ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { @@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) continue; /* HIT */ @@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) } static int -ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, +ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs, + int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) + struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; - struct net *net = skb_net(skb); pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return 1; - if (likely(!inverse)) - ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + if (likely(!ip_vs_iph_inverse(iph))) + ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr, pptr[1], &iph->saddr, pptr[0], p); return 0; } struct ip_vs_conn * -ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af, + const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p)) return NULL; return ip_vs_conn_in_get(&p); @@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (unlikely(p->pe_data && p->pe->ct_match)) { - if (!ip_vs_conn_net_eq(cp, p->net)) + if (cp->ipvs != p->ipvs) continue; if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { if (__ip_vs_conn_get(cp)) @@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (__ip_vs_conn_get(cp)) goto out; } @@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) continue; /* HIT */ @@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) } struct ip_vs_conn * -ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, + const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p)) return NULL; return ip_vs_conn_out_get(&p); @@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) * so we can make the assumption that the svc_af is the same as the * dest_af */ - dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr, + dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { @@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) #endif ip_vs_bind_xmit(cp); - pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol); + pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol); if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } @@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); + struct netns_ipvs *ipvs = ct->ipvs; /* * Checking the dest server status. @@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; - struct net *net = ip_vs_conn_net(cp); - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = cp->ipvs; /* * do I control anybody? @@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data) cp->timeout = 60*HZ; if (ipvs->sync_state & IP_VS_STATE_MASTER) - ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); + ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } @@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct netns_ipvs *ipvs = net_ipvs(p->net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs, p->protocol); cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); @@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, INIT_HLIST_NODE(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); - ip_vs_conn_net_set(cp, p->net); + cp->ipvs = ipvs; cp->af = p->af; cp->daf = dest_af; cp->protocol = p->protocol; @@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) size_t len = 0; char dbuf[IP_VS_ADDRSTRLEN]; - if (!ip_vs_conn_net_eq(cp, net)) + if (!net_eq(cp->ipvs->net, net)) return 0; if (cp->pe_data) { pe_data[0] = ' '; @@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) const struct ip_vs_conn *cp = v; struct net *net = seq_file_net(seq); - if (!ip_vs_conn_net_eq(cp, net)) + if (!net_eq(cp->ipvs->net, net)) return 0; #ifdef CONFIG_IP_VS_IPV6 @@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) } /* Called from keventd and must protect itself from softirqs */ -void ip_vs_random_dropentry(struct net *net) +void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { int idx; struct ip_vs_conn *cp, *cp_c; @@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net) if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; - if (!ip_vs_conn_net_eq(cp, net)) + if (cp->ipvs != ipvs) continue; if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { @@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(struct net *net) +static void ip_vs_conn_flush(struct netns_ipvs *ipvs) { int idx; struct ip_vs_conn *cp, *cp_c; - struct netns_ipvs *ipvs = net_ipvs(net); flush_again: rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (!ip_vs_conn_net_eq(cp, net)) + if (cp->ipvs != ipvs) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); @@ -1345,9 +1345,9 @@ flush_again: /* * per netns init and exit */ -int __net_init ip_vs_conn_net_init(struct net *net) +int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; atomic_set(&ipvs->conn_count, 0); @@ -1356,10 +1356,12 @@ int __net_init ip_vs_conn_net_init(struct net *net) return 0; } -void __net_exit ip_vs_conn_net_cleanup(struct net *net) +void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; + /* flush all the connection entries first */ - ip_vs_conn_flush(net); + ip_vs_conn_flush(ipvs); remove_proc_entry("ip_vs_conn", net->proc_net); remove_proc_entry("ip_vs_conn_sync", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 38fbc194b9cb..37dd77a3d0fb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -112,7 +112,7 @@ static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; @@ -146,7 +146,7 @@ static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; @@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; s = this_cpu_ptr(cp->dest->stats.cpustats); @@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr, vport, p); p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) @@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc, const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ + const union nf_inet_addr *src_addr, *dst_addr; + + if (likely(!ip_vs_iph_inverse(iph))) { + src_addr = &iph->saddr; + dst_addr = &iph->daddr; + } else { + src_addr = &iph->daddr; + dst_addr = &iph->saddr; + } + /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + ipv6_addr_prefix(&snet.in6, &src_addr->in6, (__force __u32) svc->netmask); else #endif - snet.ip = iph->saddr.ip & svc->netmask; + snet.ip = src_addr->ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), - IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), + IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ { int protocol = iph->protocol; - const union nf_inet_addr *vaddr = &iph->daddr; + const union nf_inet_addr *vaddr = dst_addr; __be16 vport = 0; if (dst_port == svc->port) { @@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, - src_port, &iph->daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr, + src_port, dst_addr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest, skb->mark); @@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_conn *cp = NULL; struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, cport, vport; + const void *caddr, *vaddr; unsigned int flags; *ignored = 1; @@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, if (pptr == NULL) return NULL; + if (likely(!ip_vs_iph_inverse(iph))) { + cport = pptr[0]; + caddr = &iph->saddr; + vport = pptr[1]; + vaddr = &iph->daddr; + } else { + cport = pptr[1]; + caddr = &iph->daddr; + vport = pptr[0]; + vaddr = &iph->saddr; + } + /* * FTPDATA needs this check when using local real server. * Never schedule Active FTPDATA connections from real server. * For LVS-NAT they must be already created. For other methods * with persistence the connection is created on SYN+ACK. */ - if (pptr[0] == FTPDATA) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, + if (cport == FTPDATA) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; } @@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Do not schedule replies from local real server. */ - if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, - "Not scheduling reply for existing connection"); - __ip_vs_conn_put(cp); - return NULL; + if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph); + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + + if (cp) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, + "Not scheduling reply for existing" + " connection"); + __ip_vs_conn_put(cp); + return NULL; + } } /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, + return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph); *ignored = 0; @@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Non-persistent service */ - if (!svc->fwmark && pptr[1] != svc->port) { + if (!svc->fwmark && vport != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " @@ -495,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, - &iph->saddr, pptr[0], &iph->daddr, - pptr[1], &p); + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, + caddr, cport, vaddr, vport, &p); cp = ip_vs_conn_new(&p, dest->af, &dest->addr, - dest->port ? dest->port : pptr[1], + dest->port ? dest->port : vport, flags, dest, skb->mark); if (!cp) { *ignored = -1; @@ -519,6 +547,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return cp; } +#ifdef CONFIG_SYSCTL +static inline int ip_vs_addr_is_unicast(struct net *net, int af, + union nf_inet_addr *addr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST; +#endif + return (inet_addr_type(net, addr->ip) == RTN_UNICAST); +} +#endif /* * Pass or drop the packet. @@ -528,33 +567,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { - __be16 _ports[2], *pptr; -#ifdef CONFIG_SYSCTL - struct net *net; - struct netns_ipvs *ipvs; - int unicast; -#endif + __be16 _ports[2], *pptr, dport; + struct netns_ipvs *ipvs = svc->ipvs; + struct net *net = ipvs->net; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); - if (pptr == NULL) { + if (!pptr) return NF_DROP; - } - -#ifdef CONFIG_SYSCTL - net = skb_net(skb); - -#ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) - unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST; - else -#endif - unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST); + dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - ipvs = net_ipvs(net); - if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { + if (sysctl_cache_bypass(ipvs) && svc->fwmark && + !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) && + ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -566,7 +593,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, svc->af, &daddr, 0, @@ -590,7 +617,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_put(cp); return ret; } -#endif /* * When the virtual ftp service is presented, packets destined @@ -598,9 +624,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) + if (svc->port == FTPPORT && dport != FTPPORT) return NF_ACCEPT; + if (unlikely(ip_vs_iph_icmp(iph))) + return NF_DROP; + /* * Notify the client that the destination is unreachable, and * release the socket buffer. @@ -610,11 +639,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) { - if (!skb->dev) { - struct net *net_ = dev_net(skb_dst(skb)->dev); - - skb->dev = net_->loopback_dev; - } + if (!skb->dev) + skb->dev = net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else #endif @@ -625,15 +651,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #ifdef CONFIG_SYSCTL -static int sysctl_snat_reroute(struct sk_buff *skb) +static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); return ipvs->sysctl_snat_reroute; } -static int sysctl_nat_icmp_send(struct net *net) +static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); return ipvs->sysctl_nat_icmp_send; } @@ -644,8 +668,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) #else -static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } -static int sysctl_nat_icmp_send(struct net *net) { return 0; } +static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } +static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -664,7 +688,8 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) return IP_DEFRAG_VS_OUT; } -static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs, + struct sk_buff *skb, u_int32_t user) { int err; @@ -677,10 +702,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb, - unsigned int hooknum) +static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, + struct sk_buff *skb, unsigned int hooknum) { - if (!sysctl_snat_reroute(skb)) + if (!sysctl_snat_reroute(ipvs)) return 0; /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ if (NF_INET_LOCAL_IN == hooknum) @@ -690,12 +715,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(skb) != 0) + ip6_route_me_harder(ipvs->net, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) return 1; return 0; @@ -848,7 +873,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb, hooknum)) + if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -872,8 +897,8 @@ out: * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. */ -static int ip_vs_out_icmp(struct sk_buff *skb, int *related, - unsigned int hooknum) +static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum) { struct iphdr *iph; struct icmphdr _icmph, *ic; @@ -888,7 +913,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { - if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -934,10 +959,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); + /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); + cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -947,16 +972,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, } #ifdef CONFIG_IP_VS_IPV6 -static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum, struct ip_vs_iphdr *ipvsh) +static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum, + struct ip_vs_iphdr *ipvsh) { struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; union nf_inet_addr snet; - unsigned int writable; + unsigned int offset; *related = 1; ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); @@ -984,31 +1009,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); - /* Now find the contained IP header */ - ciph.len = ipvsh->len + sizeof(_icmph); - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph), + true, &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); + cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; snet.in6 = ciph.saddr.in6; - writable = ciph.len; + offset = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr), + pp, offset, sizeof(struct ipv6hdr), hooknum); } #endif @@ -1093,7 +1110,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (!skb_make_writable(skb, iph->len)) goto drop; @@ -1127,10 +1144,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_vs_route_me_harder(af, skb, hooknum)) + if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto drop; - IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); + IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); @@ -1155,9 +1172,9 @@ drop: * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net = NULL; + struct net *net = ipvs->net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; @@ -1182,16 +1199,15 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!skb_dst(skb))) return NF_ACCEPT; - net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!ipvs->enable) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_out_icmp_v6(skb, &related, + int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related, hooknum, &iph); if (related) @@ -1201,13 +1217,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; - int verdict = ip_vs_out_icmp(skb, &related, hooknum); + int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum); if (related) return verdict; } - pd = ip_vs_proto_data_get(net, iph.protocol); + pd = ip_vs_proto_data_get(ipvs, iph.protocol); if (unlikely(!pd)) return NF_ACCEPT; pp = pd->pp; @@ -1217,21 +1233,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (af == AF_INET) #endif if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { - if (ip_vs_gather_frags(skb, + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(AF_INET, skb, false, &iph); } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, &iph, 0); + cp = pp->conn_out_get(ipvs, af, skb, &iph); if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); - if (sysctl_nat_icmp_send(net) && + if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1241,7 +1257,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, pptr[0])) { /* * Notify the real server: there is no @@ -1272,7 +1288,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) } } } - IP_VS_DBG_PKT(12, af, pp, skb, 0, + IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; } @@ -1283,10 +1299,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); } /* @@ -1294,10 +1310,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1308,10 +1324,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); } /* @@ -1319,14 +1335,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); } #endif +static unsigned int +ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_protocol *pp = pd->pp; + + if (!iph->fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ + + /* Schedule and create new connection entry into cpp */ + if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph)) + return 0; + } + + if (unlikely(!*cpp)) { + /* sorry, all this trouble for a no-hit :) */ + IP_VS_DBG_PKT(12, af, pp, skb, iph->off, + "ip_vs_in: packet continues traversal as normal"); + if (iph->fragoffs) { + /* Fragment that couldn't be mapped to a conn entry + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, iph->off, + "unhandled fragment"); + } + *verdict = NF_ACCEPT; + return 0; + } + + return 1; +} + /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, @@ -1334,9 +1387,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Currently handles error types - unreachable, quench, ttl exceeded. */ static int -ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, + unsigned int hooknum) { - struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -1345,13 +1398,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, offset2, ihl, verdict; - bool ipip; + bool ipip, new_cp = false; *related = 1; /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { - if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1385,8 +1438,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - net = skb_net(skb); - /* Special case for errors for IPIP packets */ ipip = false; if (cih->protocol == IPPROTO_IPIP) { @@ -1402,7 +1453,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ipip = true; } - pd = ip_vs_proto_data_get(net, cih->protocol); + pd = ip_vs_proto_data_get(ipvs, cih->protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; @@ -1416,15 +1467,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph); offset = ciph.len; + /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); - if (!cp) - return NF_ACCEPT; + cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph); + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(ipvs)) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) + return v; + new_cp = true; + } verdict = NF_DROP; @@ -1455,7 +1515,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - ipv4_update_pmtu(skb, dev_net(skb->dev), + ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0, 0, 0); /* Client uses PMTUD? */ if (!(frag_off & htons(IP_DF))) @@ -1501,23 +1561,26 @@ ignore_ipip: verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: - __ip_vs_conn_put(cp); + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } #ifdef CONFIG_IP_VS_IPV6 -static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum, struct ip_vs_iphdr *iph) +static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum, + struct ip_vs_iphdr *iph) { - struct net *net = NULL; - struct ipv6hdr _ip6h, *ip6h; struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offs_ciph, writable, verdict; + unsigned int offset, verdict; + bool new_cp = false; *related = 1; @@ -1546,21 +1609,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); - /* Now find the contained IP header */ - ciph.len = iph->len + sizeof(_icmph); - offs_ciph = ciph.len; /* Save ip header offset */ - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) - return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ - - net = skb_net(skb); - pd = ip_vs_proto_data_get(net, ciph.protocol); + offset = iph->len + sizeof(_icmph); + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph)) + return NF_ACCEPT; + + pd = ip_vs_proto_data_get(ipvs, ciph.protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; @@ -1569,36 +1622,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, if (ciph.fragoffs) return NF_ACCEPT; - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, "Checking incoming ICMPv6 for"); /* The embedded headers contain source and dest in reverse order * if not from localhost */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, - (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1); + cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph); + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(ipvs)) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph)) + return v; + + new_cp = true; + } - if (!cp) - return NF_ACCEPT; /* VS/TUN, VS/DR and LOCALNODE just let it go */ if ((hooknum == NF_INET_LOCAL_OUT) && (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { - __ip_vs_conn_put(cp); - return NF_ACCEPT; + verdict = NF_ACCEPT; + goto out; } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); /* Need to mangle contained IPv6 header in ICMPv6 packet */ - writable = ciph.len; + offset = ciph.len; if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || IPPROTO_SCTP == ciph.protocol) - writable += 2 * sizeof(__u16); /* Also mangle ports */ + offset += 2 * sizeof(__u16); /* Also mangle ports */ - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph); - __ip_vs_conn_put(cp); +out: + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } @@ -1610,15 +1676,13 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, * and send it on its way... */ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; - struct netns_ipvs *ipvs; int conn_reuse_mode; /* Already marked as IPVS request or reply? */ @@ -1633,7 +1697,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, @@ -1641,12 +1705,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; } /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); /* Bad... Do not break raw sockets */ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && @@ -1662,8 +1724,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, - &iph); + int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related, + hooknum, &iph); if (related) return verdict; @@ -1672,21 +1734,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; - int verdict = ip_vs_in_icmp(skb, &related, hooknum); + int verdict = ip_vs_in_icmp(ipvs, skb, &related, + hooknum); if (related) return verdict; } /* Protocol supported? */ - pd = ip_vs_proto_data_get(net, iph.protocol); - if (unlikely(!pd)) + pd = ip_vs_proto_data_get(ipvs, iph.protocol); + if (unlikely(!pd)) { + /* The only way we'll see this packet again is if it's + * encapsulated, so mark it with ipvs_property=1 so we + * skip it if we're ignoring tunneled packets + */ + if (sysctl_ignore_tunneled(ipvs)) + skb->ipvs_property = 1; + return NF_ACCEPT; + } pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, &iph, 0); + cp = pp->conn_in_get(ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && @@ -1700,32 +1771,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) cp = NULL; } - if (unlikely(!cp) && !iph.fragoffs) { - /* No (second) fragments need to enter here, as nf_defrag_ipv6 - * replayed fragment zero will already have created the cp - */ + if (unlikely(!cp)) { int v; - /* Schedule and create new connection entry into &cp */ - if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph)) + if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) return v; } - if (unlikely(!cp)) { - /* sorry, all this trouble for a no-hit :) */ - IP_VS_DBG_PKT(12, af, pp, skb, 0, - "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); - IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); - } - return NF_ACCEPT; - } + IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1765,7 +1819,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) pkts = atomic_add_return(1, &cp->in_pkts); if (ipvs->sync_state & IP_VS_STATE_MASTER) - ip_vs_sync_conn(net, cp, pkts); + ip_vs_sync_conn(ipvs, cp, pkts); ip_vs_conn_put(cp); return ret; @@ -1776,10 +1830,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); } /* @@ -1787,10 +1841,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1800,10 +1854,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); } /* @@ -1811,10 +1865,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); } #endif @@ -1830,46 +1884,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; - struct net *net; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = net_ipvs(state->net); if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, ops->hooknum); + return ip_vs_in_icmp(ipvs, skb, &r, state->hook); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; - struct net *net; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = net_ipvs(state->net); struct ip_vs_iphdr iphdr; - ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); } #endif @@ -1999,22 +2047,22 @@ static int __net_init __ip_vs_init(struct net *net) atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - if (ip_vs_estimator_net_init(net) < 0) + if (ip_vs_estimator_net_init(ipvs) < 0) goto estimator_fail; - if (ip_vs_control_net_init(net) < 0) + if (ip_vs_control_net_init(ipvs) < 0) goto control_fail; - if (ip_vs_protocol_net_init(net) < 0) + if (ip_vs_protocol_net_init(ipvs) < 0) goto protocol_fail; - if (ip_vs_app_net_init(net) < 0) + if (ip_vs_app_net_init(ipvs) < 0) goto app_fail; - if (ip_vs_conn_net_init(net) < 0) + if (ip_vs_conn_net_init(ipvs) < 0) goto conn_fail; - if (ip_vs_sync_net_init(net) < 0) + if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", @@ -2025,15 +2073,15 @@ static int __net_init __ip_vs_init(struct net *net) */ sync_fail: - ip_vs_conn_net_cleanup(net); + ip_vs_conn_net_cleanup(ipvs); conn_fail: - ip_vs_app_net_cleanup(net); + ip_vs_app_net_cleanup(ipvs); app_fail: - ip_vs_protocol_net_cleanup(net); + ip_vs_protocol_net_cleanup(ipvs); protocol_fail: - ip_vs_control_net_cleanup(net); + ip_vs_control_net_cleanup(ipvs); control_fail: - ip_vs_estimator_net_cleanup(net); + ip_vs_estimator_net_cleanup(ipvs); estimator_fail: net->ipvs = NULL; return -ENOMEM; @@ -2041,22 +2089,25 @@ estimator_fail: static void __net_exit __ip_vs_cleanup(struct net *net) { - ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */ - ip_vs_conn_net_cleanup(net); - ip_vs_app_net_cleanup(net); - ip_vs_protocol_net_cleanup(net); - ip_vs_control_net_cleanup(net); - ip_vs_estimator_net_cleanup(net); - IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ + ip_vs_conn_net_cleanup(ipvs); + ip_vs_app_net_cleanup(ipvs); + ip_vs_protocol_net_cleanup(ipvs); + ip_vs_control_net_cleanup(ipvs); + ip_vs_estimator_net_cleanup(ipvs); + IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); - net_ipvs(net)->enable = 0; /* Disable packet reception */ + ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); - ip_vs_sync_net_cleanup(net); + ip_vs_sync_net_cleanup(ipvs); LeaveFunction(2); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1a23e91d50d8..e7c1b052c2a3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work) update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) - ip_vs_random_dropentry(ipvs->net); + ip_vs_random_dropentry(ipvs); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } #endif @@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; * Returns hash value for virtual service */ static inline unsigned int -ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, +ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { register unsigned int porth = ntohs(port); @@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, addr->ip6[2]^addr->ip6[3]; #endif ahash = ntohl(addr_fold); - ahash ^= ((size_t) net >> 8); + ahash ^= ((size_t) ipvs >> 8); return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & IP_VS_SVC_TAB_MASK; @@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, /* * Returns hash value of fwmark for virtual service lookup */ -static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) +static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) { - return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; + return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* @@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, &svc->addr, svc->port); hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {netns, proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_find(struct net *net, int af, __u16 protocol, +__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { unsigned int hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) && (svc->protocol == protocol) - && net_eq(svc->net, net)) { + && (svc->ipvs == ipvs)) { /* HIT */ return svc; } @@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) { unsigned int hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(net, fwmark); + hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af - && net_eq(svc->net, net)) { + && (svc->ipvs == ipvs)) { /* HIT */ return svc; } @@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, +ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; - struct netns_ipvs *ipvs = net_ipvs(net); /* * Check the table hashed by fwmark first */ if (fwmark) { - svc = __ip_vs_svc_fwm_find(net, af, fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); if (svc) goto out; } @@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, * Check the table hashed by <protocol,addr,port> * for "full" addressed entries */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); } out: @@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest) } /* Check if real service by <proto,addr,port> is present */ -bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, +bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { - struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; @@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, * on the backup. * Called under RCU lock, no refcnt is returned. */ -struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, +struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, @@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest; - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; /* * Find the destination in trash @@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) * are expired, and the refcnt of each destination in the trash must * be 0, so we simply release them here. */ -static void ip_vs_trash_cleanup(struct net *net) +static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { struct ip_vs_dest *dest, *nxt; - struct netns_ipvs *ipvs = net_ipvs(net); del_timer_sync(&ipvs->dest_trash_timer); /* No need to use dest_trash_lock */ @@ -788,7 +785,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_service *old_svc; struct ip_vs_scheduler *sched; int conn_flags; @@ -843,7 +840,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock_bh(&dest->dst_lock); if (add) { - ip_vs_start_estimator(svc->net, &dest->stats); + ip_vs_start_estimator(svc->ipvs, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; sched = rcu_dereference_protected(svc->scheduler, 1); @@ -874,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && - !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) + !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; } else #endif { - atype = inet_addr_type(svc->net, udest->addr.ip); + atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; } @@ -1036,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, +static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, bool cleanup) { - struct netns_ipvs *ipvs = net_ipvs(net); - - ip_vs_stop_estimator(net, &dest->stats); + ip_vs_stop_estimator(ipvs, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1079,7 +1074,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, svc->num_dests--; if (dest->af != svc->af) - net_ipvs(svc->net)->mixed_address_family_dests--; + svc->ipvs->mixed_address_family_dests--; if (svcupd) { struct ip_vs_scheduler *sched; @@ -1120,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest, false); + __ip_vs_del_dest(svc->ipvs, dest, false); LeaveFunction(2); @@ -1129,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) static void ip_vs_dest_trash_expire(unsigned long data) { - struct net *net = (struct net *) data; - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = (struct netns_ipvs *)data; struct ip_vs_dest *dest, *next; unsigned long now = jiffies; @@ -1163,14 +1157,13 @@ static void ip_vs_dest_trash_expire(unsigned long data) * Add a service into the service hash table */ static int -ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; - struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1237,7 +1230,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - svc->net = net; + svc->ipvs = ipvs; INIT_LIST_HEAD(&svc->destinations); spin_lock_init(&svc->sched_lock); @@ -1261,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - ip_vs_start_estimator(net, &svc->stats); + ip_vs_start_estimator(ipvs, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1381,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; pr_info("%s: enter\n", __func__); @@ -1389,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) if (svc->af == AF_INET) ipvs->num_services--; - ip_vs_stop_estimator(svc->net, &svc->stats); + ip_vs_stop_estimator(svc->ipvs, &svc->stats); /* Unbind scheduler */ old_sched = rcu_dereference_protected(svc->scheduler, 1); @@ -1405,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest, cleanup); + __ip_vs_del_dest(svc->ipvs, dest, cleanup); } /* @@ -1456,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net, bool cleanup) +static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) { int idx; struct ip_vs_service *svc; @@ -1468,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_unlink_service(svc, cleanup); } } @@ -1479,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool cleanup) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_unlink_service(svc, cleanup); } } @@ -1491,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup) * Delete service by {netns} in the service table. * Called by __ip_vs_cleanup() */ -void ip_vs_service_net_cleanup(struct net *net) +void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs) { EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net, true); + ip_vs_flush(ipvs, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } @@ -1540,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) { + if (svc->ipvs == ipvs) { list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_forget_dev(dest, dev); @@ -1549,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) { + if (svc->ipvs == ipvs) { list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_forget_dev(dest, dev); @@ -1583,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(struct net *net) +static int ip_vs_zero_all(struct netns_ipvs *ipvs) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_zero_service(svc); } } - ip_vs_zero_stats(&net_ipvs(net)->tot_stats); + ip_vs_zero_stats(&ipvs->tot_stats); return 0; } @@ -1615,7 +1608,7 @@ static int proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; int val = *valp; int rc; @@ -1626,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(net_ipvs(net)); + update_defense_level(ipvs); } } return rc; @@ -1844,6 +1837,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "schedule_icmp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ignore_tunneled", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -1889,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags) static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1896,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net) && pos-- == 0) { + if ((svc->ipvs == ipvs) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1908,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net) && pos-- == 0) { + if ((svc->ipvs == ipvs) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -2196,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = { /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) { #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; @@ -2209,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } @@ -2223,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } @@ -2344,12 +2350,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cfg.syncid = dm->syncid; rtnl_lock(); mutex_lock(&ipvs->sync_mutex); - ret = start_sync_thread(net, &cfg, dm->state); + ret = start_sync_thread(ipvs, &cfg, dm->state); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); } else { mutex_lock(&ipvs->sync_mutex); - ret = stop_sync_thread(net, dm->state); + ret = stop_sync_thread(ipvs, dm->state); mutex_unlock(&ipvs->sync_mutex); } goto out_dec; @@ -2358,11 +2364,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) mutex_lock(&__ip_vs_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net, false); + ret = ip_vs_flush(ipvs, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); goto out_unlock; } @@ -2377,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(net); + ret = ip_vs_zero_all(ipvs); goto out_unlock; } } @@ -2395,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by <protocol, addr, port> or fwmark */ rcu_read_lock(); if (usvc.fwmark == 0) - svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, + svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD @@ -2412,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(net, &usvc, &svc); + ret = ip_vs_add_service(ipvs, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2471,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(struct net *net, +__ip_vs_get_service_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { @@ -2483,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || !net_eq(svc->net, net)) + if (svc->af != AF_INET || (svc->ipvs != ipvs)) continue; if (count >= get->num_services) @@ -2502,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || !net_eq(svc->net, net)) + if (svc->af != AF_INET || (svc->ipvs != ipvs)) continue; if (count >= get->num_services) @@ -2522,7 +2528,7 @@ out: } static inline int -__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2531,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, rcu_read_lock(); if (get->fwmark) - svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, get->port); rcu_read_unlock(); @@ -2578,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) { #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; @@ -2587,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) memset(u, 0, sizeof (*u)); #ifdef CONFIG_IP_VS_PROTO_TCP - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP - pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); u->udp_timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif @@ -2711,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_service_entries(net, get, user); + ret = __ip_vs_get_service_entries(ipvs, get, user); } break; @@ -2725,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) addr.ip = entry->addr; rcu_read_lock(); if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); else - svc = __ip_vs_service_find(net, AF_INET, + svc = __ip_vs_service_find(ipvs, AF_INET, entry->protocol, &addr, entry->port); rcu_read_unlock(); @@ -2753,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(net, get, user); + ret = __ip_vs_get_dest_entries(ipvs, get, user); } break; @@ -2761,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -2996,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start || !net_eq(svc->net, net)) + if (++idx <= start || (svc->ipvs != ipvs)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3012,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start || !net_eq(svc->net, net)) + if (++idx <= start || (svc->ipvs != ipvs)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3028,7 +3035,7 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct net *net, +static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) @@ -3073,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net, rcu_read_lock(); if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, + svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, &usvc->addr, usvc->port); rcu_read_unlock(); *ret_svc = svc; @@ -3113,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, +static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -3195,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); @@ -3205,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, goto out_err; - svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3341,7 +3349,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&ipvs->sync_mutex); @@ -3367,9 +3375,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ipvs_sync_daemon_cfg c; struct nlattr *a; int ret; @@ -3426,33 +3433,32 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) rtnl_lock(); mutex_lock(&ipvs->sync_mutex); - ret = start_sync_thread(net, &c, + ret = start_sync_thread(ipvs, &c, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); return ret; } -static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) { - struct netns_ipvs *ipvs = net_ipvs(net); int ret; if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; mutex_lock(&ipvs->sync_mutex); - ret = stop_sync_thread(net, + ret = stop_sync_thread(ipvs, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); mutex_unlock(&ipvs->sync_mutex); return ret; } -static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3464,17 +3470,15 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(net, &t); + return ip_vs_set_timeout(ipvs, &t); } static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { int ret = -EINVAL, cmd; - struct net *net; - struct netns_ipvs *ipvs; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); - ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3487,9 +3491,9 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) - ret = ip_vs_genl_new_daemon(net, daemon_attrs); + ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); else - ret = ip_vs_genl_del_daemon(net, daemon_attrs); + ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); } out: @@ -3503,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; - struct net *net; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net, false); + ret = ip_vs_flush(ipvs, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(net, info->attrs); + ret = ip_vs_genl_set_config(ipvs, info->attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(net); + ret = ip_vs_zero_all(ipvs); goto out; } @@ -3528,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(net, &usvc, + ret = ip_vs_genl_parse_service(ipvs, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3567,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) /* The synchronization protocol is incompatible * with mixed family services */ - if (net_ipvs(net)->sync_state) { + if (ipvs->sync_state) { ret = -EINVAL; goto out; } @@ -3587,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(net, &usvc, &svc); + ret = ip_vs_add_service(ipvs, &usvc, &svc); else ret = -EEXIST; break; @@ -3625,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; - struct net *net; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3656,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(net, + svc = ip_vs_genl_find_service(ipvs, info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); @@ -3677,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); #ifdef CONFIG_IP_VS_PROTO_TCP if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout) || @@ -3832,10 +3836,10 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -static int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; int idx; - struct netns_ipvs *ipvs = net_ipvs(net); struct ctl_table *tbl; atomic_set(&ipvs->dropentry, 0); @@ -3854,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) } else tbl = vs_vars; /* Initialize sysctl defaults */ + for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) { + if (tbl[idx].proc_handler == proc_do_defense_mode) + tbl[idx].extra2 = ipvs; + } idx = 0; ipvs->sysctl_amemthresh = 1024; tbl[idx++].data = &ipvs->sysctl_amemthresh; @@ -3895,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_conn_reuse_mode = 1; tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; - + tbl[idx++].data = &ipvs->sysctl_schedule_icmp; + tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { @@ -3903,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) kfree(tbl); return -ENOMEM; } - ip_vs_start_estimator(net, &ipvs->tot_stats); + ip_vs_start_estimator(ipvs, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3912,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(net, &ipvs->tot_stats); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); if (!net_eq(net, &init_net)) kfree(ipvs->sysctl_tbl); @@ -3927,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } #endif @@ -3936,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, }; -int __net_init ip_vs_control_net_init(struct net *net) +int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; int i, idx; - struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) @@ -3948,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net) INIT_LIST_HEAD(&ipvs->dest_trash); spin_lock_init(&ipvs->dest_trash_lock); setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, - (unsigned long) net); + (unsigned long) ipvs); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3970,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net) proc_create("ip_vs_stats_percpu", 0, net->proc_net, &ip_vs_stats_percpu_fops); - if (ip_vs_control_net_init_sysctl(net)) + if (ip_vs_control_net_init_sysctl(ipvs)) goto err; return 0; @@ -3980,12 +3989,12 @@ err: return -ENOMEM; } -void __net_exit ip_vs_control_net_cleanup(struct net *net) +void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; - ip_vs_trash_cleanup(net); - ip_vs_control_net_cleanup_sysctl(net); + ip_vs_trash_cleanup(ipvs); + ip_vs_control_net_cleanup_sysctl(ipvs); remove_proc_entry("ip_vs_stats_percpu", net->proc_net); remove_proc_entry("ip_vs_stats", net->proc_net); remove_proc_entry("ip_vs", net->proc_net); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ef0eb0a8d552..457c6c193e13 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg) struct ip_vs_estimator *e; struct ip_vs_stats *s; u64 rate; - struct net *net = (struct net *)arg; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = (struct netns_ipvs *)arg; - ipvs = net_ipvs(net); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg) mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); @@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; spin_lock_bh(&ipvs->est_lock); @@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) dst->outbps = (e->outbps + 0xF) >> 5; } -int __net_init ip_vs_estimator_net_init(struct net *net) +int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - INIT_LIST_HEAD(&ipvs->est_list); spin_lock_init(&ipvs->est_lock); - setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs); mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); return 0; } -void __net_exit ip_vs_estimator_net_cleanup(struct net *net) +void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs) { - del_timer_sync(&net_ipvs(net)->est_timer); + del_timer_sync(&ipvs->est_timer); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 5d3daae98bf0..d30c327bb578 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; - struct net *net; *diff = 0; @@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + ip_vs_conn_fill_param(cp->ipvs, AF_INET, iph->protocol, &from, port, &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), + ip_vs_conn_fill_param(cp->ipvs, AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); /* As above, this is ipv4 only */ @@ -289,9 +288,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. */ - net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(net, n_cp); + ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct net *net; /* no diff required for incoming packets */ *diff = 0; @@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + ip_vs_conn_fill_param(cp->ipvs, AF_INET, iph->protocol, &to, port, &cp->vaddr, htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); @@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - net = skb_net(skb); - ip_vs_tcp_conn_listen(net, n_cp); + ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return 1; @@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = register_ip_vs_app(net, &ip_vs_ftp); + app = register_ip_vs_app(ipvs, &ip_vs_ftp); if (IS_ERR(app)) return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) continue; - ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); + ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]); if (ret) goto err_unreg; pr_info("%s: loaded support on port[%d] = %d\n", @@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, &ip_vs_ftp); + unregister_ip_vs_app(ipvs, &ip_vs_ftp); return ret; } /* @@ -471,7 +467,12 @@ err_unreg: */ static void __ip_vs_ftp_exit(struct net *net) { - unregister_ip_vs_app(net, &ip_vs_ftp); + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!ipvs) + return; + + unregister_ip_vs_app(ipvs, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 127f14046c51..cccf4d637412 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) static int sysctl_lblc_expiration(struct ip_vs_service *svc) { #ifdef CONFIG_SYSCTL - struct netns_ipvs *ipvs = net_ipvs(svc->net); - return ipvs->sysctl_lblc_expiration; + return svc->ipvs->sysctl_lblc_expiration; #else return DEFAULT_EXPIRATION; #endif diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 2229d2d8bbe0..796d70e47ddd 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) static int sysctl_lblcr_expiration(struct ip_vs_service *svc) { #ifdef CONFIG_SYSCTL - struct netns_ipvs *ipvs = net_ipvs(svc->net); - return ipvs->sysctl_lblcr_expiration; + return svc->ipvs->sysctl_lblcr_expiration; #else return DEFAULT_EXPIRATION; #endif diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 136184572fc9..30434fb133df 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt, - &tuple); + h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index bed5f7042529..1b8d594e493a 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, &iph); + ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 939f7fbe9b46..8ae480715cea 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) * register an ipvs protocols netns related data */ static int -register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) +register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) { - struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); struct ip_vs_proto_data *pd = kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); @@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) atomic_set(&pd->appcnt, 0); /* Init app counter */ if (pp->init_netns != NULL) { - int ret = pp->init_netns(net, pd); + int ret = pp->init_netns(ipvs, pd); if (ret) { /* unlink an free proto data */ ipvs->proto_data_table[hash] = pd->next; @@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) * unregister an ipvs protocols netns data */ static int -unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) +unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data **pd_p; unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); @@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) if (*pd_p == pd) { *pd_p = pd->next; if (pd->pp->exit_netns != NULL) - pd->pp->exit_netns(net, pd); + pd->pp->exit_netns(ipvs, pd); kfree(pd); return 0; } @@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get); /* * get ip_vs_protocol object data by netns and proto */ -static struct ip_vs_proto_data * -__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { struct ip_vs_proto_data *pd; unsigned int hash = IP_VS_PROTO_HASH(proto); @@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) return NULL; } - -struct ip_vs_proto_data * -ip_vs_proto_data_get(struct net *net, unsigned short proto) -{ - struct netns_ipvs *ipvs = net_ipvs(net); - - return __ipvs_proto_data_get(ipvs, proto); -} EXPORT_SYMBOL(ip_vs_proto_data_get); /* @@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -int __net_init ip_vs_protocol_net_init(struct net *net) +int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) { int i, ret; static struct ip_vs_protocol *protos[] = { @@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net) }; for (i = 0; i < ARRAY_SIZE(protos); i++) { - ret = register_ip_vs_proto_netns(net, protos[i]); + ret = register_ip_vs_proto_netns(ipvs, protos[i]); if (ret < 0) goto cleanup; } return 0; cleanup: - ip_vs_protocol_net_cleanup(net); + ip_vs_protocol_net_cleanup(ipvs); return ret; } -void __net_exit ip_vs_protocol_net_cleanup(struct net *net) +void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; int i; /* unregister all the ipvs proto data for this netns */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pd = ipvs->proto_data_table[i]) != NULL) - unregister_ip_vs_proto_netns(net, pd); + unregister_ip_vs_proto_netns(ipvs, pd); } } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5de3dd312c0f..5320d39976e1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,30 +41,28 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(struct net *net, int af, - const struct ip_vs_iphdr *iph, int inverse, +ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af, + const struct ip_vs_iphdr *iph, struct ip_vs_conn_param *p) { - if (likely(!inverse)) - ip_vs_conn_fill_param(net, af, IPPROTO_UDP, + if (likely(!ip_vs_iph_inverse(iph))) + ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(net, af, IPPROTO_UDP, + ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse) +ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; - struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(ipvs, af, iph, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, */ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, static struct ip_vs_conn * -ah_esp_conn_out_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; - struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(ipvs, af, iph, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 5b84c0b56642..010ddeec135f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,35 +9,44 @@ #include <net/ip_vs.h> static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; - struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; - - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) { - *verdict = NF_DROP; - return 0; + __be16 _ports[2], *ports = NULL; + + if (likely(!ip_vs_iph_icmp(iph))) { + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + if (sh) { + sch = skb_header_pointer( + skb, iph->len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch && (sch->type == SCTP_CID_INIT || + sysctl_sloppy_sctp(ipvs))) + ports = &sh->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); } - sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), - sizeof(_schunkh), &_schunkh); - if (sch == NULL) { + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); - ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { @@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct net *net, struct ip_vs_app *inc) +static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); hash = sctp_app_hashkey(port); @@ -498,9 +506,9 @@ out: return ret; } -static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) +static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static int sctp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -549,10 +557,8 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); @@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 8e92beb0cca9..d7024b2ed769 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -32,27 +32,47 @@ #include <net/ip_vs.h> static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; - struct netns_ipvs *ipvs; + __be16 _ports[2], *ports = NULL; - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (th == NULL) { + /* In the event of icmp, we're only guaranteed to have the first 8 + * bytes of the transport header, so we only check the rest of the + * TCP packet for non-ICMP packets + */ + if (likely(!ip_vs_iph_icmp(iph))) { + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (th) { + if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn)) + return 1; + ports = &th->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); - ipvs = net_ipvs(net); + /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { @@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct net *net, struct ip_vs_app *inc) +static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); hash = tcp_app_hashkey(port); @@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void -tcp_unregister_app(struct net *net, struct ip_vs_app *inc) +tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -609,7 +628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc) static int tcp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP); spin_lock_bh(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; @@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); @@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index b62a3c0ff9bf..e494e9a88c7f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -29,28 +29,42 @@ #include <net/ip6_checksum.h> static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; + __be16 _ports[2], *ports = NULL; - /* IPv6 fragments, only first fragment will hit this */ - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (uh == NULL) { + if (likely(!ip_vs_iph_icmp(iph))) { + /* IPv6 fragments, only first fragment will hit this */ + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + if (uh) + ports = &uh->source; + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); + rcu_read_lock(); - svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct net *net, struct ip_vs_app *inc) +static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); hash = udp_app_hashkey(port); @@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) static void -udp_unregister_app(struct net *net, struct ip_vs_app *inc) +udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc) static int udp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); @@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 98a13433b68c..1e373a5e44e3 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, static inline __be16 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - __be16 port; - struct tcphdr _tcph, *th; - struct udphdr _udph, *uh; - sctp_sctphdr_t _sctph, *sh; + __be16 _ports[2], *ports; + /* At this point we know that we have a valid packet of some kind. + * Because ICMP packets are only guaranteed to have the first 8 + * bytes, let's just grab the ports. Fortunately they're in the + * same position for all three of the protocols we care about. + */ switch (iph->protocol) { case IPPROTO_TCP: - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (unlikely(th == NULL)) - return 0; - port = th->source; - break; case IPPROTO_UDP: - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (unlikely(uh == NULL)) - return 0; - port = uh->source; - break; case IPPROTO_SCTP: - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (unlikely(sh == NULL)) + ports = skb_header_pointer(skb, iph->len, sizeof(_ports), + &_ports); + if (unlikely(!ports)) return 0; - port = sh->source; - break; + + if (likely(!ip_vs_iph_inverse(iph))) + return ports[0]; + else + return ports[1]; default: - port = 0; + return 0; } - - return port; } @@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; + const union nf_inet_addr *hash_addr; + + hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); @@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) - dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); else - dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get(svc, s, hash_addr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); @@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), + IP_VS_DBG_ADDR(svc->af, hash_addr), IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 43f140950075..803001a45aa1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -193,7 +193,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { - struct net *net; + struct netns_ipvs *ipvs; struct socket *sock; char *buf; int id; @@ -533,10 +533,9 @@ set: * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, +static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_buff *buff; @@ -615,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp, pkts); + ip_vs_sync_conn(ipvs, cp, pkts); } } @@ -624,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, * Called by ip_vs_in. * Sending Version 1 messages */ -void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) +void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; struct ip_vs_sync_buff *buff; @@ -637,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) /* Handle old version of the protocol */ if (sysctl_sync_ver(ipvs) == 0) { - ip_vs_sync_conn_v0(net, cp, pkts); + ip_vs_sync_conn_v0(ipvs, cp, pkts); return; } /* Do not sync ONE PACKET */ @@ -784,21 +782,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(net, af, sc->v6.protocol, + ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(net, af, sc->v4.protocol, + ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -837,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, +static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param, unsigned int flags, unsigned int state, unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, @@ -846,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); @@ -904,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * with synchronization, so we can make the assumption that * the svc_af is the same as the dest_af */ - dest = ip_vs_find_dest(net, type, type, daddr, dport, + dest = ip_vs_find_dest(ipvs, type, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); @@ -941,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, } else { struct ip_vs_proto_data *pd; - pd = ip_vs_proto_data_get(net, protocol); + pd = ip_vs_proto_data_get(ipvs, protocol); if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) cp->timeout = pd->timeout_table[state]; else @@ -953,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, /* * Process received multicast message for Version 0 */ -static void ip_vs_process_message_v0(struct net *net, const char *buffer, +static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer, const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; @@ -1009,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, } } - ip_vs_conn_fill_param(net, AF_INET, s->protocol, + ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, s->vport, ¶m); /* Send timeout as Zero */ - ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, 0, 0, opt); } @@ -1067,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1171,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) state = 0; } } - if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, + if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data, pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) @@ -1204,10 +1201,9 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(struct net *net, __u8 *buffer, +static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer, const size_t buflen) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; @@ -1257,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* Process a single sync_conn */ - retc = ip_vs_proc_sync_conn(net, p, msg_end); + retc = ip_vs_proc_sync_conn(ipvs, p, msg_end); if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); @@ -1268,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, } } else { /* Old type of message */ - ip_vs_process_message_v0(net, buffer, buflen); + ip_vs_process_message_v0(ipvs, buffer, buflen); return; } } @@ -1493,16 +1489,15 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct net *net, int id) +static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) { - struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ - result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM, + result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); @@ -1550,16 +1545,15 @@ error: /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct net *net, int id) +static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id) { - struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ - result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM, + result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); @@ -1687,7 +1681,7 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; - struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct netns_ipvs *ipvs = tinfo->ipvs; struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; struct sock *sk = tinfo->sock->sk; struct ip_vs_sync_buff *sb; @@ -1743,7 +1737,7 @@ done: static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; - struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct netns_ipvs *ipvs = tinfo->ipvs; int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " @@ -1765,7 +1759,7 @@ static int sync_thread_backup(void *data) break; } - ip_vs_process_message(tinfo->net, tinfo->buf, len); + ip_vs_process_message(ipvs, tinfo->buf, len); } } @@ -1778,13 +1772,12 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, +int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **array = NULL, *task; struct socket *sock; - struct netns_ipvs *ipvs = net_ipvs(net); struct net_device *dev; char *name; int (*threadfn)(void *data); @@ -1811,7 +1804,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, if (!c->mcast_ttl) c->mcast_ttl = 1; - dev = __dev_get_by_name(net, c->mcast_ifn); + dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); return -ENODEV; @@ -1873,9 +1866,9 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, tinfo = NULL; for (id = 0; id < count; id++) { if (state == IP_VS_STATE_MASTER) - sock = make_send_sock(net, id); + sock = make_send_sock(ipvs, id); else - sock = make_receive_sock(net, id); + sock = make_receive_sock(ipvs, id); if (IS_ERR(sock)) { result = PTR_ERR(sock); goto outtinfo; @@ -1883,7 +1876,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) goto outsocket; - tinfo->net = net; + tinfo->ipvs = ipvs; tinfo->sock = sock; if (state == IP_VS_STATE_BACKUP) { tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, @@ -1947,9 +1940,8 @@ out: } -int stop_sync_thread(struct net *net, int state) +int stop_sync_thread(struct netns_ipvs *ipvs, int state) { - struct netns_ipvs *ipvs = net_ipvs(net); struct task_struct **array; int id; int retc = -EINVAL; @@ -2015,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -int __net_init ip_vs_sync_net_init(struct net *net) +int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); return 0; } -void ip_vs_sync_net_cleanup(struct net *net) +void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) { int retc; - struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&ipvs->sync_mutex); - retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); - retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); + retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); mutex_unlock(&ipvs->sync_mutex); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 258a0b0e82a2..77182b9750cd 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -212,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } -static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, +static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, + int rt_mode, struct ip_vs_iphdr *ipvsh, struct sk_buff *skb, int mtu) { #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { if (!skb->dev) skb->dev = net->loopback_dev; /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) + if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); @@ -233,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, } else #endif { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); - /* If we're going to tunnel the packet and pmtu discovery * is disabled, we'll just fragment it anyway */ @@ -242,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, return true; if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && - skb->len > mtu && !skb_is_gso(skb))) { + skb->len > mtu && !skb_is_gso(skb) && + !ip_vs_iph_icmp(ipvsh))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG(1, "frag needed for %pI4\n", @@ -256,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, /* Get route to destination or remote server */ static int -__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + struct ip_vs_dest *dest, __be32 daddr, int rt_mode, __be32 *ret_saddr, struct ip_vs_iphdr *ipvsh) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ int mtu; @@ -336,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, maybe_update_pmtu(skb_af, skb, mtu); } - if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) + if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); @@ -402,11 +403,12 @@ out_err: * Get route to destination or remote server */ static int -__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct dst_entry *dst; @@ -484,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, maybe_update_pmtu(skb_af, skb, mtu); } - if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) + if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); @@ -573,8 +575,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_okfn); } else ret = NF_ACCEPT; @@ -595,8 +597,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_okfn); } else ret = NF_ACCEPT; return ret; @@ -629,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, + if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; @@ -656,10 +658,13 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct ipv6hdr *iph = ipv6_hdr(skb); + EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, + if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, + &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; @@ -706,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } was_input = rt_is_input_route(skb_rtable(skb)); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR, NULL, ipvsh); @@ -723,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error; @@ -733,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { - IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " - "stopping DNAT to loopback address"); + IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off, + "ip_vs_nat_xmit(): stopping DNAT to loopback " + "address"); goto tx_error; } @@ -751,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -794,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -812,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); goto tx_error; @@ -823,7 +830,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { - IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); goto tx_error; @@ -841,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -967,8 +974,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct net *net = skb_net(skb); - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = cp->ipvs; + struct net *net = ipvs->net; struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -984,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_CONNECT | @@ -1078,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1165,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); @@ -1204,7 +1212,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1273,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, NULL, iph); if (local < 0) goto tx_error; @@ -1365,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, - NULL, ipvsh, 0, rt_mode); + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; rt = (struct rt6_info *) skb_dst(skb); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c09d6c7198f6..09d1d19b2ab9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -168,6 +168,7 @@ nf_ct_get_tuple(const struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, + struct net *net, struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) @@ -181,12 +182,13 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; - return l4proto->pkt_to_tuple(skb, dataoff, tuple); + return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuple); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, - u_int16_t l3num, struct nf_conntrack_tuple *tuple) + u_int16_t l3num, + struct net *net, struct nf_conntrack_tuple *tuple) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -205,7 +207,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, l4proto = __nf_ct_l4proto_find(l3num, protonum); - ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, + ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, l3proto, l4proto); rcu_read_unlock(); @@ -1029,7 +1031,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), - dataoff, l3num, protonum, &tuple, l3proto, + dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("resolve_normal_ct: Can't get tuple\n"); return NULL; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6dd995c7c72b..fce1b1cca32d 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -398,7 +398,7 @@ static inline struct dccp_net *dccp_pernet(struct net *net) } static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { struct dccp_hdr _hdr, *dh; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 2281be419a74..86dc752e5349 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -45,7 +45,7 @@ static inline struct nf_generic_net *generic_pernet(struct net *net) static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { tuple->src.u.all = 0; tuple->dst.u.all = 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 7648674f29c3..a96451a7af20 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -190,9 +190,8 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, /* gre hdr info to tuple */ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { - struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 67197731eb68..9578a7c371ef 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -156,7 +156,7 @@ static inline struct sctp_net *sctp_pernet(struct net *net) } static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct sctphdr *hp; struct sctphdr _hdr; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 70383de72054..278f3b9356ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -277,7 +277,7 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net) } static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct tcphdr *hp; struct tcphdr _hdr; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 6957281ffee5..478f92f834b6 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -38,6 +38,7 @@ static inline struct nf_udp_net *udp_pernet(struct net *net) static bool udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct udphdr *hp; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index c5903d1649f9..1ac8ee13a873 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -48,6 +48,7 @@ static inline struct udplite_net *udplite_pernet(struct net *net) static bool udplite_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct udphdr *hp; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5113dfd39df9..06a9f45771ab 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -83,7 +83,7 @@ out: rcu_read_unlock(); } -int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) +int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) { struct flowi fl; unsigned int hh_len; @@ -99,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96777f9a9350..34f628e16a4c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -199,7 +199,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->state.pf); - if (!afinfo || afinfo->reroute(skb, entry) < 0) + if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0) verdict = NF_DROP; } @@ -215,7 +215,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) case NF_ACCEPT: case NF_STOP: local_bh_disable(); - entry->state.okfn(entry->state.sk, skb); + entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 05d0b03530f6..f3695a497408 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -48,9 +48,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt, const struct nft_chain *chain, int rulenum, enum nft_trace type) { - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - - nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); @@ -111,10 +109,10 @@ struct nft_jumpstack { }; unsigned int -nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +nft_do_chain(struct nft_pktinfo *pkt, void *priv) { - const struct nft_chain *chain = ops->priv, *basechain = chain; - const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); + const struct nft_chain *chain = priv, *basechain = chain; + const struct net *net = pkt->net; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 2cae4d4a03b7..7b9c053ba750 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -17,13 +17,13 @@ static inline void nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, + struct sk_buff *skb, const struct nf_hook_state *state) { struct iphdr *iph, _iph; u32 len, thoff; - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), &_iph); @@ -48,7 +48,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, static inline void __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -82,33 +81,32 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - nft_set_pktinfo(pkt, ops, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); + __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); } static unsigned int -nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, +nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_netdev __read_mostly = { diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index a5cd6d90b78b..41583e30051b 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -670,8 +670,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct nfqnl_instance *queue; struct sk_buff *skb, *segs; int err = -ENOBUFS; - struct net *net = dev_net(entry->state.in ? - entry->state.in : entry->state.out); + struct net *net = entry->state.net; struct nfnl_queue_net *q = nfnl_queue_pernet(net); /* rcu_read_lock()ed by nf_hook_slow() */ diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index a13d6a386d63..319c22b4bca2 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -31,9 +31,8 @@ static void nft_log_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_log *priv = nft_expr_priv(expr); - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index cb2f13ebb5a6..e4ad2c24bc41 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -42,7 +42,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - *dest = pkt->ops->pf; + *dest = pkt->pf; break; case NFT_META_L4PROTO: *dest = pkt->tprot; @@ -135,7 +135,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (pkt->ops->pf) { + switch (pkt->pf) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) *dest = PACKET_MULTICAST; diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 96805d21d618..61d216eb7917 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -42,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, pkt->ops->pf, + priv->queues_total, pkt->pf, jhash_initval); } } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 635dbba93d01..759ca5248a3d 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -22,38 +22,37 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); - switch (pkt->ops->pf) { + switch (pkt->pf) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nf_send_reset(pkt->net, pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), - pkt->ops->hooknum); + pkt->hook); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nf_send_reset6(pkt->net, pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, + nf_send_unreach6(pkt->net, pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + pkt->hook); break; } break; diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index c13b79440ede..1763ab82bcd7 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -33,7 +33,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index fb7497c928a0..a1fa2c800cb9 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -26,7 +26,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c02501a530f..b7c43def0dc6 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); if (dst_mtu(skb_dst(skb)) <= minlen) { diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index fd980aa7715d..899b06115fc5 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -32,7 +32,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); + nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, info->priv->oif); return XT_CONTINUE; } @@ -43,7 +43,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); + nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, info->priv->oif); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d0c96c5ae29a..3ab591e73ec0 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -250,8 +250,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, * no such listener is found, or NULL if the TCP header is incomplete. */ static struct sock * -tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, - struct sock *sk) +tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr _hdr, *hp; @@ -267,7 +267,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, laddr ? laddr : iph->daddr, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); @@ -290,7 +290,7 @@ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) } static unsigned int -tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, +tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); @@ -305,7 +305,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NFT_LOOKUP_ESTABLISHED); @@ -317,11 +317,11 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ - sk = tproxy_handle_time_wait4(skb, laddr, lport, sk); + sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NFT_LOOKUP_LISTENER); @@ -351,7 +351,7 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -359,7 +359,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -429,7 +429,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk2 = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -472,7 +472,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, par->in, NFT_LOOKUP_ESTABLISHED); @@ -487,7 +487,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, laddr, hp->source, lport, par->in, NFT_LOOKUP_LISTENER); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 5b4743cc0436..11d6091991a4 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,7 +143,7 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 075d89d94d28..99bbc829868d 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -332,7 +332,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) { + par->family, net, &tuple)) { goto hotdrop; } diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 8d47c3780fda..71a9d95e0a81 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -48,6 +48,7 @@ static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; + struct netns_ipvs *ipvs = net_ipvs(par->net); /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ const u_int8_t family = par->family; struct ip_vs_iphdr iph; @@ -67,7 +68,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) goto out; } - ip_vs_fill_iph_skb(family, skb, &iph); + ip_vs_fill_iph_skb(family, skb, true, &iph); if (data->bitmask & XT_IPVS_PROTO) if ((iph.protocol == data->l4proto) ^ @@ -85,7 +86,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); + cp = pp->conn_out_get(ipvs, family, skb, &iph); if (unlikely(cp == NULL)) { match = false; goto out; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 0778855ea5e7..df8801e02a32 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -200,7 +200,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = dev_net(p->in ? p->in : p->out); + struct net *net = p->net; if (!info) return false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 45e1b30e4fb2..d725a27743a1 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -237,7 +237,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 43e26c881100..2ec08f04b816 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,7 +143,8 @@ static bool xt_socket_sk_is_transparent(struct sock *sk) } } -static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, +static struct sock *xt_socket_lookup_slow_v4(struct net *net, + const struct sk_buff *skb, const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); @@ -197,7 +198,7 @@ static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, } #endif - return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, + return xt_socket_get_sock_v4(net, protocol, saddr, daddr, sport, dport, indev); } @@ -209,7 +210,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(skb, par->in); + sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -335,7 +336,8 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, return NULL; } -static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, +static struct sock *xt_socket_lookup_slow_v6(struct net *net, + const struct sk_buff *skb, const struct net_device *indev) { __be16 uninitialized_var(dport), uninitialized_var(sport); @@ -371,7 +373,7 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, return NULL; } - return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, + return xt_socket_get_sock_v6(net, tproto, saddr, daddr, sport, dport, indev); } @@ -383,7 +385,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(skb, par->in); + sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2ed5f964772e..75724a96aef2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1136,19 +1136,19 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct genl_family *family, - struct sk_buff *skb, struct net *net, u32 portid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) +void genl_notify(struct genl_family *family, struct sk_buff *skb, + struct genl_info *info, u32 group, gfp_t flags) { + struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; int report = 0; - if (nlh) - report = nlmsg_report(nlh); + if (info->nlhdr) + report = nlmsg_report(info->nlhdr); if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; - nlmsg_notify(sk, skb, portid, group, report, flags); + nlmsg_notify(sk, skb, info->snd_portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 002a755fa07e..eb759e3a88ca 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -347,7 +347,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, { struct nf_conntrack_tuple tuple; - if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) return NULL; return __nf_ct_expect_find(net, zone, &tuple); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index b816ff871528..a75828091e21 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -91,8 +91,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { - genl_notify(family, skb, genl_info_net(info), info->snd_portid, - 0, info->nlhdr, GFP_KERNEL); + genl_notify(family, skb, info, 0, GFP_KERNEL); } /** diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index c11413d5075f..fb3cdb85905d 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -151,7 +151,8 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, { struct vxlan_dev *vxlan = netdev_priv(vport->dev); struct net *net = ovs_dp_get_net(vport->dp); - __be16 dst_port = vxlan_dev_dst_port(vxlan); + unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info); + __be16 dst_port = vxlan_dev_dst_port(vxlan, family); __be16 src_port; int port_min; int port_max; diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 6631f4f1e39b..692b3e67fb54 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -808,7 +808,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) ASSERTCMP(atomic_read(&conn->usage), >, 0); - conn->put_time = get_seconds(); + conn->put_time = ktime_get_seconds(); if (atomic_dec_and_test(&conn->usage)) { _debug("zombie"); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -852,7 +852,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_seconds(); earliest = ULONG_MAX; write_lock_bh(&rxrpc_connection_lock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index aef1bd294e17..2934a73a5981 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -208,7 +208,7 @@ struct rxrpc_transport { struct rb_root server_conns; /* server connections on this transport */ struct list_head link; /* link in master session list */ struct sk_buff_head error_queue; /* error packets awaiting processing */ - time_t put_time; /* time at which to reap */ + unsigned long put_time; /* time at which to reap */ spinlock_t client_lock; /* client connection allocation lock */ rwlock_t conn_lock; /* lock for active/dead connections */ atomic_t usage; @@ -256,7 +256,7 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long events; #define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */ - time_t put_time; /* time at which to reap */ + unsigned long put_time; /* time at which to reap */ rwlock_t lock; /* access lock */ spinlock_t state_lock; /* state-change lock */ atomic_t usage; diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index 1976dec84f29..9946467f16b4 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -189,7 +189,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans) ASSERTCMP(atomic_read(&trans->usage), >, 0); - trans->put_time = get_seconds(); + trans->put_time = ktime_get_seconds(); if (unlikely(atomic_dec_and_test(&trans->usage))) { _debug("zombie"); /* let the reaper determine the timeout to avoid a race with @@ -226,7 +226,7 @@ static void rxrpc_transport_reaper(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_seconds(); earliest = ULONG_MAX; /* extract all the transports that have been dead too long */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 559bfa011bda..0bc6f912f870 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, case TC_ACT_PIPE: case TC_ACT_RECLASSIFY: case TC_ACT_OK: + case TC_ACT_REDIRECT: action = filter_res; break; case TC_ACT_SHOT: diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5019a47b9270..bb41699c6c49 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -68,13 +68,13 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - proto, &tuple)) + proto, ca->net, &tuple)) goto out; zone.id = ca->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; - thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple); + thash = nf_conntrack_find_get(ca->net, &zone, &tuple); if (!thash) goto out; @@ -119,6 +119,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci = to_connmark(a); ci->tcf_action = parm->action; + ci->net = net; ci->zone = parm->zone; tcf_hash_insert(a); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 99c9cc1c7af9..d05869646515 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ + par.net = dev_net(skb->dev); par.in = skb->dev; par.out = NULL; par.hooknum = ipt->tcfi_hook; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index e5168f8b9640..7eeffaf69c75 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -38,6 +38,7 @@ struct cls_bpf_prog { struct bpf_prog *filter; struct list_head link; struct tcf_result res; + bool exts_integrated; struct tcf_exts exts; u32 handle; union { @@ -52,6 +53,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, @@ -59,6 +61,20 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; +static int cls_bpf_exec_opcode(int code) +{ + switch (code) { + case TC_ACT_OK: + case TC_ACT_SHOT: + case TC_ACT_STOLEN: + case TC_ACT_REDIRECT: + case TC_ACT_UNSPEC: + return code; + default: + return TC_ACT_UNSPEC; + } +} + static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -79,6 +95,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, list_for_each_entry_rcu(prog, &head->plist, link) { int filter_res; + qdisc_skb_cb(skb)->tc_classid = prog->res.classid; + if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); @@ -88,6 +106,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, filter_res = BPF_PROG_RUN(prog->filter, skb); } + if (prog->exts_integrated) { + res->class = prog->res.class; + res->classid = qdisc_skb_cb(skb)->tc_classid; + + ret = cls_bpf_exec_opcode(filter_res); + if (ret == TC_ACT_UNSPEC) + continue; + break; + } + if (filter_res == 0) continue; @@ -195,8 +223,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static int cls_bpf_prog_from_ops(struct nlattr **tb, - struct cls_bpf_prog *prog, u32 classid) +static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) { struct sock_filter *bpf_ops; struct sock_fprog_kern fprog_tmp; @@ -230,15 +257,12 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, prog->bpf_ops = bpf_ops; prog->bpf_num_ops = bpf_num_ops; prog->bpf_name = NULL; - prog->filter = fp; - prog->res.classid = classid; return 0; } -static int cls_bpf_prog_from_efd(struct nlattr **tb, - struct cls_bpf_prog *prog, u32 classid) +static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog) { struct bpf_prog *fp; char *name = NULL; @@ -268,9 +292,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, prog->bpf_ops = NULL; prog->bpf_fd = bpf_fd; prog->bpf_name = name; - prog->filter = fp; - prog->res.classid = classid; return 0; } @@ -280,16 +302,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { + bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; - bool is_bpf, is_ebpf; - u32 classid; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; is_ebpf = tb[TCA_BPF_FD]; - - if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || - !tb[TCA_BPF_CLASSID]) + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); @@ -297,18 +316,32 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if (ret < 0) return ret; - classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + if (tb[TCA_BPF_FLAGS]) { + u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); + + if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { + tcf_exts_destroy(&exts); + return -EINVAL; + } + + have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; + } + + prog->exts_integrated = have_exts; - ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : - cls_bpf_prog_from_efd(tb, prog, classid); + ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : + cls_bpf_prog_from_efd(tb, prog); if (ret < 0) { tcf_exts_destroy(&exts); return ret; } - tcf_bind_filter(tp, &prog->res, base); - tcf_exts_change(tp, &prog->exts, &exts); + if (tb[TCA_BPF_CLASSID]) { + prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + tcf_bind_filter(tp, &prog->res, base); + } + tcf_exts_change(tp, &prog->exts, &exts); return 0; } @@ -429,6 +462,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; struct nlattr *nest; + u32 bpf_flags = 0; int ret; if (prog == NULL) @@ -440,7 +474,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) + if (prog->res.classid && + nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; if (cls_bpf_is_ebpf(prog)) @@ -453,6 +488,11 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; + if (prog->exts_integrated) + bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; + if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) + goto nla_put_failure; + nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &prog->exts) < 0) diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df0328ba6a48..c66ca9400ab4 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -95,6 +95,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); + acpar.net = em->net; acpar.in = indev ? indev : dev; acpar.out = dev; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index c4d45fd8c551..f357f34d02d2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -35,14 +35,20 @@ #define NO_DEFAULT_INDEX (1 << 16) +struct mask_value { + u8 mask; + u8 value; +}; + struct dsmark_qdisc_data { struct Qdisc *q; struct tcf_proto __rcu *filter_list; - u8 *mask; /* "owns" the array */ - u8 *value; + struct mask_value *mv; u16 indices; + u8 set_tc_index; u32 default_index; /* index range is 0...0xffff */ - int set_tc_index; +#define DSMARK_EMBEDDED_SZ 16 + struct mask_value embedded[DSMARK_EMBEDDED_SZ]; }; static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) @@ -116,7 +122,6 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_DSMARK_MAX + 1]; int err = -EINVAL; - u8 mask = 0; pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", __func__, sch, p, classid, parent, *arg); @@ -133,14 +138,11 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (err < 0) goto errout; - if (tb[TCA_DSMARK_MASK]) - mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - if (tb[TCA_DSMARK_VALUE]) - p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); + p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); if (tb[TCA_DSMARK_MASK]) - p->mask[*arg - 1] = mask; + p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); err = 0; @@ -155,8 +157,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg) if (!dsmark_valid_index(p, arg)) return -EINVAL; - p->mask[arg - 1] = 0xff; - p->value[arg - 1] = 0; + p->mv[arg - 1].mask = 0xff; + p->mv[arg - 1].value = 0; return 0; } @@ -173,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) return; for (i = 0; i < p->indices; i++) { - if (p->mask[i] == 0xff && !p->value[i]) + if (p->mv[i].mask == 0xff && !p->mv[i].value) goto ignore; if (walker->count >= walker->skip) { if (walker->fn(sch, i + 1, walker) < 0) { @@ -291,12 +293,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mask[index], - p->value[index]); + ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, + p->mv[index].value); break; case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], - p->value[index]); + ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, + p->mv[index].value); break; default: /* @@ -304,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) * This way, we can send non-IP traffic through dsmark * and don't need yet another qdisc as a bypass. */ - if (p->mask[index] != 0xff || p->value[index]) + if (p->mv[index].mask != 0xff || p->mv[index].value) pr_warn("%s: unsupported protocol %d\n", __func__, ntohs(tc_skb_protocol(skb))); break; @@ -346,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) int err = -EINVAL; u32 default_index = NO_DEFAULT_INDEX; u16 indices; - u8 *mask; + int i; pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); @@ -366,18 +368,18 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_DSMARK_DEFAULT_INDEX]) default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - mask = kmalloc(indices * 2, GFP_KERNEL); - if (mask == NULL) { + if (indices <= DSMARK_EMBEDDED_SZ) + p->mv = p->embedded; + else + p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); + if (!p->mv) { err = -ENOMEM; goto errout; } - - p->mask = mask; - memset(p->mask, 0xff, indices); - - p->value = p->mask + indices; - memset(p->value, 0, indices); - + for (i = 0; i < indices; i++) { + p->mv[i].mask = 0xff; + p->mv[i].value = 0; + } p->indices = indices; p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); @@ -410,7 +412,8 @@ static void dsmark_destroy(struct Qdisc *sch) tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); - kfree(p->mask); + if (p->mv != p->embedded) + kfree(p->mv); } static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, @@ -430,8 +433,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->value[cl - 1])) + if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || + nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index fda38f830a10..fe82fab1d55c 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1,6 +1,6 @@ /* * net/switchdev/switchdev.c - Switch device API - * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us> * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -16,10 +16,83 @@ #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <linux/list.h> #include <net/ip_fib.h> #include <net/switchdev.h> /** + * switchdev_trans_item_enqueue - Enqueue data item to transaction queue + * + * @trans: transaction + * @data: pointer to data being queued + * @destructor: data destructor + * @tritem: transaction item being queued + * + * Enqeueue data item to transaction queue. tritem is typically placed in + * cointainter pointed at by data pointer. Destructor is called on + * transaction abort and after successful commit phase in case + * the caller did not dequeue the item before. + */ +void switchdev_trans_item_enqueue(struct switchdev_trans *trans, + void *data, void (*destructor)(void const *), + struct switchdev_trans_item *tritem) +{ + tritem->data = data; + tritem->destructor = destructor; + list_add_tail(&tritem->list, &trans->item_list); +} +EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); + +static struct switchdev_trans_item * +__switchdev_trans_item_dequeue(struct switchdev_trans *trans) +{ + struct switchdev_trans_item *tritem; + + if (list_empty(&trans->item_list)) + return NULL; + tritem = list_first_entry(&trans->item_list, + struct switchdev_trans_item, list); + list_del(&tritem->list); + return tritem; +} + +/** + * switchdev_trans_item_dequeue - Dequeue data item from transaction queue + * + * @trans: transaction + */ +void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) +{ + struct switchdev_trans_item *tritem; + + tritem = __switchdev_trans_item_dequeue(trans); + BUG_ON(!tritem); + return tritem->data; +} +EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); + +static void switchdev_trans_init(struct switchdev_trans *trans) +{ + INIT_LIST_HEAD(&trans->item_list); +} + +static void switchdev_trans_items_destroy(struct switchdev_trans *trans) +{ + struct switchdev_trans_item *tritem; + + while ((tritem = __switchdev_trans_item_dequeue(trans))) + tritem->destructor(tritem->data); +} + +static void switchdev_trans_items_warn_destroy(struct net_device *dev, + struct switchdev_trans *trans) +{ + WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", + dev->name); + switchdev_trans_items_destroy(trans); +} + +/** * switchdev_port_attr_get - Get port attribute * * @dev: port device @@ -62,7 +135,8 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) EXPORT_SYMBOL_GPL(switchdev_port_attr_get); static int __switchdev_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr) + struct switchdev_attr *attr, + struct switchdev_trans *trans) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -70,7 +144,7 @@ static int __switchdev_port_attr_set(struct net_device *dev, int err = -EOPNOTSUPP; if (ops && ops->switchdev_port_attr_set) - return ops->switchdev_port_attr_set(dev, attr); + return ops->switchdev_port_attr_set(dev, attr, trans); if (attr->flags & SWITCHDEV_F_NO_RECURSE) return err; @@ -81,7 +155,7 @@ static int __switchdev_port_attr_set(struct net_device *dev, */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = __switchdev_port_attr_set(lower_dev, attr); + err = __switchdev_port_attr_set(lower_dev, attr, trans); if (err) break; } @@ -144,6 +218,7 @@ static int switchdev_port_attr_set_defer(struct net_device *dev, */ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { + struct switchdev_trans trans; int err; if (!rtnl_is_locked()) { @@ -156,6 +231,8 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) return switchdev_port_attr_set_defer(dev, attr); } + switchdev_trans_init(&trans); + /* Phase I: prepare for attr set. Driver/device should fail * here if there are going to be issues in the commit phase, * such as lack of resources or support. The driver/device @@ -163,18 +240,16 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * but should not commit the attr. */ - attr->trans = SWITCHDEV_TRANS_PREPARE; - err = __switchdev_port_attr_set(dev, attr); + trans.ph_prepare = true; + err = __switchdev_port_attr_set(dev, attr, &trans); if (err) { /* Prepare phase failed: abort the transaction. Any * resources reserved in the prepare phase are * released. */ - if (err != -EOPNOTSUPP) { - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); - } + if (err != -EOPNOTSUPP) + switchdev_trans_items_destroy(&trans); return err; } @@ -184,17 +259,19 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * because the driver said everythings was OK in phase I. */ - attr->trans = SWITCHDEV_TRANS_COMMIT; - err = __switchdev_port_attr_set(dev, attr); + trans.ph_prepare = false; + err = __switchdev_port_attr_set(dev, attr, &trans); WARN(err, "%s: Commit of attribute (id=%d) failed.\n", dev->name, attr->id); + switchdev_trans_items_warn_destroy(dev, &trans); return err; } EXPORT_SYMBOL_GPL(switchdev_port_attr_set); static int __switchdev_port_obj_add(struct net_device *dev, - struct switchdev_obj *obj) + enum switchdev_obj_id id, const void *obj, + struct switchdev_trans *trans) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -202,7 +279,7 @@ static int __switchdev_port_obj_add(struct net_device *dev, int err = -EOPNOTSUPP; if (ops && ops->switchdev_port_obj_add) - return ops->switchdev_port_obj_add(dev, obj); + return ops->switchdev_port_obj_add(dev, id, obj, trans); /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to add object on @@ -210,7 +287,7 @@ static int __switchdev_port_obj_add(struct net_device *dev, */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = __switchdev_port_obj_add(lower_dev, obj); + err = __switchdev_port_obj_add(lower_dev, id, obj, trans); if (err) break; } @@ -222,6 +299,7 @@ static int __switchdev_port_obj_add(struct net_device *dev, * switchdev_port_obj_add - Add port object * * @dev: port device + * @id: object ID * @obj: object to add * * Use a 2-phase prepare-commit transaction model to ensure @@ -230,12 +308,16 @@ static int __switchdev_port_obj_add(struct net_device *dev, * * rtnl_lock must be held. */ -int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) +int switchdev_port_obj_add(struct net_device *dev, enum switchdev_obj_id id, + const void *obj) { + struct switchdev_trans trans; int err; ASSERT_RTNL(); + switchdev_trans_init(&trans); + /* Phase I: prepare for obj add. Driver/device should fail * here if there are going to be issues in the commit phase, * such as lack of resources or support. The driver/device @@ -243,18 +325,16 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * but should not commit the obj. */ - obj->trans = SWITCHDEV_TRANS_PREPARE; - err = __switchdev_port_obj_add(dev, obj); + trans.ph_prepare = true; + err = __switchdev_port_obj_add(dev, id, obj, &trans); if (err) { /* Prepare phase failed: abort the transaction. Any * resources reserved in the prepare phase are * released. */ - if (err != -EOPNOTSUPP) { - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); - } + if (err != -EOPNOTSUPP) + switchdev_trans_items_destroy(&trans); return err; } @@ -264,9 +344,10 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * because the driver said everythings was OK in phase I. */ - obj->trans = SWITCHDEV_TRANS_COMMIT; - err = __switchdev_port_obj_add(dev, obj); - WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); + trans.ph_prepare = false; + err = __switchdev_port_obj_add(dev, id, obj, &trans); + WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, id); + switchdev_trans_items_warn_destroy(dev, &trans); return err; } @@ -276,9 +357,11 @@ EXPORT_SYMBOL_GPL(switchdev_port_obj_add); * switchdev_port_obj_del - Delete port object * * @dev: port device + * @id: object ID * @obj: object to delete */ -int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) +int switchdev_port_obj_del(struct net_device *dev, enum switchdev_obj_id id, + const void *obj) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -286,7 +369,7 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) int err = -EOPNOTSUPP; if (ops && ops->switchdev_port_obj_del) - return ops->switchdev_port_obj_del(dev, obj); + return ops->switchdev_port_obj_del(dev, id, obj); /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to delete object on @@ -294,7 +377,7 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_del(lower_dev, obj); + err = switchdev_port_obj_del(lower_dev, id, obj); if (err) break; } @@ -307,9 +390,12 @@ EXPORT_SYMBOL_GPL(switchdev_port_obj_del); * switchdev_port_obj_dump - Dump port objects * * @dev: port device + * @id: object ID * @obj: object to dump + * @cb: function to call with a filled object */ -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) +int switchdev_port_obj_dump(struct net_device *dev, enum switchdev_obj_id id, + void *obj, int (*cb)(void *obj)) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -317,7 +403,7 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) int err = -EOPNOTSUPP; if (ops && ops->switchdev_port_obj_dump) - return ops->switchdev_port_obj_dump(dev, obj); + return ops->switchdev_port_obj_dump(dev, id, obj, cb); /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to dump objects on @@ -325,7 +411,7 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_dump(lower_dev, obj); + err = switchdev_port_obj_dump(lower_dev, id, obj, cb); break; } @@ -397,7 +483,7 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, EXPORT_SYMBOL_GPL(call_switchdev_notifiers); struct switchdev_vlan_dump { - struct switchdev_obj obj; + struct switchdev_obj_vlan vlan; struct sk_buff *skb; u32 filter_mask; u16 flags; @@ -405,8 +491,7 @@ struct switchdev_vlan_dump { u16 end; }; -static int switchdev_port_vlan_dump_put(struct net_device *dev, - struct switchdev_vlan_dump *dump) +static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump) { struct bridge_vlan_info vinfo; @@ -436,12 +521,11 @@ static int switchdev_port_vlan_dump_put(struct net_device *dev, return 0; } -static int switchdev_port_vlan_dump_cb(struct net_device *dev, - struct switchdev_obj *obj) +static int switchdev_port_vlan_dump_cb(void *obj) { + struct switchdev_obj_vlan *vlan = obj; struct switchdev_vlan_dump *dump = - container_of(obj, struct switchdev_vlan_dump, obj); - struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan; + container_of(vlan, struct switchdev_vlan_dump, vlan); int err = 0; if (vlan->vid_begin > vlan->vid_end) @@ -452,7 +536,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev, for (dump->begin = dump->end = vlan->vid_begin; dump->begin <= vlan->vid_end; dump->begin++, dump->end++) { - err = switchdev_port_vlan_dump_put(dev, dump); + err = switchdev_port_vlan_dump_put(dump); if (err) return err; } @@ -464,7 +548,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev, /* prepend */ dump->begin = vlan->vid_begin; } else { - err = switchdev_port_vlan_dump_put(dev, dump); + err = switchdev_port_vlan_dump_put(dump); dump->flags = vlan->flags; dump->begin = vlan->vid_begin; dump->end = vlan->vid_end; @@ -476,7 +560,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev, /* append */ dump->end = vlan->vid_end; } else { - err = switchdev_port_vlan_dump_put(dev, dump); + err = switchdev_port_vlan_dump_put(dump); dump->flags = vlan->flags; dump->begin = vlan->vid_begin; dump->end = vlan->vid_end; @@ -493,10 +577,6 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, u32 filter_mask) { struct switchdev_vlan_dump dump = { - .obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - .cb = switchdev_port_vlan_dump_cb, - }, .skb = skb, .filter_mask = filter_mask, }; @@ -504,12 +584,14 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, if ((filter_mask & RTEXT_FILTER_BRVLAN) || (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { - err = switchdev_port_obj_dump(dev, &dump.obj); + err = switchdev_port_obj_dump(dev, SWITCHDEV_OBJ_PORT_VLAN, + &dump.vlan, + switchdev_port_vlan_dump_cb); if (err) goto err_out; if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) /* last one */ - err = switchdev_port_vlan_dump_put(dev, &dump); + err = switchdev_port_vlan_dump_put(&dump); } err_out: @@ -617,14 +699,12 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, static int switchdev_port_br_afspec(struct net_device *dev, struct nlattr *afspec, int (*f)(struct net_device *dev, - struct switchdev_obj *obj)) + enum switchdev_obj_id id, + const void *obj)) { struct nlattr *attr; struct bridge_vlan_info *vinfo; - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - }; - struct switchdev_obj_vlan *vlan = &obj.u.vlan; + struct switchdev_obj_vlan vlan = { 0 }; int rem; int err; @@ -634,30 +714,30 @@ static int switchdev_port_br_afspec(struct net_device *dev, if (nla_len(attr) != sizeof(struct bridge_vlan_info)) return -EINVAL; vinfo = nla_data(attr); - vlan->flags = vinfo->flags; + vlan.flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vlan->vid_begin) + if (vlan.vid_begin) return -EINVAL; - vlan->vid_begin = vinfo->vid; + vlan.vid_begin = vinfo->vid; } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!vlan->vid_begin) + if (!vlan.vid_begin) return -EINVAL; - vlan->vid_end = vinfo->vid; - if (vlan->vid_end <= vlan->vid_begin) + vlan.vid_end = vinfo->vid; + if (vlan.vid_end <= vlan.vid_begin) return -EINVAL; - err = f(dev, &obj); + err = f(dev, SWITCHDEV_OBJ_PORT_VLAN, &vlan); if (err) return err; - memset(vlan, 0, sizeof(*vlan)); + memset(&vlan, 0, sizeof(vlan)); } else { - if (vlan->vid_begin) + if (vlan.vid_begin) return -EINVAL; - vlan->vid_begin = vinfo->vid; - vlan->vid_end = vinfo->vid; - err = f(dev, &obj); + vlan.vid_begin = vinfo->vid; + vlan.vid_end = vinfo->vid; + err = f(dev, SWITCHDEV_OBJ_PORT_VLAN, &vlan); if (err) return err; - memset(vlan, 0, sizeof(*vlan)); + memset(&vlan, 0, sizeof(vlan)); } } @@ -739,15 +819,12 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags) { - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .u.fdb = { - .addr = addr, - .vid = vid, - }, + struct switchdev_obj_fdb fdb = { + .addr = addr, + .vid = vid, }; - return switchdev_port_obj_add(dev, &obj); + return switchdev_port_obj_add(dev, SWITCHDEV_OBJ_PORT_FDB, &fdb); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -766,30 +843,28 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .u.fdb = { - .addr = addr, - .vid = vid, - }, + struct switchdev_obj_fdb fdb = { + .addr = addr, + .vid = vid, }; - return switchdev_port_obj_del(dev, &obj); + return switchdev_port_obj_del(dev, SWITCHDEV_OBJ_PORT_FDB, &fdb); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); struct switchdev_fdb_dump { - struct switchdev_obj obj; + struct switchdev_obj_fdb fdb; + struct net_device *dev; struct sk_buff *skb; struct netlink_callback *cb; int idx; }; -static int switchdev_port_fdb_dump_cb(struct net_device *dev, - struct switchdev_obj *obj) +static int switchdev_port_fdb_dump_cb(void *obj) { + struct switchdev_obj_fdb *fdb = obj; struct switchdev_fdb_dump *dump = - container_of(obj, struct switchdev_fdb_dump, obj); + container_of(fdb, struct switchdev_fdb_dump, fdb); u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; @@ -809,13 +884,13 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_pad2 = 0; ndm->ndm_flags = NTF_SELF; ndm->ndm_type = 0; - ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = obj->u.fdb.ndm_state; + ndm->ndm_ifindex = dump->dev->ifindex; + ndm->ndm_state = fdb->ndm_state; - if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr)) goto nla_put_failure; - if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid)) + if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid)) goto nla_put_failure; nlmsg_end(dump->skb, nlh); @@ -845,16 +920,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *filter_dev, int idx) { struct switchdev_fdb_dump dump = { - .obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .cb = switchdev_port_fdb_dump_cb, - }, + .dev = dev, .skb = skb, .cb = cb, .idx = idx, }; - switchdev_port_obj_dump(dev, &dump.obj); + switchdev_port_obj_dump(dev, SWITCHDEV_OBJ_PORT_FDB, &dump.fdb, + switchdev_port_fdb_dump_cb); return dump.idx; } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); @@ -932,17 +1005,14 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id) { - struct switchdev_obj fib_obj = { - .id = SWITCHDEV_OBJ_IPV4_FIB, - .u.ipv4_fib = { - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }, + struct switchdev_obj_ipv4_fib ipv4_fib = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = nlflags, + .tb_id = tb_id, }; struct net_device *dev; int err = 0; @@ -963,7 +1033,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; - err = switchdev_port_obj_add(dev, &fib_obj); + err = switchdev_port_obj_add(dev, SWITCHDEV_OBJ_IPV4_FIB, &ipv4_fib); if (!err) fi->fib_flags |= RTNH_F_OFFLOAD; @@ -986,17 +1056,14 @@ EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { - struct switchdev_obj fib_obj = { - .id = SWITCHDEV_OBJ_IPV4_FIB, - .u.ipv4_fib = { - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }, + struct switchdev_obj_ipv4_fib ipv4_fib = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .nlflags = 0, + .tb_id = tb_id, }; struct net_device *dev; int err = 0; @@ -1008,7 +1075,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; - err = switchdev_port_obj_del(dev, &fib_obj); + err = switchdev_port_obj_del(dev, SWITCHDEV_OBJ_IPV4_FIB, &ipv4_fib); if (!err) fi->fib_flags &= ~RTNH_F_OFFLOAD; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 68ada2ca4b60..c48a4b8582bb 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -19,7 +19,7 @@ #include <net/dst.h> #include <net/xfrm.h> -static int xfrm_output2(struct sock *sk, struct sk_buff *skb); +static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -131,6 +131,8 @@ out: int xfrm_output_resume(struct sk_buff *skb, int err) { + struct net *net = xs_net(skb_dst(skb)->xfrm); + while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset(skb); @@ -139,10 +141,10 @@ int xfrm_output_resume(struct sk_buff *skb, int err) goto out; if (!skb_dst(skb)->xfrm) - return dst_output(skb); + return dst_output(skb->sk, skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, skb->sk, skb, + NF_INET_POST_ROUTING, net, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -156,12 +158,12 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); -static int xfrm_output2(struct sock *sk, struct sk_buff *skb) +static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } -static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) +static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; @@ -177,7 +179,7 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) int err; segs->next = NULL; - err = xfrm_output2(sk, segs); + err = xfrm_output2(net, sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); @@ -196,7 +198,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) int err; if (skb_is_gso(skb)) - return xfrm_output_gso(sk, skb); + return xfrm_output_gso(net, sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); @@ -207,7 +209,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) } } - return xfrm_output2(sk, skb); + return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 94af3d065785..418daa038edf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1208,7 +1208,7 @@ static inline int policy_to_flow_dir(int dir) } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, +static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; @@ -1583,8 +1583,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; - if (afinfo->init_dst) - afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); @@ -1947,7 +1945,7 @@ static void xfrm_policy_queue_process(unsigned long arg) skb_dst_drop(skb); skb_dst_set(skb, dst); - dst_output(skb); + dst_output(skb->sk, skb); } out: @@ -2187,7 +2185,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, */ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, - struct sock *sk, int flags) + const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct flow_cache_object *flo; @@ -2335,7 +2333,7 @@ EXPORT_SYMBOL(xfrm_lookup); */ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, - struct sock *sk, int flags) + const struct sock *sk, int flags) { struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags | XFRM_LOOKUP_QUEUE | |