From ca8d9ea30bc79b2965a1d169dcb2f48f02af4d2d Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Wed, 6 Jan 2010 12:56:37 +0000 Subject: fix bonding: allow arp_ip_targets on separate vlans to use arp validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Wed, Jan 06, 2010 at 10:10:03PM +0100, Eric Dumazet wrote: > Le 06/01/2010 19:38, Eric Dumazet a écrit : > > > > (net-next-2.6 doesnt work well on my bond/vlan setup, I suspect I need a bisection) > > David, I had to revert 1f3c8804acba841b5573b953f5560d2683d2db0d > (bonding: allow arp_ip_targets on separate vlans to use arp validation) > > Or else, my vlan devices dont work (unfortunatly I dont have much time > these days to debug the thing) > > My config : > > +---------+ > vlan.103 -----+ bond0 +--- eth1 (bnx2) > | + > vlan.825 -----+ +--- eth2 (tg3) > +---------+ > > $ cat /proc/net/bonding/bond0 > Ethernet Channel Bonding Driver: v3.6.0 (September 26, 2009) > > Bonding Mode: fault-tolerance (active-backup) > Primary Slave: None > Currently Active Slave: eth2 > MII Status: up > MII Polling Interval (ms): 100 > Up Delay (ms): 0 > Down Delay (ms): 0 > > Slave Interface: eth1 (bnx2) > MII Status: down > Link Failure Count: 1 > Permanent HW addr: 00:1e:0b:ec:d3:d2 > > Slave Interface: eth2 (tg3) > MII Status: up > Link Failure Count: 0 > Permanent HW addr: 00:1e:0b:92:78:50 > This patch fixes up a problem with found with commit 1f3c8804acba841b5573b953f5560d2683d2db0d. The original change overloaded null_or_orig, but doing that prevented any packet handlers that were not tied to a specific device (i.e. ptype->dev == NULL) from ever receiving any frames. The null_or_orig variable cannot be overloaded, and must be kept as NULL to prevent the frame from being ignored by packet handlers designed to accept frames on any interface. Signed-off-by: Andy Gospodarek Signed-off-by: Jay Vosburgh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index f9aa699ab6cb..d9ab9be0c323 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2430,6 +2430,7 @@ int netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; struct net_device *null_or_orig; + struct net_device *null_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2500,21 +2501,19 @@ ncls: * bonding interfaces still make their way to any base bonding * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. - * - * null_or_orig can be overloaded since it will not be set when - * using VLANs on top of bonding. Putting it here prevents - * disturbing the ptype_all handlers above. */ + null_or_bond = NULL; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_orig = vlan_dev_real_dev(skb->dev); + null_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || - ptype->dev == skb->dev || ptype->dev == orig_dev)) { + ptype->dev == skb->dev || ptype->dev == orig_dev || + ptype->dev == null_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.1 From 2d13bafeba24f732e89b818b8c66b07893457570 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 Jan 2010 05:50:52 +0000 Subject: net: Make it easier to parse /proc/net/dev contents. The contents of /proc/net/dev is annoying to parse, because it changes whether there is a space after the "ethX:" or not. It depends upon the size of the "Receive bytes" counter, if the number is below 7 digits, then there is whitespaces else if the number is 8 digits or above there is no space between the ":" and the number. This patch changes the output to assure there is always a space between the ":" and the number. Given that all existing userspace application already need to handle the whitespaces, I see no breakage of existing tools. Signed-off-by: Jesper Dangaard Brouer Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d9ab9be0c323..a008f6987a95 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3206,7 +3206,7 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { const struct net_device_stats *stats = dev_get_stats(dev); - seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " + seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, -- cgit v1.2.1 From 11380a4b2d86fae9a6bce75c9373668cc323fe57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jan 2010 13:46:10 -0800 Subject: net: Unexport napi_gro_flush(). Nothing outside of net/core/dev.c uses it. Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a008f6987a95..5747b9edc1bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2582,7 +2582,7 @@ out: return netif_receive_skb(skb); } -void napi_gro_flush(struct napi_struct *napi) +static void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -2595,7 +2595,6 @@ void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } -EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.1 From 4b258461c0b31ded170a1a56b944b0fded1c887b Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Thu, 21 Jan 2010 01:26:29 -0800 Subject: net: Optimize non-gso test checks Avoid checking twice whether skb needs to be linearized, if one skb_linearize was already done. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/core/dev.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5747b9edc1bb..4fad9db417b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1982,6 +1982,21 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return rc; } +/* + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG, or if + * at least one of fragments is in highmem and device does not + * support DMA from it. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, + struct net_device *dev) +{ + return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb))); +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -2018,18 +2033,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (netif_needs_gso(dev, skb)) goto gso; - if (skb_has_frags(skb) && - !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb)) - goto out_kfree_skb; - - /* Fragmented skb is linearized if device does not support SG, - * or if at least one of fragments is in highmem and device - * does not support DMA from it. - */ - if (skb_shinfo(skb)->nr_frags && - (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb)) + /* Convert a paged skb to linear, if required */ + if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support -- cgit v1.2.1 From 32e7bfc41110bc8f29ec0f293c3bcee6645fef34 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jan 2010 13:36:10 -0800 Subject: net: use helpers to access uc list V2 This patch introduces three macros to work with uc list from net drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4fad9db417b1..2cba5c521e56 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3665,10 +3665,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc.count > 0 && !dev->uc_promisc) { + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc.count == 0 && dev->uc_promisc) { + } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } -- cgit v1.2.1 From 8a83a00b0735190384a348156837918271034144 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:03 +0000 Subject: net: maintain namespace isolation between vlan and real device In the vlan and macvlan drivers, the start_xmit function forwards data to the dev_queue_xmit function for another device, which may potentially belong to a different namespace. To make sure that classification stays within a single namespace, this resets the potentially critical fields. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2cba5c521e56..94c1eeed25e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1448,13 +1448,10 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) if (skb->len > (dev->mtu + dev->hard_header_len)) return NET_RX_DROP; - skb_dst_drop(skb); + skb_set_dev(skb, dev); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1614,6 +1611,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) return false; } +/** + * skb_dev_set -- assign a new device to a buffer + * @skb: buffer for the new device + * @dev: network device + * + * If an skb is owned by a device already, we have to reset + * all data private to the namespace a device belongs to + * before assigning it a new device. + */ +#ifdef CONFIG_NET_NS +void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb_dst_drop(skb); + if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { + secpath_reset(skb); + nf_reset(skb); + skb_init_secmark(skb); + skb->mark = 0; + skb->priority = 0; + skb->nf_trace = 0; + skb->ipvs_property = 0; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif + } + skb->dev = dev; +} +EXPORT_SYMBOL(skb_set_dev); +#endif /* CONFIG_NET_NS */ + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. -- cgit v1.2.1