From 368a07d26ae99c80678a968946744fd83e7708d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:26:23 +0200 Subject: mac80211: make a function static sparse correctly complains that __ieee80211_get_channel_mode is not static. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/chan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5d218c530a4e..32be11e4c4d9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -5,7 +5,7 @@ #include #include "ieee80211_i.h" -enum ieee80211_chan_mode +static enum ieee80211_chan_mode __ieee80211_get_channel_mode(struct ieee80211_local *local, struct ieee80211_sub_if_data *ignore) { -- cgit v1.2.1 From 6057fd78a8dcce6269f029b967051d5a2e9b0895 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 28 May 2010 23:02:35 -0700 Subject: IPv6: fix Mobile IPv6 regression Commit f4f914b5 (net: ipv6 bind to device issue) caused a regression with Mobile IPv6 when it changed the meaning of fl->oif to become a strict requirement of the route lookup. Instead, only force strict mode when sk->sk_bound_dev_if is set on the calling socket, getting the intended behavior and fixing the regression. Tested-by: Arnaud Ebalard Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 294cbe8b0725..252d76199c41 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -814,7 +814,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, { int flags = 0; - if (fl->oif || rt6_need_strict(&fl->fl6_dst)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl->fl6_src)) -- cgit v1.2.1 From 5b0daa3474d52bed906c4d5e92b44e10148c6972 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 00:12:13 -0700 Subject: skb: make skb_recycle_check() return a bool value Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/skbuff.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4c11000a96aa..4667d4d7c162 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb); * reference count dropping and cleans up the skbuff as if it * just came from __alloc_skb(). */ -int skb_recycle_check(struct sk_buff *skb, int skb_size) +bool skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; if (irqs_disabled()) - return 0; + return false; if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return 0; + return false; skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); if (skb_end_pointer(skb) - skb->head < skb_size) - return 0; + return false; if (skb_shared(skb) || skb_cloned(skb)) - return 0; + return false; skb_release_head_state(skb); @@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) skb->data = skb->head + NET_SKB_PAD; skb_reset_tail_pointer(skb); - return 1; + return true; } EXPORT_SYMBOL(skb_recycle_check); -- cgit v1.2.1 From 5daf47bb4e708fde32c1856a0d049e3c3d03c36c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 26 May 2010 05:54:21 +0000 Subject: net/rds: Add missing mutex_unlock Add a mutex_unlock missing on the error path. In each case, whenever the label out is reached from elsewhere in the function, mutex is not locked. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E1; @@ * mutex_lock(E1); <+... when != E1 if (...) { ... when != E1 * return ...; } ...+> * mutex_unlock(E1); // Signed-off-by: Julia Lawall Reviewed-by: Zach Brown Acked-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 1 + net/rds/iw_cm.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 10ed0d55f759..f68832798db2 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -475,6 +475,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_ib_setup_qp(conn); if (err) { rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); + mutex_unlock(&conn->c_cm_lock); goto out; } diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a9d951b4fbae..b5dd6ac39be8 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -452,6 +452,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, err = rds_iw_setup_qp(conn); if (err) { rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); + mutex_unlock(&conn->c_cm_lock); goto out; } -- cgit v1.2.1 From 97dc875f90a7b88a9fa476c256345c0d40fcdf6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 May 2010 05:16:48 +0000 Subject: caif: unlock on error path in cfserl_receive() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was an spin_unlock missing on the error path. The spin_lock was tucked in with the declarations so it was hard to spot. I added a new line. Signed-off-by: Dan Carpenter Acked-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfserl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index cb4325a3dc83..965c5baace40 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -59,16 +59,18 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) u8 stx = CFSERL_STX; int ret; u16 expectlen = 0; + caif_assert(newpkt != NULL); spin_lock(&layr->sync); if (layr->incomplete_frm != NULL) { - layr->incomplete_frm = cfpkt_append(layr->incomplete_frm, newpkt, expectlen); pkt = layr->incomplete_frm; - if (pkt == NULL) + if (pkt == NULL) { + spin_unlock(&layr->sync); return -ENOMEM; + } } else { pkt = newpkt; } -- cgit v1.2.1 From 7dfde179c38056b91d51e60f3d50902387f27c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 26 May 2010 00:44:44 +0000 Subject: Phonet: listening socket lock protects the connected socket list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The accept()'d socket need to be unhashed while the (listen()'ing) socket lock is held. This fixes a race condition that could lead to an OOPS. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 7b048a35ca58..94d72e85a475 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1045,12 +1045,12 @@ static void pep_sock_unhash(struct sock *sk) lock_sock(sk); if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { skparent = pn->listener; - sk_del_node_init(sk); release_sock(sk); - sk = skparent; pn = pep_sk(skparent); - lock_sock(sk); + lock_sock(skparent); + sk_del_node_init(sk); + sk = skparent; } /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. */ -- cgit v1.2.1 From 2903037400a26e7c0cc93ab75a7d62abfacdf485 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 29 May 2010 00:20:48 -0700 Subject: net: fix sk_forward_alloc corruptions As David found out, sock_queue_err_skb() should be called with socket lock hold, or we risk sk_forward_alloc corruption, since we use non atomic operations to update this field. This patch adds bh_lock_sock()/bh_unlock_sock() pair to three spots. (BH already disabled) 1) skb_tstamp_tx() 2) Before calling ip_icmp_error(), in __udp4_lib_err() 3) Before calling ipv6_icmp_error(), in __udp6_lib_err() Reported-by: Anton Blanchard Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ net/ipv4/udp.c | 2 ++ net/ipv6/udp.c | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4667d4d7c162..f2913ae2b52c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2992,7 +2992,11 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + + bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); + bh_unlock_sock(sk); + if (err) kfree_skb(skb); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b9d0d409516f..acdc9be833cc 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -634,7 +634,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { + bh_lock_sock(sk); ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); + bh_unlock_sock(sk); } sk->sk_err = err; sk->sk_error_report(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 87be58673b55..3048f906c042 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,9 +466,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) + if (np->recverr) { + bh_lock_sock(sk); ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - + bh_unlock_sock(sk); + } sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.2.1 From c936e8bd1de2fa50c49e3df6fa5036bf07870b67 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 31 May 2010 16:41:09 +0200 Subject: netfilter: don't xt_jumpstack_alloc twice in xt_register_table In xt_register_table, xt_jumpstack_alloc is called first, later xt_replace_table is used. But in xt_replace_table, xt_jumpstack_alloc will be used again. Then the memory allocated by previous xt_jumpstack_alloc will be leaked. We can simply remove the previous xt_jumpstack_alloc because there aren't any users of newinfo between xt_jumpstack_alloc and xt_replace_table. Signed-off-by: Xiaotian Feng Cc: Patrick McHardy Cc: "David S. Miller" Cc: Jan Engelhardt Cc: Andrew Morton Cc: Rusty Russell Cc: Alexey Dobriyan Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 445de702b8b7..47b1e7917a9c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -844,10 +844,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; - ret = xt_jumpstack_alloc(newinfo); - if (ret < 0) - return ERR_PTR(ret); - /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { -- cgit v1.2.1 From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 16:41:35 +0200 Subject: netfilter: xtables: stackptr should be percpu commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant) introduced a performance regression, because stackptr array is shared by all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache line) Fix this using alloc_percpu() Signed-off-by: Eric Dumazet Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 13 +++---------- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3394a5..4b6c5ca610fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd83692..9d2d68f0e605 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b1e7917a9c..e34622fa0003 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info) vfree(info->jumpstack); else kfree(info->jumpstack); - if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) - vfree(info->stackptr); - else - kfree(info->stackptr); + + free_percpu(info->stackptr); kfree(info); } @@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - size = sizeof(unsigned int) * nr_cpu_ids; - if (size > PAGE_SIZE) - i->stackptr = vmalloc(size); - else - i->stackptr = kmalloc(size, GFP_KERNEL); + i->stackptr = alloc_percpu(unsigned int); if (i->stackptr == NULL) return -ENOMEM; - memset(i->stackptr, 0, size); size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) -- cgit v1.2.1 From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 23:44:05 -0700 Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc Correct sk_forward_alloc handling for error_queue would need to use a backlog of frames that softirq handler could not deliver because socket is owned by user thread. Or extend backlog processing to be able to process normal and error packets. Another possibility is to not use mem charge for error queue, this is what I implemented in this patch. Note: this reverts commit 29030374 (net: fix sk_forward_alloc corruptions), since we dont need to lock socket anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 30 ++++++++++++++++++++++++++++-- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e7ac09c281a..9f07e749d7b1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +static void sock_rmem_free(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); +} + +/* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); + return 0; +} +EXPORT_SYMBOL(sock_queue_err_skb); + void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { @@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); - bh_unlock_sock(sk); if (err) kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 50678f9a2763..eec4ff456e33 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!inet->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; - } else { - bh_lock_sock(sk); + } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f906c042..87be58673b55 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) { - bh_lock_sock(sk); + if (np->recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.2.1 From 288fcee8b7aa98796d96cd5b1b2e8005639328bf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 23:48:19 -0700 Subject: net/ipv4/tcp_input.c: fix compilation breakage when FASTRETRANS_DEBUG > 1 Commit: c720c7e8383aff1cb219bddf474ed89d850336e3 missed these. Signed-off-by: Joe Perches Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e6dafcb1071..548d575e6cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2639,7 +2639,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) if (sk->sk_family == AF_INET) { printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - &inet->daddr, ntohs(inet->dport), + &inet->inet_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2649,7 +2649,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->dport), + &np->daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); -- cgit v1.2.1 From 8ae5977ff95c03fe6c36a5721c57dcb4bfe4f290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:52:58 +0200 Subject: mac80211: fix blockack-req processing Daniel reported that the paged RX changes had broken blockack request frame processing due to using data that wasn't really part of the skb data. Fix this using skb_copy_bits() for the needed data. As a side effect, this adds a check on processing too short frames, which previously this code could do. Reported-by: Daniel Halperin Signed-off-by: Johannes Berg Acked-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bcd8cb8..5e0b65406c44 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1818,17 +1818,26 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_CONTINUE; if (ieee80211_is_back_req(bar->frame_control)) { + struct { + __le16 control, start_seq_num; + } __packed bar_data; + if (!rx->sta) return RX_DROP_MONITOR; + + if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), + &bar_data, sizeof(bar_data))) + return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); - tid = le16_to_cpu(bar->control) >> 12; + tid = le16_to_cpu(bar_data.control) >> 12; if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; - start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; + start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; /* reset session timer */ if (tid_agg_rx->timeout) -- cgit v1.2.1 From 51a0d38de26226f2779912d92f155b93d539da9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 12:00:12 +0200 Subject: mac80211: fix dialog token allocator The dialog token allocator has apparently been broken since b83f4e15 ("mac80211: fix deadlock in sta->lock") because it got moved out under the spinlock. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a149f4..98258b7341e3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -332,14 +332,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); - /* send an addBA request */ + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; sta->ampdu_mlme.tid_tx[tid]->dialog_token = sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, -- cgit v1.2.1 From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 10:04:49 +0000 Subject: xfrm: force a dst reference in __xfrm_route_forward() Packets going through __xfrm_route_forward() have a not refcounted dst entry, since we enabled a noref forwarding path. xfrm_lookup() might incorrectly release this dst entry. It's a bit late to make invasive changes in xfrm_lookup(), so lets force a refcount in this path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d965a2bad8d3..4bf27d901333 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } + skb_dst_force(skb); dst = skb_dst(skb); res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; -- cgit v1.2.1 From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:06 +0000 Subject: net: init_vlan should not copy slave or master flags The vlan device should not copy the slave or master flags from the real device. It is not in the bond until added nor is it a master. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 55be90826f5f..529842677817 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev) netif_carrier_off(dev); /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | -- cgit v1.2.1 From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:11 +0000 Subject: net: fix conflict between null_or_orig and null_or_bond If a skb is received on an inactive bond that does not meet the special cases checked for by skb_bond_should_drop it should only be delivered to exact matches as the comment in netif_receive_skb() says. However because null_or_bond could also be null this is not always true. This patch renames null_or_bond to orig_or_bond and initializes it to orig_dev. This keeps the intent of null_or_bond to pass frames received on VLAN interfaces stacked on bonding interfaces without invalidating the statement for null_or_orig. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08c624e..d03470f5260a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb) struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; - struct net_device *null_or_bond; + struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2868,10 +2868,10 @@ ncls: * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. */ - null_or_bond = NULL; + orig_or_bond = orig_dev; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_bond = vlan_dev_real_dev(skb->dev); + orig_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; @@ -2879,7 +2879,7 @@ ncls: &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == null_or_bond)) { + ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.2.1 From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 14:26:59 +0000 Subject: act_nat: fix the wrong checksum when addr isn't in old_addr/mask fix the wrong checksum when addr isn't in old_addr/mask For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or DNAT, and we should not update layer 4 checksum. Signed-off-by: Changli Gao ---- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) Signed-off-by: David S. Miller --- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba311564..570949417f38 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: -- cgit v1.2.1 From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001 From: Daniele Lacamera Date: Wed, 2 Jun 2010 02:02:04 +0000 Subject: TCP: tcp_hybla: Fix integer overflow in slow start increment For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16. Signed-off-by: Daniele Lacamera Signed-off-by: David S. Miller --- net/ipv4/tcp_hybla.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e054a634..377bc9349371 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * calculate 2^fract in a <<7 value. */ is_slowstart = 1; - increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) - - 128; + increment = ((1 << min(ca->rho, 16U)) * + hybla_fraction(rho_fractions)) - 128; } else { /* * congestion avoidance -- cgit v1.2.1 From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 07:32:42 -0700 Subject: cls_u32: use skb_header_pointer() to dereference data safely use skb_header_pointer() to dereference data safely the original skb->data dereference isn't safe, as there isn't any skb->len or skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch. And when the skb isn't long enough, we terminate the function u32_classify() immediately with -1. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 96275422c619..4f522143811e 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -134,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + unsigned int toff; + __be32 *data, _data; + + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -174,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); - + if (ht->divisor) { + __be32 *data, _data; + + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if (n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -204,9 +226,10 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: -- cgit v1.2.1 From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 04:55:02 +0000 Subject: act_pedit: access skb->data safely access skb->data safely we should use skb_header_pointer() and skb_store_bits() to access skb->data to handle small or non-linear skbs. Signed-off-by: Changli Gao ---- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdbd0b7bd840..50e3d945e1f4 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ @@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { @@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } -- cgit v1.2.1 From 8b9a4e6e442756f670ef507f09bbc6c11dc0fca6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 15:22:58 +0200 Subject: mac80211: process station blockack action frames from work Processing an association response could take a bit of time while we set up the hardware etc. During that time, the AP might already send a blockack request. If this happens very quickly on a fairly slow machine, we can end up processing the blockack request before the association processing has finished. Since the blockack processing cannot sleep right now, we also cannot make it wait in the driver. As a result, sometimes on slow machines the iwlagn driver gets totally confused, and no traffic can pass when the aggregation setup was done before the assoc setup completed. I'm working on a proper fix for this, which involves queuing all blockack category action frames from a work struct, and also allowing the ampdu_action driver callback to sleep, which will generally clean up the code and make things easier. However, this is a very involved and complex change. To fix the problem at hand in a way that can also be backported to stable, I've come up with this patch. Here, I simply process all aggregation action frames from the managed interface skb queue, which means their processing will be serialized with processing the association response, thereby fixing the problem. Signed-off-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- net/mac80211/rx.c | 3 +++ 2 files changed, 48 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0839c4e8fd2e..3310e70aa52f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1692,14 +1692,52 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: - if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_BACK: { + struct ieee80211_local *local = sdata->local; + int len = skb->len; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (!sta) { + rcu_read_unlock(); + break; + } + + local_bh_disable(); + + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_req))) + break; + ieee80211_process_addba_request(local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_resp))) + break; + ieee80211_process_addba_resp(local, sta, mgmt, len); + break; + case WLAN_ACTION_DELBA: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.delba))) + break; + ieee80211_process_delba(sdata, sta, mgmt, len); + break; + } + local_bh_enable(); + rcu_read_unlock(); break; - - ieee80211_sta_process_chanswitch(sdata, - &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv, - rx_status->mactime); - break; + } + case WLAN_CATEGORY_SPECTRUM_MGMT: + ieee80211_sta_process_chanswitch(sdata, + &mgmt->u.action.u.chan_switch.sw_elem, + (void *)ifmgd->associated->priv, + rx_status->mactime); + break; + } } mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5e0b65406c44..be9abc2e6348 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1944,6 +1944,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + return ieee80211_sta_rx_mgmt(sdata, rx->skb); + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + -- cgit v1.2.1 From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 4 Jun 2010 01:57:38 +0000 Subject: net: check for refcount if pop a stacked dst_entry xfrm triggers a warning if dst_pop() drops a refcount on a noref dst. This patch changes dst_pop() to skb_dst_pop(). skb_dst_pop() drops the refcnt only on a refcounted dst. Also we don't clone the child dst_entry, so it is not refcounted and we can use skb_dst_set_noref() in xfrm_output_one(). Signed-off-by: Steffen Klassert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6a329158bdfa..a3cca0a94346 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -95,13 +95,13 @@ resume: goto error_nolock; } - dst = dst_pop(dst); + dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst); + skb_dst_set_noref(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -- cgit v1.2.1 From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:42:30 +0000 Subject: syncookies: remove Kconfig text line about disabled-by-default syncookies default to on since e994b7c901ded7200b525a707c6da71f2cf6d4bb (tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e3a1fd938ab..7c3a7d191249 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,7 +303,7 @@ config ARPD If unsure, say N. config SYN_COOKIES - bool "IP: TCP syncookie support (disabled per default)" + bool "IP: TCP syncookie support" ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote @@ -328,13 +328,13 @@ config SYN_COOKIES server is really overloaded. If this happens frequently better turn them off. - If you say Y here, note that SYN cookies aren't enabled by default; - you can enable them by saying Y to "/proc file system support" and + If you say Y here, you can disable SYN cookies at run time by + saying Y to "/proc file system support" and "Sysctl support" below and executing the command - echo 1 >/proc/sys/net/ipv4/tcp_syncookies + echo 0 > /proc/sys/net/ipv4/tcp_syncookies - at boot time after the /proc file system has been mounted. + after the /proc file system has been mounted. If unsure, say N. -- cgit v1.2.1 From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 09:03:58 +0000 Subject: rps: tcp: fix rps_sock_flow_table table updates I believe a moderate SYN flood attack can corrupt RFS flow table (rps_sock_flow_table), making RPS/RFS much less effective. Even in a normal situation, server handling short lived sessions suffer from bad steering for the first data packet of a session, if another SYN packet is received for another session. We do following action in tcp_v4_rcv() : sock_rps_save_rxhash(sk, skb->rxhash); We should _not_ do this if sk is a LISTEN socket, as about each packet received on a LISTEN socket has a different rxhash than previous one. -> RPS_NO_CPU markers are spread all over rps_sock_flow_table. Also, it makes sense to protect sk->rxhash field changes with socket lock (We currently can change it even if user thread owns the lock and might use rxhash) This patch moves sock_rps_save_rxhash() to a sock locked section, and only for non LISTEN sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09c4cd4..fe193e53af44 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); + TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { @@ -1672,8 +1675,6 @@ process: skb->dev = NULL; - sock_rps_save_rxhash(sk, skb->rxhash); - bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { -- cgit v1.2.1 From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 05:45:47 +0000 Subject: tcp: use correct net ns in cookie_v4_check() Its better to make a route lookup in appropriate namespace. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4a3c91..9f6b22206c52 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { .sport = th->dest, .dport = th->source } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); goto out; } -- cgit v1.2.1 From 72e09ad107e78d69ff4d3b97a69f0aad2b77280f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jun 2010 03:03:30 -0700 Subject: ipv6: avoid high order allocations With mtu=9000, mld_newpack() use order-2 GFP_ATOMIC allocations, that are very unreliable, on machines where PAGE_SIZE=4K Limit allocated skbs to be at most one page. (order-0 allocations) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881968c7..ab1622d7d409 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1356,7 +1356,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); + size += LL_ALLOCATED_SPACE(dev); + /* limit our allocations to order-0 page */ + size = min_t(int, size, SKB_MAX_ORDER(0, 0)); + skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; -- cgit v1.2.1 From 8ffb335e8d696affc04f963bf73ce2196f80edb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 15:34:40 -0700 Subject: ip6mr: fix a typo in ip6mr_for_each_table() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 073071f2b75b..89c0b077c7aa 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -#define ip6mr_for_each_table(mrt, met) \ +#define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) -- cgit v1.2.1 From 035320d54758e21227987e3aae0d46e7a04f4ddc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 23:48:40 +0000 Subject: ipmr: dont corrupt lists ipmr_rules_exit() and ip6mr_rules_exit() free a list of items, but forget to properly remove these items from list. List head is not changed and still points to freed memory. This can trigger a fault later when icmpv6_sk_exit() is called. Fix is to either reinit list, or use list_del() to properly remove items from list before freeing them. bugzilla report : https://bugzilla.kernel.org/show_bug.cgi?id=16120 Introduced by commit d1db275dd3f6e4 (ipv6: ip6mr: support multiple tables) and commit f0ad0860d01e (ipv4: ipmr: support multiple tables) Reported-by: Alex Zhavnerchik Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 856123fe32f9..757f25eb9b4b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -267,8 +267,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { + list_del(&mrt->list); kfree(mrt); + } fib_rules_unregister(net->ipv4.mr_rules_ops); } #else diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 89c0b077c7aa..66078dad7fe8 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -254,8 +254,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { + list_del(&mrt->list); ip6mr_free_table(mrt); + } fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else -- cgit v1.2.1 From 35dd0509b21e4b5bab36b9eb80c8dab0322f5007 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Mon, 7 Jun 2010 16:33:49 +0200 Subject: mac80211: fix function pointer check This makes "iw wlan0 dump survey" work again with mac80211-based drivers that support it, e.g. ath5k. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4f2271316650..9c1da0809160 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -349,7 +349,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) { int ret = -EOPNOTSUPP; - if (local->ops->conf_tx) + if (local->ops->get_survey) ret = local->ops->get_survey(&local->hw, idx, survey); /* trace_drv_get_survey(local, idx, survey, ret); */ return ret; -- cgit v1.2.1 From b054b747a694927879c94dd11af54d04346aed7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Jun 2010 21:50:07 +0200 Subject: mac80211: fix deauth before assoc When we receive a deauthentication frame before having successfully associated, we neither print a message nor abort assocation. The former makes it hard to debug, while the latter later causes a warning in cfg80211 when, as will typically be the case, association timed out. This warning was reported by many, e.g. in https://bugzilla.kernel.org/show_bug.cgi?id=15981, but I couldn't initially pinpoint it. I verified the fix by hacking hostapd to send a deauth frame instead of an association response. Cc: stable@kernel.org Signed-off-by: Johannes Berg Tested-by: Miles Lane Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3310e70aa52f..f803f8b72a93 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1760,9 +1760,45 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && - (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk; + + mutex_lock(&local->work_mtx); + list_for_each_entry(wk, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + + if (wk->type != IEEE80211_WORK_ASSOC) + continue; + + if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN)) + continue; + if (memcmp(mgmt->sa, wk->filter_ta, ETH_ALEN)) + continue; + /* + * Printing the message only here means we can't + * spuriously print it, but it also means that it + * won't be printed when the frame comes in before + * we even tried to associate or in similar cases. + * + * Ultimately, I suspect cfg80211 should print the + * messages instead. + */ + printk(KERN_DEBUG + "%s: deauthenticated from %pM (Reason: %u)\n", + sdata->name, mgmt->bssid, + le16_to_cpu(mgmt->u.deauth.reason_code)); + + list_del_rcu(&wk->list); + free_work(wk); + break; + } + mutex_unlock(&local->work_mtx); + + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + } out: kfree_skb(skb); } -- cgit v1.2.1 From aea9d711f3d68c656ad31ab578ecfb0bb5cd7f97 Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Wed, 9 Jun 2010 16:10:57 +0200 Subject: ipvs: Add missing locking during connection table hashing and unhashing The code that hashes and unhashes connections from the connection table is missing locking of the connection being modified, which opens up a race condition and results in memory corruption when this race condition is hit. Here is what happens in pretty verbose form: CPU 0 CPU 1 ------------ ------------ An active connection is terminated and we schedule ip_vs_conn_expire() on this CPU to expire this connection. IRQ assignment is changed to this CPU, but the expire timer stays scheduled on the other CPU. New connection from same ip:port comes in right before the timer expires, we find the inactive connection in our connection table and get a reference to it. We proper lock the connection in tcp_state_transition() and read the connection flags in set_tcp_state(). ip_vs_conn_expire() gets called, we unhash the connection from our connection table and remove the hashed flag in ip_vs_conn_unhash(), without proper locking! While still holding proper locks we write the connection flags in set_tcp_state() and this sets the hashed flag again. ip_vs_conn_expire() fails to expire the connection, because the other CPU has incremented the reference count. We try to re-insert the connection into our connection table, but this fails in ip_vs_conn_hash(), because the hashed flag has been set by the other CPU. We re-schedule execution of ip_vs_conn_expire(). Now this connection has the hashed flag set, but isn't actually hashed in our connection table and has a dangling list_head. We drop the reference we held on the connection and schedule the expire timer for timeouting the connection on this CPU. Further packets won't be able to find this connection in our connection table. ip_vs_conn_expire() gets called again, we think it's already hashed, but the list_head is dangling and while removing the connection from our connection table we write to the memory location where this list_head points to. The result will probably be a kernel oops at some other point in time. This race condition is pretty subtle, but it can be triggered remotely. It needs the IRQ assignment change or another circumstance where packets coming from the same ip:port for the same service are being processed on different CPUs. And it involves hitting the exact time at which ip_vs_conn_expire() gets called. It can be avoided by making sure that all packets from one connection are always processed on the same CPU and can be made harder to exploit by changing the connection timeouts to some custom values. Signed-off-by: Sven Wegener Cc: stable@kernel.org Acked-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d8f7e8ef67b4..ff04e9edbed6 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -162,6 +162,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { list_add(&cp->c_list, &ip_vs_conn_tab[hash]); @@ -174,6 +175,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) ret = 0; } + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; @@ -193,6 +195,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); + spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { list_del(&cp->c_list); @@ -202,6 +205,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) } else ret = 0; + spin_unlock(&cp->lock); ct_write_unlock(hash); return ret; -- cgit v1.2.1 From 08c801f8d45387a1b46066aad1789a9bb9c4b645 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 8 Jun 2010 17:51:27 -0600 Subject: net: Print num_rx_queues imbalance warning only when there are allocated queues BugLink: http://bugs.launchpad.net/bugs/591416 There are a number of network drivers (bridge, bonding, etc) that are not yet receive multi-queue enabled and use alloc_netdev(), so don't print a num_rx_queues imbalance warning in that case. Also, only print the warning once for those drivers that _are_ multi-queue enabled. Signed-off-by: Tim Gardner Acked-by: Eric Dumazet --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d03470f5260a..14a85682af38 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2253,11 +2253,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->num_rx_queues)) { - if (net_ratelimit()) { - pr_warning("%s received packet on queue " - "%u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - } + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); goto done; } rxqueue = dev->_rx + index; -- cgit v1.2.1 From aea34e7ae7a40bc72f9f11b5658160dfb4b90c48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 7 Jun 2010 04:51:58 +0000 Subject: caif: fix a couple range checks The extra ! character means that these conditions are always false. Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfrfml.c | 2 +- net/caif/cfveil.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index cd2830fec935..fd27b172fb5d 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -83,7 +83,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_err("CAIF: %s():Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 0fd827f49491..e04f7d964e83 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -84,7 +84,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) return ret; caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_warning("CAIF: %s(): Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; -- cgit v1.2.1 From 00d9d6a185de89edc0649ca4ead58f0283dfcbac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 22:24:44 +0000 Subject: ipv6: fix ICMP6_MIB_OUTERRORS In commit 1f8438a85366 (icmp: Account for ICMP out errors), I did a typo on IPV6 side, using ICMP6_MIB_OUTMSGS instead of ICMP6_MIB_OUTERRORS Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ce7992982557..03e62f94ff8e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -483,7 +483,7 @@ route_done: np->tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } @@ -565,7 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } -- cgit v1.2.1 From 597a264b1a9c7e36d1728f677c66c5c1f7e3b837 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 3 Jun 2010 09:30:11 +0000 Subject: net: deliver skbs on inactive slaves to exact matches Currently, the accelerated receive path for VLAN's will drop packets if the real device is an inactive slave and is not one of the special pkts tested for in skb_bond_should_drop(). This behavior is different then the non-accelerated path and for pkts over a bonded vlan. For example, vlanx -> bond0 -> ethx will be dropped in the vlan path and not delivered to any packet handlers at all. However, bond0 -> vlanx -> ethx and bond0 -> ethx will be delivered to handlers that match the exact dev, because the VLAN path checks the real_dev which is not a slave and netif_recv_skb() doesn't drop frames but only delivers them to exact matches. This patch adds a sk_buff flag which is used for tagging skbs that would previously been dropped and allows the skb to continue to skb_netif_recv(). Here we add logic to check for the deliver_no_wcard flag and if it is set only deliver to handlers that match exactly. This makes both paths above consistent and gives pkt handlers a way to identify skbs that come from inactive slaves. Without this patch in some configurations skbs will be delivered to handlers with exact matches and in others be dropped out right in the vlan path. I have tested the following 4 configurations in failover modes and load balancing modes. # bond0 -> ethx # vlanx -> bond0 -> ethx # bond0 -> vlanx -> ethx # bond0 -> ethx | vlanx -> -- Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 ++-- net/core/dev.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index bd537fc10254..50f58f5f1c34 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -12,7 +12,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_RX_DROP; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); @@ -84,7 +84,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, struct sk_buff *p; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); diff --git a/net/core/dev.c b/net/core/dev.c index 14a85682af38..2b3bf53bc687 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2810,13 +2810,24 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; + /* + * bonding note: skbs received on inactive slaves should only + * be delivered to pkt handlers that are exact matches. Also + * the deliver_no_wcard flag will be set. If packet handlers + * are sensitive to duplicate packets these skbs will need to + * be dropped at the handler. The vlan accel path may have + * already set the deliver_no_wcard flag. + */ null_or_orig = NULL; orig_dev = skb->dev; master = ACCESS_ONCE(orig_dev->master); - if (master) { - if (skb_bond_should_drop(skb, master)) + if (skb->deliver_no_wcard) + null_or_orig = orig_dev; + else if (master) { + if (skb_bond_should_drop(skb, master)) { + skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ - else + } else skb->dev = master; } -- cgit v1.2.1 From ae638c47dc040b8def16d05dc6acdd527628f231 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jun 2010 23:39:10 +0000 Subject: pkt_sched: gen_estimator: add a new lock gen_kill_estimator() / gen_new_estimator() is not always called with RTNL held. net/netfilter/xt_RATEEST.c is one user of these API that do not hold RTNL, so random corruptions can occur between "tc" and "iptables". Add a new fine grained lock instead of trying to use RTNL in netfilter. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cf8e70392fe0..785e5276a300 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock); /* Protects against soft lockup during large deletion */ static struct rb_root est_root = RB_ROOT; +static DEFINE_SPINLOCK(est_tree_lock); static void est_timer(unsigned long arg) { @@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * * Returns 0 on success or a negative error code. * - * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, @@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; + spin_lock(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); + spin_unlock(&est_tree_lock); return 0; } @@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * - * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; + spin_lock(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } + spin_unlock(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { + bool res; + ASSERT_RTNL(); - return gen_find_node(bstats, rate_est) != NULL; + spin_lock(&est_tree_lock); + res = gen_find_node(bstats, rate_est) != NULL; + spin_unlock(&est_tree_lock); + + return res; } EXPORT_SYMBOL(gen_estimator_active); -- cgit v1.2.1 From 07a0f0f07a68014c92c752a5598102372bddf46e Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Thu, 10 Jun 2010 23:08:11 -0700 Subject: pktgen: Fix accuracy of inter-packet delay. This patch correct a bug in the delay of pktgen. It makes sure the inter-packet interval is accurate. Signed-off-by: Daniel Turull Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2ad68da418df..1dacd7ba8dbb 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); - pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); + pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) -- cgit v1.2.1 From e897082fe7a5b591dc4dd5599ac39081a7c8e482 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 13 Jun 2010 10:36:30 +0000 Subject: net: fix deliver_no_wcard regression on loopback device deliver_no_wcard is not being set in skb_copy_header. In the skb_cloned case it is not being cleared and may cause the skb to be dropped when the loopback device pushes it back up the stack. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Tested-by: Markus Trippelsdorf Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f07e749d7b1..bcf2fa3e0ddc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; + new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif -- cgit v1.2.1 From e8d15e6460cb0eea00f2574a80d94496943403ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 13 Jun 2010 10:50:46 +0000 Subject: net: rxhash already set in __copy_skb_header No need to copy rxhash again in __skb_clone() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bcf2fa3e0ddc..34432b4e96bb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -570,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); - C(rxhash); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; -- cgit v1.2.1 From fed396a585d8e1870b326f2e8e1888a72957abb8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 15 Jun 2010 21:43:07 -0700 Subject: bridge: Fix OOM crash in deliver_clone The bridge multicast patches introduced an OOM crash in the forward path, when deliver_clone fails to clone the skb. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index a98ef1393097..a4e72a89e4ff 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -140,10 +140,10 @@ static int deliver_clone(const struct net_bridge_port *prev, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; - dev->stats.tx_dropped++; return -ENOMEM; } -- cgit v1.2.1 From 021570e55b7152843376b9d9f60624e3e05ac054 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Wed, 16 Jun 2010 16:37:34 +0200 Subject: mac80211: fix warn, enum may be used uninitialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regression introduced by b8d92c9c141ee3dc9b3537b1f0ffb4a54ea8d9b2 In function ‘ieee80211_work_rx_queued_mgmt’: warning: ‘rma’ may be used uninitialized in this function this re-adds default value WORK_ACT_NONE back to rma Signed-off-by: Christoph Fritz Signed-off-by: John W. Linville --- net/mac80211/work.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index be3d4a698692..b025dc7bb0fd 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -715,7 +715,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; struct ieee80211_work *wk; - enum work_action rma; + enum work_action rma = WORK_ACT_NONE; u16 fc; rx_status = (struct ieee80211_rx_status *) skb->cb; -- cgit v1.2.1 From fa68a7822780fdc1295f7efb7e4313e62b447e75 Mon Sep 17 00:00:00 2001 From: Tom Hughes Date: Tue, 15 Jun 2010 22:24:28 +0000 Subject: Clear IFF_XMIT_DST_RELEASE for teql interfaces https://bugzilla.kernel.org/show_bug.cgi?id=16183 The sch_teql module, which can be used to load balance over a set of underlying interfaces, stopped working after 2.6.30 and has been broken in all kernels since then for any underlying interface which requires the addition of link level headers. The problem is that the transmit routine relies on being able to access the destination address in the skb in order to do address resolution once it has decided which underlying interface it is going to transmit through. In 2.6.31 the IFF_XMIT_DST_RELEASE flag was introduced, and set by default for all interfaces, which causes the destination address to be released before the transmit routine for the interface is called. The solution is to clear that flag for teql interfaces. Signed-off-by: Tom Hughes Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_teql.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 3415b6ce1c0a..807643bdcbac 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -449,6 +449,7 @@ static __init void teql_master_setup(struct net_device *dev) dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } static LIST_HEAD(master_dev_list); -- cgit v1.2.1 From 25442e06d20aaba7d7b16438078a562b3e4cf19b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 15 Jun 2010 06:14:12 +0000 Subject: bridge: fdb cleanup runs too often It is common in end-node, non STP bridges to set forwarding delay to zero; which causes the forwarding database cleanup to run every clock tick. Change to run only as soon as needed or at next ageing timer interval which ever is sooner. Use round_jiffies_up macro rather than attempting round up by changing value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 26637439965b..b01dde35a69e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -128,7 +128,7 @@ void br_fdb_cleanup(unsigned long _data) { struct net_bridge *br = (struct net_bridge *)_data; unsigned long delay = hold_time(br); - unsigned long next_timer = jiffies + br->forward_delay; + unsigned long next_timer = jiffies + br->ageing_time; int i; spin_lock_bh(&br->hash_lock); @@ -149,9 +149,7 @@ void br_fdb_cleanup(unsigned long _data) } spin_unlock_bh(&br->hash_lock); - /* Add HZ/4 to ensure we round the jiffies upwards to be after the next - * timer, otherwise we might round down and will have no-op run. */ - mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4)); + mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); } /* Completely flush all dynamic entries in forwarding database.*/ -- cgit v1.2.1 From 26cde9f7e2747b6d254b704594eed87ab959afa5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 15 Jun 2010 01:52:25 +0000 Subject: udp: Fix bogus UFO packet generation It has been reported that the new UFO software fallback path fails under certain conditions with NFS. I tracked the problem down to the generation of UFO packets that are smaller than the MTU. The software fallback path simply discards these packets. This patch fixes the problem by not generating such packets on the UFO path. Signed-off-by: Herbert Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9a4a6c96cb0d..041d41df1224 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -873,8 +873,10 @@ int ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; + skb = skb_peek_tail(&sk->sk_write_queue); + inet->cork.length += length; - if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && + if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->u.dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, @@ -892,7 +894,7 @@ int ip_append_data(struct sock *sk, * adding appropriate IP header. */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { @@ -1121,7 +1123,8 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return -EINVAL; inet->cork.length += size; - if ((sk->sk_protocol == IPPROTO_UDP) && + if ((size + skb->len > mtu) && + (sk->sk_protocol == IPPROTO_UDP) && (rt->u.dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; -- cgit v1.2.1 From b76ce56192bcf618013fb9aecd83488cffd645cc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Jun 2010 13:57:32 -0400 Subject: SUNRPC: Fix a re-entrancy bug in xs_tcp_read_calldir() If the attempt to read the calldir fails, then instead of storing the read bytes, we currently discard them. This leads to a garbage final result when upon re-entry to the same routine, we read the remaining bytes. Fixes the regression in bugzilla number 16213. Please see https://bugzilla.kernel.org/show_bug.cgi?id=16213 Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/xprtsock.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2a9675136c68..7ca65c7005ea 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -210,7 +210,8 @@ struct sock_xprt { * State of TCP reply receive */ __be32 tcp_fraghdr, - tcp_xid; + tcp_xid, + tcp_calldir; u32 tcp_offset, tcp_reclen; @@ -927,7 +928,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, { size_t len, used; u32 offset; - __be32 calldir; + char *p; /* * We want transport->tcp_offset to be 8 at the end of this routine @@ -936,26 +937,33 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, * transport->tcp_offset is 4 (after having already read the xid). */ offset = transport->tcp_offset - sizeof(transport->tcp_xid); - len = sizeof(calldir) - offset; + len = sizeof(transport->tcp_calldir) - offset; dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); - used = xdr_skb_read_bits(desc, &calldir, len); + p = ((char *) &transport->tcp_calldir) + offset; + used = xdr_skb_read_bits(desc, p, len); transport->tcp_offset += used; if (used != len) return; transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; - transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; - transport->tcp_flags |= TCP_RCV_COPY_DATA; /* * We don't yet have the XDR buffer, so we will write the calldir * out after we get the buffer from the 'struct rpc_rqst' */ - if (ntohl(calldir) == RPC_REPLY) + switch (ntohl(transport->tcp_calldir)) { + case RPC_REPLY: + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_DATA; transport->tcp_flags |= TCP_RPC_REPLY; - else + break; + case RPC_CALL: + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_DATA; transport->tcp_flags &= ~TCP_RPC_REPLY; - dprintk("RPC: reading %s CALL/REPLY flag %08x\n", - (transport->tcp_flags & TCP_RPC_REPLY) ? - "reply for" : "request with", calldir); + break; + default: + dprintk("RPC: invalid request message type\n"); + xprt_force_disconnect(&transport->xprt); + } xs_tcp_check_fraghdr(transport); } @@ -975,12 +983,10 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, /* * Save the RPC direction in the XDR buffer */ - __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ? - htonl(RPC_REPLY) : 0; - memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, - &calldir, sizeof(calldir)); - transport->tcp_copied += sizeof(calldir); + &transport->tcp_calldir, + sizeof(transport->tcp_calldir)); + transport->tcp_copied += sizeof(transport->tcp_calldir); transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; } -- cgit v1.2.1 From b1312c89f0016f778cac4f1536f1434e132f8713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Thu, 24 Jun 2010 14:35:00 -0700 Subject: xfrm: check bundle policy existance before dereferencing it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the bundle validation code to not assume having a valid policy. When we have multiple transformations for a xfrm policy, the bundle instance will be a chain of bundles with only the first one having the policy reference. When policy_genid is bumped it will expire the first bundle in the chain which is equivalent of expiring the whole chain. Reported-bisected-and-tested-by: Justin P. Mattock Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4bf27d901333..af1c173be4ad 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2300,7 +2300,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 0; if (xdst->xfrm_genid != dst->xfrm->genid) return 0; - if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) + if (xdst->num_pols > 0 && + xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; if (strict && fl && -- cgit v1.2.1 From 1a61a83ff59378a5613d8c706c4a660c353b62a8 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 18 Jun 2010 14:24:00 +0000 Subject: Bluetooth: Bring back var 'i' increment commit ff6e2163f28a1094fb5ca5950fe2b43c3cf6bc7a accidentally added a regression on the bnep code. Fixing it. Signed-off-by: Gustavo F. Padovan Signed-off-by: David S. Miller --- net/bluetooth/bnep/netdev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 0faad5ce6dc4..8c100c9dae28 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -104,6 +104,8 @@ static void bnep_net_set_mc_list(struct net_device *dev) break; memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); + + i++; } r->len = htons(skb->len - len); } -- cgit v1.2.1 From 9f888160bdcccf0565dd2774956b8d9456e610be Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 21 Jun 2010 11:00:13 +0000 Subject: ipv6: fix NULL reference in proxy neighbor discovery The addition of TLLAO option created a kernel OOPS regression for the case where neighbor advertisement is being sent via proxy path. When using proxy, ipv6_get_ifaddr() returns NULL causing the NULL dereference. Change causing the bug was: commit f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50 Author: Octavian Purdila Date: Fri Oct 2 11:39:15 2009 +0000 make TLLAO option for NA packets configurable Signed-off-by: Stephen Hemminger Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0abdc242ddb7..2efef52fb461 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -586,6 +586,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = 0; + inc_opt |= ifp->idev->cnf.force_tllao; in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, @@ -599,7 +600,6 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, icmp6h.icmp6_solicited = solicited; icmp6h.icmp6_override = override; - inc_opt |= ifp->idev->cnf.force_tllao; __ndisc_send(dev, neigh, daddr, src_addr, &icmp6h, solicited_addr, inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); -- cgit v1.2.1