From 4479ff76c43607b680f9349128d8493228b49dce Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 9 Sep 2013 09:39:01 +0200
Subject: xfrm: Fix replay size checking on async events

We pass the wrong netlink attribute to xfrm_replay_verify_len().
It should be XFRMA_REPLAY_ESN_VAL and not XFRMA_REPLAY_VAL as
we currently doing. This causes memory corruptions if the
replay esn attribute has incorrect length. Fix this by passing
the right attribute to xfrm_replay_verify_len().

Reported-by: Michael Rossberg <michael.rossberg@tu-ilmenau.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3f565e495ac6..4b26ceedff26 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1856,7 +1856,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (x->km.state != XFRM_STATE_VALID)
 		goto out;
 
-	err = xfrm_replay_verify_len(x->replay_esn, rp);
+	err = xfrm_replay_verify_len(x->replay_esn, re);
 	if (err)
 		goto out;
 
-- 
cgit v1.2.1


From bafd4bd4dcfa13145db7f951251eef3e10f8c278 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 9 Sep 2013 10:38:38 +0200
Subject: xfrm: Decode sessions with output interface.

The output interface matching does not work on forward
policy lookups, the output interface of the flowi is
always 0. Fix this by setting the output interface when
we decode the session.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_policy.c | 1 +
 net/ipv6/xfrm6_policy.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9a459be24af7..ccde54248c8c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -107,6 +107,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 	memset(fl4, 0, sizeof(struct flowi4));
 	fl4->flowi4_mark = skb->mark;
+	fl4->flowi4_oif = skb_dst(skb)->dev->ifindex;
 
 	if (!ip_is_fragment(iph)) {
 		switch (iph->protocol) {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 23ed03d786c8..08ed2772b7aa 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -138,6 +138,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->flowi6_mark = skb->mark;
+	fl6->flowi6_oif = skb_dst(skb)->dev->ifindex;
 
 	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
 	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-- 
cgit v1.2.1


From b3b2b9e192d5811f91f9cd92aeec489cecabc92e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 10 Sep 2013 13:43:09 +0200
Subject: ipsec: Don't update the pmtu on ICMPV6_DEST_UNREACH

Currently we update the pmtu in the IPsec protocol error handlers
if icmpv6 message type is either ICMPV6_DEST_UNREACH or
ICMPV6_PKT_TOOBIG. Updating the pmtu on ICMPV6_DEST_UNREACH
is wrong in any case, it causes strangely fragmented packets.
Only ICMPV6_PKT_TOOBIG signalizes pmtu discovery, so remove the
ICMPV6_DEST_UNREACH check in the IPsec protocol error handlers.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ah6.c     | 3 +--
 net/ipv6/esp6.c    | 3 +--
 net/ipv6/ipcomp6.c | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 73784c3d4642..82e1da3a40b9 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -618,8 +618,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
 	struct xfrm_state *x;
 
-	if (type != ICMPV6_DEST_UNREACH &&
-	    type != ICMPV6_PKT_TOOBIG &&
+	if (type != ICMPV6_PKT_TOOBIG &&
 	    type != NDISC_REDIRECT)
 		return;
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index d3618a78fcac..e67e63f9858d 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -436,8 +436,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
 	struct xfrm_state *x;
 
-	if (type != ICMPV6_DEST_UNREACH &&
-	    type != ICMPV6_PKT_TOOBIG &&
+	if (type != ICMPV6_PKT_TOOBIG &&
 	    type != NDISC_REDIRECT)
 		return;
 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5636a912074a..ce507d9e1c90 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -64,8 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		(struct ip_comp_hdr *)(skb->data + offset);
 	struct xfrm_state *x;
 
-	if (type != ICMPV6_DEST_UNREACH &&
-	    type != ICMPV6_PKT_TOOBIG &&
+	if (type != ICMPV6_PKT_TOOBIG &&
 	    type != NDISC_REDIRECT)
 		return;
 
-- 
cgit v1.2.1


From 33fce60d6a6e137035f8e23a89d7fd55f3a24cda Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@windriver.com>
Date: Tue, 17 Sep 2013 15:14:13 +0800
Subject: xfrm: Guard IPsec anti replay window against replay bitmap

For legacy IPsec anti replay mechanism:

bitmap in struct xfrm_replay_state could only provide a 32 bits
window size limit in current design, thus user level parameter
sadb_sa_replay should honor this limit, otherwise misleading
outputs("replay=244") by setkey -D will be:

192.168.25.2 192.168.22.2
	esp mode=transport spi=147561170(0x08cb9ad2) reqid=0(0x00000000)
	E: aes-cbc  9a8d7468 7655cf0b 719d27be b0ddaac2
	A: hmac-sha1  2d2115c2 ebf7c126 1c54f186 3b139b58 264a7331
	seq=0x00000000 replay=244 flags=0x00000000 state=mature
	created: Sep 17 14:00:00 2013	current: Sep 17 14:00:22 2013
	diff: 22(s)	hard: 30(s)	soft: 26(s)
	last: Sep 17 14:00:00 2013	hard: 0(s)	soft: 0(s)
	current: 1408(bytes)	hard: 0(bytes)	soft: 0(bytes)
	allocated: 22	hard: 0	soft: 0
	sadb_seq=1 pid=4854 refcnt=0
192.168.22.2 192.168.25.2
	esp mode=transport spi=255302123(0x0f3799eb) reqid=0(0x00000000)
	E: aes-cbc  6485d990 f61a6bd5 e5660252 608ad282
	A: hmac-sha1  0cca811a eb4fa893 c47ae56c 98f6e413 87379a88
	seq=0x00000000 replay=244 flags=0x00000000 state=mature
	created: Sep 17 14:00:00 2013	current: Sep 17 14:00:22 2013
	diff: 22(s)	hard: 30(s)	soft: 26(s)
	last: Sep 17 14:00:00 2013	hard: 0(s)	soft: 0(s)
	current: 1408(bytes)	hard: 0(bytes)	soft: 0(bytes)
	allocated: 22	hard: 0	soft: 0
	sadb_seq=0 pid=4854 refcnt=0

And also, optimizing xfrm_replay_check window checking by setting the
desirable x->props.replay_window with only doing the comparison once
for all when xfrm_state is first born.

Signed-off-by: Fan Du <fan.du@windriver.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c       | 3 ++-
 net/xfrm/xfrm_replay.c | 3 +--
 net/xfrm/xfrm_user.c   | 3 ++-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d585370c5b4..911ef03bf8fb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1098,7 +1098,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 
 	x->id.proto = proto;
 	x->id.spi = sa->sadb_sa_spi;
-	x->props.replay_window = sa->sadb_sa_replay;
+	x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay,
+					(sizeof(x->replay.bitmap) * 8));
 	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN)
 		x->props.flags |= XFRM_STATE_NOECN;
 	if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP)
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 8dafe6d3c6e4..eeca388effc7 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -129,8 +129,7 @@ static int xfrm_replay_check(struct xfrm_state *x,
 		return 0;
 
 	diff = x->replay.seq - seq;
-	if (diff >= min_t(unsigned int, x->props.replay_window,
-			  sizeof(x->replay.bitmap) * 8)) {
+	if (diff >= x->props.replay_window) {
 		x->stats.replay_window++;
 		goto err;
 	}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 4b26ceedff26..f964d4c00ffb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -446,7 +446,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
 	memcpy(&x->sel, &p->sel, sizeof(x->sel));
 	memcpy(&x->lft, &p->lft, sizeof(x->lft));
 	x->props.mode = p->mode;
-	x->props.replay_window = p->replay_window;
+	x->props.replay_window = min_t(unsigned int, p->replay_window,
+					sizeof(x->replay.bitmap) * 8);
 	x->props.reqid = p->reqid;
 	x->props.family = p->family;
 	memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
-- 
cgit v1.2.1


From cd808fc9a6c7cd3a4311d9d2cffc4adbeaef5f6c Mon Sep 17 00:00:00 2001
From: Thomas Egerer <thomas.egerer@secunet.com>
Date: Thu, 19 Sep 2013 13:19:19 +0200
Subject: xfrm: Fix aevent generation for each received packet

If asynchronous events are enabled for a particular netlink socket,
the notify function is called by the advance function. The notify
function creates and dispatches a km_event if a replay timeout occurred,
or at least replay_maxdiff packets have been received since the last
asynchronous event has been sent. The function is supposed to return if
neither of the two events were detected for a state, or replay_maxdiff
is equal to zero.
Replay_maxdiff is initialized in xfrm_state_construct to the value of
the xfrm.sysctl_aevent_rseqth (2 by default), and updated if for a state
if the netlink attribute XFRMA_REPLAY_THRESH is set.
If, however, replay_maxdiff is set to zero, then all of the three notify
implementations perform a break from the switch statement instead of
checking whether a timeout occurred, and -- if not -- return.  As a
result an asynchronous event is generated for every replay update of a
state that has a zero replay_maxdiff value.
This patch modifies the notify functions such that they immediately
return if replay_maxdiff has the value zero, unless a timeout occurred.

Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_replay.c | 51 ++++++++++++++++++++++++++------------------------
 1 file changed, 27 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index eeca388effc7..dab57daae408 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -61,9 +61,9 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
 
 	switch (event) {
 	case XFRM_REPLAY_UPDATE:
-		if (x->replay_maxdiff &&
-		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
-		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
+		if (!x->replay_maxdiff ||
+		    ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
+		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) {
 			if (x->xflags & XFRM_TIME_DEFER)
 				event = XFRM_REPLAY_TIMEOUT;
 			else
@@ -301,9 +301,10 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
 
 	switch (event) {
 	case XFRM_REPLAY_UPDATE:
-		if (x->replay_maxdiff &&
-		    (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
-		    (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) {
+		if (!x->replay_maxdiff ||
+		    ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
+		    (replay_esn->oseq - preplay_esn->oseq
+		     < x->replay_maxdiff))) {
 			if (x->xflags & XFRM_TIME_DEFER)
 				event = XFRM_REPLAY_TIMEOUT;
 			else
@@ -352,28 +353,30 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event)
 
 	switch (event) {
 	case XFRM_REPLAY_UPDATE:
-		if (!x->replay_maxdiff)
-			break;
-
-		if (replay_esn->seq_hi == preplay_esn->seq_hi)
-			seq_diff = replay_esn->seq - preplay_esn->seq;
-		else
-			seq_diff = ~preplay_esn->seq + replay_esn->seq + 1;
-
-		if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
-			oseq_diff = replay_esn->oseq - preplay_esn->oseq;
-		else
-			oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1;
-
-		if (seq_diff < x->replay_maxdiff &&
-		    oseq_diff < x->replay_maxdiff) {
+		if (x->replay_maxdiff) {
+			if (replay_esn->seq_hi == preplay_esn->seq_hi)
+				seq_diff = replay_esn->seq - preplay_esn->seq;
+			else
+				seq_diff = ~preplay_esn->seq + replay_esn->seq
+					   + 1;
 
-			if (x->xflags & XFRM_TIME_DEFER)
-				event = XFRM_REPLAY_TIMEOUT;
+			if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
+				oseq_diff = replay_esn->oseq
+					    - preplay_esn->oseq;
 			else
-				return;
+				oseq_diff = ~preplay_esn->oseq
+					    + replay_esn->oseq + 1;
+
+			if (seq_diff >= x->replay_maxdiff ||
+			    oseq_diff >= x->replay_maxdiff)
+				break;
 		}
 
+		if (x->xflags & XFRM_TIME_DEFER)
+			event = XFRM_REPLAY_TIMEOUT;
+		else
+			return;
+
 		break;
 
 	case XFRM_REPLAY_TIMEOUT:
-- 
cgit v1.2.1


From 180032973ee97daddf5c9d733e5b425b108f8679 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Thu, 29 Aug 2013 13:26:57 +0300
Subject: cfg80211: use the correct macro to check for active monitor support

Use MONITOR_FLAG_ACTIVE, which is a flag mask, instead of
NL80211_MNTR_FLAG_ACTIVE, which is a flag index, when checking if the
hardware supports active monitoring.

Cc: stable@vger.kernel.org
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index af8d84a4a5b2..626dc3b5fd8d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2421,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 		change = true;
 	}
 
-	if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) &&
+	if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
 	    !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
 		return -EOPNOTSUPP;
 
@@ -2483,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 				  info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
 				  &flags);
 
-	if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) &&
+	if (!err && (flags & MONITOR_FLAG_ACTIVE) &&
 	    !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.1


From f478f33a93f9353dcd1fe55445343d76b1c3f84a Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 26 Sep 2013 16:55:28 +0100
Subject: cfg80211: fix warning when using WEXT for IBSS

Fix kernel warning when using WEXT for configuring ad-hoc mode,
e.g.  "iwconfig wlan0 essid test channel 1"

WARNING: at net/wireless/chan.c:373 cfg80211_chandef_usable+0x50/0x21c [cfg80211]()

The warning is caused by an uninitialized variable center_freq1.

Cc: stable@vger.kernel.org
Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/ibss.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 39bff7d36768..403fe29c024d 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 				if (chan->flags & IEEE80211_CHAN_DISABLED)
 					continue;
 				wdev->wext.ibss.chandef.chan = chan;
+				wdev->wext.ibss.chandef.center_freq1 =
+					chan->center_freq;
 				break;
 			}
 
@@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
 	if (chan) {
 		wdev->wext.ibss.chandef.chan = chan;
 		wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+		wdev->wext.ibss.chandef.center_freq1 = freq;
 		wdev->wext.ibss.channel_fixed = true;
 	} else {
 		/* cfg80211_ibss_wext_join will pick one if needed */
-- 
cgit v1.2.1


From 6329b8d917adc077caa60c2447385554130853a3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 17 Sep 2013 11:15:43 +0200
Subject: mac80211: drop spoofed packets in ad-hoc mode

If an Ad-Hoc node receives packets with the Cell ID or its own MAC
address as source address, it hits a WARN_ON in sta_info_insert_check()
With many packets, this can massively spam the logs. One way that this
can easily happen is through having Cisco APs in the area with rouge AP
detection and countermeasures enabled.
Such Cisco APs will regularly send fake beacons, disassoc and deauth
packets that trigger these warnings.

To fix this issue, drop such spoofed packets early in the rx path.

Cc: stable@vger.kernel.org
Reported-by: Thomas Huehn <thomas@net.t-labs.tu-berlin.de>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 54395d7583ba..674eac1f996c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3056,6 +3056,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
 	case NL80211_IFTYPE_ADHOC:
 		if (!bssid)
 			return 0;
+		if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
+		    ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+			return 0;
 		if (ieee80211_is_beacon(hdr->frame_control)) {
 			return 1;
 		} else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
-- 
cgit v1.2.1


From cc63ec766b7821c8dc4dc1d1e980124aea96b553 Mon Sep 17 00:00:00 2001
From: Chun-Yeow Yeoh <yeohchunyeow@cozybit.com>
Date: Sat, 7 Sep 2013 23:40:44 -0700
Subject: mac80211: fix the setting of extended supported rate IE

The patch "mac80211: select and adjust bitrates according to
channel mode" causes regression and breaks the extended supported rate
IE setting. Since "i" is starting with 8, so this is not necessary
to introduce "skip" here.

Signed-off-by: Chun-Yeow Yeoh <yeohchunyeow@cozybit.com>
Signed-off-by: Colleen Twitty <colleen@cozybit.com>
Reviewed-by: Jason Abele <jason@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e1b34a18b243..9c3200bcfc02 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2103,7 +2103,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_supported_band *sband;
-	int rate, skip, shift;
+	int rate, shift;
 	u8 i, exrates, *pos;
 	u32 basic_rates = sdata->vif.bss_conf.basic_rates;
 	u32 rate_flags;
@@ -2131,14 +2131,11 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
 		pos = skb_put(skb, exrates + 2);
 		*pos++ = WLAN_EID_EXT_SUPP_RATES;
 		*pos++ = exrates;
-		skip = 0;
 		for (i = 8; i < sband->n_bitrates; i++) {
 			u8 basic = 0;
 			if ((rate_flags & sband->bitrates[i].flags)
 			    != rate_flags)
 				continue;
-			if (skip++ < 8)
-				continue;
 			if (need_basic && basic_rates & BIT(i))
 				basic = 0x80;
 			rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
-- 
cgit v1.2.1


From aa5f66d5a1df1c2b04bccdcb19711675c765d7c4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 26 Sep 2013 20:03:45 +0200
Subject: cfg80211: fix sysfs registration race

My locking rework/race fixes caused a regression in the
registration, causing uevent notifications for wireless
devices before the device is really fully registered and
available in nl80211.

Fix this by moving the device_add() under rtnl and move
the rfkill to afterwards (it can't be under rtnl.)

Reported-and-tested-by: Maxime Bizon <mbizon@freebox.fr>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 67153964aad2..fe8d4f2be49b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -566,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy)
 	/* check and set up bitrates */
 	ieee80211_set_bitrate_flags(wiphy);
 
-
+	rtnl_lock();
 	res = device_add(&rdev->wiphy.dev);
-	if (res)
-		return res;
-
-	res = rfkill_register(rdev->rfkill);
 	if (res) {
-		device_del(&rdev->wiphy.dev);
+		rtnl_unlock();
 		return res;
 	}
 
-	rtnl_lock();
 	/* set up regulatory info */
 	wiphy_regulatory_register(wiphy);
 
@@ -606,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy)
 
 	rdev->wiphy.registered = true;
 	rtnl_unlock();
+
+	res = rfkill_register(rdev->rfkill);
+	if (res) {
+		rfkill_destroy(rdev->rfkill);
+		rdev->rfkill = NULL;
+		wiphy_unregister(&rdev->wiphy);
+		return res;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(wiphy_register);
@@ -640,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy)
 		rtnl_unlock();
 		__count == 0; }));
 
-	rfkill_unregister(rdev->rfkill);
+	if (rdev->rfkill)
+		rfkill_unregister(rdev->rfkill);
 
 	rtnl_lock();
 	rdev->wiphy.registered = false;
-- 
cgit v1.2.1


From 03bb7f42765ce596604f03d179f3137d7df05bba Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 29 Sep 2013 21:39:33 +0200
Subject: mac80211: use sta_info_get_bss() for nl80211 tx and client probing

This allows calls for clients in AP_VLANs (e.g. for 4-addr) to succeed

Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 2 +-
 net/mac80211/tx.c  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 2e7855a1b10d..629dee7ec9bf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3518,7 +3518,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
 		return -EINVAL;
 	}
 	band = chanctx_conf->def.chan->band;
-	sta = sta_info_get(sdata, peer);
+	sta = sta_info_get_bss(sdata, peer);
 	if (sta) {
 		qos = test_sta_flag(sta, WLAN_STA_WME);
 	} else {
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3456c0486b48..70b5a05c0a4e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1120,7 +1120,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 		tx->sta = rcu_dereference(sdata->u.vlan.sta);
 		if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr)
 			return TX_DROP;
-	} else if (info->flags & IEEE80211_TX_CTL_INJECTED ||
+	} else if (info->flags & (IEEE80211_TX_CTL_INJECTED |
+				  IEEE80211_TX_INTFL_NL80211_FRAME_TX) ||
 		   tx->sdata->control_port_protocol == tx->skb->protocol) {
 		tx->sta = sta_info_get_bss(sdata, hdr->addr1);
 	}
-- 
cgit v1.2.1


From 0c5b93290b2f3c7a376567c03ae8d385b0e99851 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 29 Sep 2013 21:39:34 +0200
Subject: mac80211: update sta->last_rx on acked tx frames

When clients are idle for too long, hostapd sends nullfunc frames for
probing. When those are acked by the client, the idle time needs to be
updated.

To make this work (and to avoid unnecessary probing), update sta->last_rx
whenever an ACK was received for a tx packet. Only do this if the flag
IEEE80211_HW_REPORTS_TX_ACK_STATUS is set.

Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/status.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 368837fe3b80..78dc2e99027e 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
 	struct ieee80211_local *local = sta->local;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 
+	if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+		sta->last_rx = jiffies;
+
 	if (ieee80211_is_data_qos(mgmt->frame_control)) {
 		struct ieee80211_hdr *hdr = (void *) skb->data;
 		u8 *qc = ieee80211_get_qos_ctl(hdr);
-- 
cgit v1.2.1


From 22c4ceed0184318ec5a6182c6d75d398452c2e39 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 30 Sep 2013 12:36:05 +0300
Subject: mac80211: Run deferred scan if last roc_list item is not started

mac80211 scan processing could get stuck if roc work for pending, but
not started when a scan request was deferred due to such roc item.
Normally the deferred scan would be started from
ieee80211_start_next_roc(), but ieee80211_sw_roc_work() calls that only
if the finished ROC was started. Fix this by calling
ieee80211_run_deferred_scan() in the case the last ROC was not actually
started.

This issue was hit relatively easily in P2P find operations where Listen
state (remain-on-channel) and Search state (scan) are repeated in a
loop.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/offchannel.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index acd1f71adc03..0c2a29484c07 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -394,6 +394,8 @@ void ieee80211_sw_roc_work(struct work_struct *work)
 
 		if (started)
 			ieee80211_start_next_roc(local);
+		else if (list_empty(&local->roc_list))
+			ieee80211_run_deferred_scan(local);
 	}
 
  out_unlock:
-- 
cgit v1.2.1


From 6c519bad7b19a2c14a075b400edabaa630330123 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Fri, 27 Sep 2013 18:03:39 +0200
Subject: batman-adv: set up network coding packet handlers during module init

batman-adv saves its table of packet handlers as a global state, so handlers
must be set up only once (and setting them up a second time will fail).

The recently-added network coding support tries to set up its handler each time
a new softif is registered, which obviously fails when more that one softif is
used (and in consequence, the softif creation fails).

Fix this by splitting up batadv_nc_init into batadv_nc_init (which is called
only once) and batadv_nc_mesh_init (which is called for each softif); in
addition batadv_nc_free is renamed to batadv_nc_mesh_free to keep naming
consistent.

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
---
 net/batman-adv/main.c           |  5 +++--
 net/batman-adv/network-coding.c | 28 ++++++++++++++++++----------
 net/batman-adv/network-coding.h | 14 ++++++++++----
 3 files changed, 31 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index c72d1bcdcf49..1356af660b5b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -65,6 +65,7 @@ static int __init batadv_init(void)
 	batadv_recv_handler_init();
 
 	batadv_iv_init();
+	batadv_nc_init();
 
 	batadv_event_workqueue = create_singlethread_workqueue("bat_events");
 
@@ -142,7 +143,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
 	if (ret < 0)
 		goto err;
 
-	ret = batadv_nc_init(bat_priv);
+	ret = batadv_nc_mesh_init(bat_priv);
 	if (ret < 0)
 		goto err;
 
@@ -167,7 +168,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
 	batadv_vis_quit(bat_priv);
 
 	batadv_gw_node_purge(bat_priv);
-	batadv_nc_free(bat_priv);
+	batadv_nc_mesh_free(bat_priv);
 	batadv_dat_free(bat_priv);
 	batadv_bla_free(bat_priv);
 
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index a487d46e0aec..4ecc0b6bf8ab 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -34,6 +34,20 @@ static void batadv_nc_worker(struct work_struct *work);
 static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
 				       struct batadv_hard_iface *recv_if);
 
+/**
+ * batadv_nc_init - one-time initialization for network coding
+ */
+int __init batadv_nc_init(void)
+{
+	int ret;
+
+	/* Register our packet type */
+	ret = batadv_recv_handler_register(BATADV_CODED,
+					   batadv_nc_recv_coded_packet);
+
+	return ret;
+}
+
 /**
  * batadv_nc_start_timer - initialise the nc periodic worker
  * @bat_priv: the bat priv with all the soft interface information
@@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_nc_init - initialise coding hash table and start house keeping
+ * batadv_nc_mesh_init - initialise coding hash table and start house keeping
  * @bat_priv: the bat priv with all the soft interface information
  */
-int batadv_nc_init(struct batadv_priv *bat_priv)
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
 {
 	bat_priv->nc.timestamp_fwd_flush = jiffies;
 	bat_priv->nc.timestamp_sniffed_purge = jiffies;
@@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv)
 	batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
 				   &batadv_nc_decoding_hash_lock_class_key);
 
-	/* Register our packet type */
-	if (batadv_recv_handler_register(BATADV_CODED,
-					 batadv_nc_recv_coded_packet) < 0)
-		goto err;
-
 	INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
 	batadv_nc_start_timer(bat_priv);
 
@@ -1721,12 +1730,11 @@ free_nc_packet:
 }
 
 /**
- * batadv_nc_free - clean up network coding memory
+ * batadv_nc_mesh_free - clean up network coding memory
  * @bat_priv: the bat priv with all the soft interface information
  */
-void batadv_nc_free(struct batadv_priv *bat_priv)
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
 {
-	batadv_recv_handler_unregister(BATADV_CODED);
 	cancel_delayed_work_sync(&bat_priv->nc.work);
 
 	batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 85a4ec81ad50..ddfa618e80bf 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -22,8 +22,9 @@
 
 #ifdef CONFIG_BATMAN_ADV_NC
 
-int batadv_nc_init(struct batadv_priv *bat_priv);
-void batadv_nc_free(struct batadv_priv *bat_priv);
+int batadv_nc_init(void);
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv);
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv);
 void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
 			      struct batadv_orig_node *orig_node,
 			      struct batadv_orig_node *orig_neigh_node,
@@ -46,12 +47,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
 
 #else /* ifdef CONFIG_BATMAN_ADV_NC */
 
-static inline int batadv_nc_init(struct batadv_priv *bat_priv)
+static inline int batadv_nc_init(void)
 {
 	return 0;
 }
 
-static inline void batadv_nc_free(struct batadv_priv *bat_priv)
+static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
+{
+	return 0;
+}
+
+static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
 {
 	return;
 }
-- 
cgit v1.2.1


From 6865d1e834be84ddd5808d93d5035b492346c64a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Mon, 30 Sep 2013 22:05:40 +0200
Subject: unix_diag: fix info leak

When filling the netlink message we miss to wipe the pad field,
therefore leak one byte of heap memory to userland. Fix this by
setting pad to 0.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/diag.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/unix/diag.c b/net/unix/diag.c
index d591091603bf..86fa0f3b2caf 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	rep->udiag_family = AF_UNIX;
 	rep->udiag_type = sk->sk_type;
 	rep->udiag_state = sk->sk_state;
+	rep->pad = 0;
 	rep->udiag_ino = sk_ino;
 	sock_diag_save_cookie(sk, rep->udiag_cookie);
 
-- 
cgit v1.2.1


From 5843ef421311c34f06d5ab82bced5aebfe185b2c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 30 Sep 2013 13:29:11 -0700
Subject: tcp: Always set options to 0 before calling tcp_established_options

tcp_established_options assumes opts->options is 0 before calling,
as it read modify writes it.

For the tcp_current_mss() case the opts structure is not zeroed,
so this can be done with uninitialized values.

This is ok, because ->options is not read in this path.
But it's still better to avoid the operation on the uninitialized
field. This shuts up a static code analyzer, and presumably
may help the optimizer.

Cc: netdev@vger.kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e6bb8256e59f..c6f01f2cdb32 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 	unsigned int size = 0;
 	unsigned int eff_sacks;
 
+	opts->options = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
 	if (unlikely(*md5)) {
-- 
cgit v1.2.1


From 80ad1d61e72d626e30ebe8529a0455e660ca4693 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 1 Oct 2013 21:04:11 -0700
Subject: net: do not call sock_put() on TIMEWAIT sockets

commit 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU /
hlist_nulls") incorrectly used sock_put() on TIMEWAIT sockets.

We should instead use inet_twsk_put()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c  | 2 +-
 net/ipv6/inet6_hashtables.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7bd8983dbfcf..96da9c77deca 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -287,7 +287,7 @@ begintw:
 			if (unlikely(!INET_TW_MATCH(sk, net, acookie,
 						    saddr, daddr, ports,
 						    dif))) {
-				sock_put(sk);
+				inet_twsk_put(inet_twsk(sk));
 				goto begintw;
 			}
 			goto out;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32b4a1675d82..066640e0ba8e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -116,7 +116,7 @@ begintw:
 			}
 			if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
 						     ports, dif))) {
-				sock_put(sk);
+				inet_twsk_put(inet_twsk(sk));
 				goto begintw;
 			}
 			goto out;
-- 
cgit v1.2.1


From e18503f41f9b12132c95d7c31ca6ee5155e44e5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Cachereul?= <f.cachereul@alphalink.fr>
Date: Wed, 2 Oct 2013 10:16:02 +0200
Subject: l2tp: fix kernel panic when using IPv4-mapped IPv6 addresses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IPv4 mapped addresses cause kernel panic.
The patch juste check whether the IPv6 address is an IPv4 mapped
address. If so, use IPv4 API instead of IPv6.

[  940.026915] general protection fault: 0000 [#1]
[  940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse
[  940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1
[  940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[  940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000
[  940.026915] RIP: 0010:[<ffffffff81333780>]  [<ffffffff81333780>] ip6_xmit+0x276/0x326
[  940.026915] RSP: 0018:ffff88000737fd28  EFLAGS: 00010286
[  940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000
[  940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40
[  940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90
[  940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0
[  940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580
[  940.026915] FS:  00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000
[  940.026915] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0
[  940.026915] Stack:
[  940.026915]  ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020
[  940.026915]  11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800
[  940.026915]  ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020
[  940.026915] Call Trace:
[  940.026915]  [<ffffffff81356cc3>] ? inet6_csk_xmit+0xa4/0xc4
[  940.026915]  [<ffffffffa0038535>] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core]
[  940.026915]  [<ffffffff812b8d3b>] ? pskb_expand_head+0x161/0x214
[  940.026915]  [<ffffffffa003e91d>] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp]
[  940.026915]  [<ffffffffa00292e0>] ? ppp_channel_push+0x36/0x8b [ppp_generic]
[  940.026915]  [<ffffffffa00293fe>] ? ppp_write+0xaf/0xc5 [ppp_generic]
[  940.026915]  [<ffffffff8110ead4>] ? vfs_write+0xa2/0x106
[  940.026915]  [<ffffffff8110edd6>] ? SyS_write+0x56/0x8a
[  940.026915]  [<ffffffff81378ac0>] ? system_call_fastpath+0x16/0x1b
[  940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49
8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02
00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51
[  940.026915] RIP  [<ffffffff81333780>] ip6_xmit+0x276/0x326
[  940.026915]  RSP <ffff88000737fd28>
[  940.057945] ---[ end trace be8aba9a61c8b7f3 ]---
[  940.058583] Kernel panic - not syncing: Fatal exception in interrupt

Signed-off-by: François CACHEREUL <f.cachereul@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 27 +++++++++++++++++++++++----
 net/l2tp/l2tp_core.h |  3 +++
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index feae495a0a30..aedaa2cd4237 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -496,6 +496,7 @@ out:
 static inline int l2tp_verify_udp_checksum(struct sock *sk,
 					   struct sk_buff *skb)
 {
+	struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
 	struct udphdr *uh = udp_hdr(skb);
 	u16 ulen = ntohs(uh->len);
 	__wsum psum;
@@ -504,7 +505,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
 		return 0;
 
 #if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == PF_INET6) {
+	if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) {
 		if (!uh->check) {
 			LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
 			return 1;
@@ -1128,7 +1129,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	/* Queue the packet to IP for output */
 	skb->local_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (skb->sk->sk_family == PF_INET6)
+	if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 		error = inet6_csk_xmit(skb, NULL);
 	else
 #endif
@@ -1255,7 +1256,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 		/* Calculate UDP checksum if configured to do so */
 #if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6)
+		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 			l2tp_xmit_ipv6_csum(sk, skb, udp_len);
 		else
 #endif
@@ -1704,6 +1705,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	if (cfg != NULL)
 		tunnel->debug = cfg->debug;
 
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == PF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		if (ipv6_addr_v4mapped(&np->saddr) &&
+		    ipv6_addr_v4mapped(&np->daddr)) {
+			struct inet_sock *inet = inet_sk(sk);
+
+			tunnel->v4mapped = true;
+			inet->inet_saddr = np->saddr.s6_addr32[3];
+			inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3];
+			inet->inet_daddr = np->daddr.s6_addr32[3];
+		} else {
+			tunnel->v4mapped = false;
+		}
+	}
+#endif
+
 	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
 	tunnel->encap = encap;
 	if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1712,7 +1731,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
 		udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
 #if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6)
+		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 			udpv6_encap_enable();
 		else
 #endif
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 66a559b104b6..6f251cbc2ed7 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -194,6 +194,9 @@ struct l2tp_tunnel {
 	struct sock		*sock;		/* Parent socket */
 	int			fd;		/* Parent fd, if tunnel socket
 						 * was created by userspace */
+#if IS_ENABLED(CONFIG_IPV6)
+	bool			v4mapped;
+#endif
 
 	struct work_struct	del_work;
 
-- 
cgit v1.2.1


From 1661bf364ae9c506bc8795fef70d1532931be1e8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 3 Oct 2013 00:27:20 +0300
Subject: net: heap overflow in __audit_sockaddr()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to cap ->msg_namelen or it leads to a buffer overflow when we
to the memcpy() in __audit_sockaddr().  It requires CAP_AUDIT_CONTROL to
exploit this bug.

The call tree is:
___sys_recvmsg()
  move_addr_to_user()
    audit_sockaddr()
      __audit_sockaddr()

Reported-by: Jüri Aedla <juri.aedla@gmail.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c |  2 ++
 net/socket.c | 24 ++++++++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/compat.c b/net/compat.c
index f0a1ba6c8086..89032580bd1d 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg)
 	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
 	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
 		return -EFAULT;
+	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+		return -EINVAL;
 	kmsg->msg_name = compat_ptr(tmp1);
 	kmsg->msg_iov = compat_ptr(tmp2);
 	kmsg->msg_control = compat_ptr(tmp3);
diff --git a/net/socket.c b/net/socket.c
index ebed4b68f768..c226aceee65b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1964,6 +1964,16 @@ struct used_address {
 	unsigned int name_len;
 };
 
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+				 struct msghdr __user *umsg)
+{
+	if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
+		return -EFAULT;
+	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+		return -EINVAL;
+	return 0;
+}
+
 static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
 			 struct msghdr *msg_sys, unsigned int flags,
 			 struct used_address *used_address)
@@ -1982,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(msg_sys, msg_compat))
 			return -EFAULT;
-	} else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
-		return -EFAULT;
+	} else {
+		err = copy_msghdr_from_user(msg_sys, msg);
+		if (err)
+			return err;
+	}
 
 	if (msg_sys->msg_iovlen > UIO_FASTIOV) {
 		err = -EMSGSIZE;
@@ -2191,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(msg_sys, msg_compat))
 			return -EFAULT;
-	} else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
-		return -EFAULT;
+	} else {
+		err = copy_msghdr_from_user(msg_sys, msg);
+		if (err)
+			return err;
+	}
 
 	if (msg_sys->msg_iovlen > UIO_FASTIOV) {
 		err = -EMSGSIZE;
-- 
cgit v1.2.1


From 5e8a402f831dbe7ee831340a91439e46f0d38acd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 4 Oct 2013 10:31:41 -0700
Subject: tcp: do not forget FIN in tcp_shifted_skb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Yuchung found following problem :

 There are bugs in the SACK processing code, merging part in
 tcp_shift_skb_data(), that incorrectly resets or ignores the sacked
 skbs FIN flag. When a receiver first SACK the FIN sequence, and later
 throw away ofo queue (e.g., sack-reneging), the sender will stop
 retransmitting the FIN flag, and hangs forever.

Following packetdrill test can be used to reproduce the bug.

$ cat sack-merge-bug.pkt
`sysctl -q net.ipv4.tcp_fack=0`

// Establish a connection and send 10 MSS.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+.000 bind(3, ..., ...) = 0
+.000 listen(3, 1) = 0

+.050 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+.000 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.001 < . 1:1(0) ack 1 win 1024
+.000 accept(3, ..., ...) = 4

+.100 write(4, ..., 12000) = 12000
+.000 shutdown(4, SHUT_WR) = 0
+.000 > . 1:10001(10000) ack 1
+.050 < . 1:1(0) ack 2001 win 257
+.000 > FP. 10001:12001(2000) ack 1
+.050 < . 1:1(0) ack 2001 win 257 <sack 10001:11001,nop,nop>
+.050 < . 1:1(0) ack 2001 win 257 <sack 10001:12002,nop,nop>
// SACK reneg
+.050 < . 1:1(0) ack 12001 win 257
+0 %{ print "unacked: ",tcpi_unacked }%
+5 %{ print "" }%

First, a typo inverted left/right of one OR operation, then
code forgot to advance end_seq if the merged skb carried FIN.

Bug was added in 2.6.29 by commit 832d11c5cd076ab
("tcp: Try to restore large SKBs while SACK processing")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 25a89eaa669d..113dc5f17d47 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1284,7 +1284,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		tp->lost_cnt_hint -= tcp_skb_pcount(prev);
 	}
 
-	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
+	TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+		TCP_SKB_CB(prev)->end_seq++;
+
 	if (skb == tcp_highest_sack(sk))
 		tcp_advance_highest_sack(sk, skb);
 
-- 
cgit v1.2.1


From 3573540cafa4296dd60f8be02f2aecaa31047525 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 2 Oct 2013 09:14:06 +0300
Subject: netif_set_xps_queue: make cpu mask const

virtio wants to pass in cpumask_of(cpu), make parameter
const to avoid build warnings.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 65f829cfd928..3430b1ed12e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
 {
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
 	struct xps_map *map, *new_map;
-- 
cgit v1.2.1


From 582442d6d5bc74c1e11a2515f9387d3d227278e2 Mon Sep 17 00:00:00 2001
From: Oussama Ghorbel <ou.ghorbel@gmail.com>
Date: Thu, 3 Oct 2013 14:49:26 +0100
Subject: ipv6: Allow the MTU of ipip6 tunnel to be set below 1280

The (inner) MTU of a ipip6 (IPv4-in-IPv6) tunnel cannot be set below 1280, which is the minimum MTU in IPv6.
However, there should be no IPv6 on the tunnel interface at all, so the IPv6 rules should not apply.
More info at https://bugzilla.kernel.org/show_bug.cgi?id=15530

This patch allows to check the minimum MTU for ipv6 tunnel according to these rules:
-In case the tunnel is configured with ipip6 mode the minimum MTU is 68.
-In case the tunnel is configured with ip6ip6 or any mode the minimum MTU is 1280.

Signed-off-by: Oussama Ghorbel <ou.ghorbel@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a791552e0422..583b77e2f69b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1430,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 static int
 ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if (new_mtu < IPV6_MIN_MTU) {
-		return -EINVAL;
+	struct ip6_tnl *tnl = netdev_priv(dev);
+
+	if (tnl->parms.proto == IPPROTO_IPIP) {
+		if (new_mtu < 68)
+			return -EINVAL;
+	} else {
+		if (new_mtu < IPV6_MIN_MTU)
+			return -EINVAL;
 	}
+	if (new_mtu > 0xFFF8 - dev->hard_header_len)
+		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
 }
-- 
cgit v1.2.1


From d45ed4a4e33ae103053c0a53d280014e7101bb5c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 4 Oct 2013 00:14:06 -0700
Subject: net: fix unsafe set_memory_rw from softirq

on x86 system with net.core.bpf_jit_enable = 1

sudo tcpdump -i eth1 'tcp port 22'

causes the warning:
[   56.766097]  Possible unsafe locking scenario:
[   56.766097]
[   56.780146]        CPU0
[   56.786807]        ----
[   56.793188]   lock(&(&vb->lock)->rlock);
[   56.799593]   <Interrupt>
[   56.805889]     lock(&(&vb->lock)->rlock);
[   56.812266]
[   56.812266]  *** DEADLOCK ***
[   56.812266]
[   56.830670] 1 lock held by ksoftirqd/1/13:
[   56.836838]  #0:  (rcu_read_lock){.+.+..}, at: [<ffffffff8118f44c>] vm_unmap_aliases+0x8c/0x380
[   56.849757]
[   56.849757] stack backtrace:
[   56.862194] CPU: 1 PID: 13 Comm: ksoftirqd/1 Not tainted 3.12.0-rc3+ #45
[   56.868721] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
[   56.882004]  ffffffff821944c0 ffff88080bbdb8c8 ffffffff8175a145 0000000000000007
[   56.895630]  ffff88080bbd5f40 ffff88080bbdb928 ffffffff81755b14 0000000000000001
[   56.909313]  ffff880800000001 ffff880800000000 ffffffff8101178f 0000000000000001
[   56.923006] Call Trace:
[   56.929532]  [<ffffffff8175a145>] dump_stack+0x55/0x76
[   56.936067]  [<ffffffff81755b14>] print_usage_bug+0x1f7/0x208
[   56.942445]  [<ffffffff8101178f>] ? save_stack_trace+0x2f/0x50
[   56.948932]  [<ffffffff810cc0a0>] ? check_usage_backwards+0x150/0x150
[   56.955470]  [<ffffffff810ccb52>] mark_lock+0x282/0x2c0
[   56.961945]  [<ffffffff810ccfed>] __lock_acquire+0x45d/0x1d50
[   56.968474]  [<ffffffff810cce6e>] ? __lock_acquire+0x2de/0x1d50
[   56.975140]  [<ffffffff81393bf5>] ? cpumask_next_and+0x55/0x90
[   56.981942]  [<ffffffff810cef72>] lock_acquire+0x92/0x1d0
[   56.988745]  [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380
[   56.995619]  [<ffffffff817628f1>] _raw_spin_lock+0x41/0x50
[   57.002493]  [<ffffffff8118f52a>] ? vm_unmap_aliases+0x16a/0x380
[   57.009447]  [<ffffffff8118f52a>] vm_unmap_aliases+0x16a/0x380
[   57.016477]  [<ffffffff8118f44c>] ? vm_unmap_aliases+0x8c/0x380
[   57.023607]  [<ffffffff810436b0>] change_page_attr_set_clr+0xc0/0x460
[   57.030818]  [<ffffffff810cfb8d>] ? trace_hardirqs_on+0xd/0x10
[   57.037896]  [<ffffffff811a8330>] ? kmem_cache_free+0xb0/0x2b0
[   57.044789]  [<ffffffff811b59c3>] ? free_object_rcu+0x93/0xa0
[   57.051720]  [<ffffffff81043d9f>] set_memory_rw+0x2f/0x40
[   57.058727]  [<ffffffff8104e17c>] bpf_jit_free+0x2c/0x40
[   57.065577]  [<ffffffff81642cba>] sk_filter_release_rcu+0x1a/0x30
[   57.072338]  [<ffffffff811108e2>] rcu_process_callbacks+0x202/0x7c0
[   57.078962]  [<ffffffff81057f17>] __do_softirq+0xf7/0x3f0
[   57.085373]  [<ffffffff81058245>] run_ksoftirqd+0x35/0x70

cannot reuse jited filter memory, since it's readonly,
so use original bpf insns memory to hold work_struct

defer kfree of sk_filter until jit completed freeing

tested on x86_64 and i386

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29ff266..01b780856db2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
 	bpf_jit_free(fp);
-	kfree(fp);
 }
 EXPORT_SYMBOL(sk_filter_release_rcu);
 
@@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
+	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
 	if (!fp)
 		return -ENOMEM;
 	memcpy(fp->insns, fprog->filter, fsize);
@@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
 	struct sk_filter *fp, *old_fp;
 	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+	unsigned int sk_fsize = sk_filter_size(fprog->len);
 	int err;
 
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+	fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
 	if (!fp)
 		return -ENOMEM;
 	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
-		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+		sock_kfree_s(sk, fp, sk_fsize);
 		return -EFAULT;
 	}
 
-- 
cgit v1.2.1


From 0a7e22609067ff524fc7bbd45c6951dd08561667 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 4 Oct 2013 17:04:48 +0200
Subject: ipv4: fix ineffective source address selection

When sending out multicast messages, the source address in inet->mc_addr is
ignored and rewritten by an autoselected one. This is caused by a typo in
commit 813b3b5db831 ("ipv4: Use caller's on-stack flowi as-is in output
route lookups").

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 727f4365bcdf..6011615e810d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2072,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 							      RT_SCOPE_LINK);
 			goto make_route;
 		}
-		if (fl4->saddr) {
+		if (!fl4->saddr) {
 			if (ipv4_is_multicast(fl4->daddr))
 				fl4->saddr = inet_select_addr(dev_out, 0,
 							      fl4->flowi4_scope);
-- 
cgit v1.2.1


From 88ba09df236c618b5466d35a0165df0dc865eac5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 5 Oct 2013 13:15:30 -0700
Subject: net: Update the sysctl permissions handler to test effective uid/gid

On Tue, 20 Aug 2013 11:40:04 -0500 Eric Sandeen <sandeen@redhat.com> wrote:
> This was brought up in a Red Hat bug (which may be marked private, I'm sorry):
>
> Bug 987055 - open O_WRONLY succeeds on some root owned files in /proc for process running with unprivileged EUID
>
> "On RHEL7 some of the files in /proc can be opened for writing by an unprivileged EUID."
>
> The flaw existed upstream as well last I checked.
>
> This commit in kernel v3.8 caused the regression:
>
> commit cff109768b2d9c03095848f4cd4b0754117262aa
> Author: Eric W. Biederman <ebiederm@xmission.com>
> Date:   Fri Nov 16 03:03:01 2012 +0000
>
>     net: Update the per network namespace sysctls to be available to the network namespace owner
>
>     - Allow anyone with CAP_NET_ADMIN rights in the user namespace of the
>       the netowrk namespace to change sysctls.
>     - Allow anyone the uid of the user namespace root the same
>       permissions over the network namespace sysctls as the global root.
>     - Allow anyone with gid of the user namespace root group the same
>       permissions over the network namespace sysctl as the global root group.
>
>     Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>     Signed-off-by: David S. Miller <davem@davemloft.net>
>
> because it changed /sys/net's special permission handler to test current_uid, not
> current_euid; same for current_gid/current_egid.
>
> So in this case, root cannot drop privs via set[ug]id, and retains all privs
> in this codepath.

Modify the code to use current_euid(), and in_egroup_p, as in done
in fs/proc/proc_sysctl.c:test_perm()

Cc: stable@vger.kernel.org
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Reported-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sysctl_net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3e..e7000be321b0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
 
 	/* Allow network administrator to have same access as root. */
 	if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
-	    uid_eq(root_uid, current_uid())) {
+	    uid_eq(root_uid, current_euid())) {
 		int mode = (table->mode >> 6) & 7;
 		return (mode << 6) | (mode << 3) | mode;
 	}
 	/* Allow netns root group to have the same access as the root group */
-	if (gid_eq(root_gid, current_gid())) {
+	if (in_egroup_p(root_gid)) {
 		int mode = (table->mode >> 3) & 7;
 		return (mode << 3) | mode;
 	}
-- 
cgit v1.2.1


From e7d8f6cb2f8735693396872f4608bbe305e8baee Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 8 Oct 2013 10:49:45 +0200
Subject: xfrm: Add refcount handling to queued policies

We need to ensure that policies can't go away as long as the hold timer
is armed, so take a refcont when we arm the timer and drop one if we
delete it.

Bug was introduced with git commit a0073fe18 ("xfrm: Add a state
resolution packet queue")

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ed38d5d81f9e..5f9be976770e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -334,7 +334,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 
 	atomic_inc(&policy->genid);
 
-	del_timer(&policy->polq.hold_timer);
+	if (del_timer(&policy->polq.hold_timer))
+		xfrm_pol_put(policy);
 	xfrm_queue_purge(&policy->polq.hold_queue);
 
 	if (del_timer(&policy->timer))
@@ -589,7 +590,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
 
 	spin_lock_bh(&pq->hold_queue.lock);
 	skb_queue_splice_init(&pq->hold_queue, &list);
-	del_timer(&pq->hold_timer);
+	if (del_timer(&pq->hold_timer))
+		xfrm_pol_put(old);
 	spin_unlock_bh(&pq->hold_queue.lock);
 
 	if (skb_queue_empty(&list))
@@ -600,7 +602,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
 	spin_lock_bh(&pq->hold_queue.lock);
 	skb_queue_splice(&list, &pq->hold_queue);
 	pq->timeout = XFRM_QUEUE_TMO_MIN;
-	mod_timer(&pq->hold_timer, jiffies);
+	if (!mod_timer(&pq->hold_timer, jiffies))
+		xfrm_pol_hold(new);
 	spin_unlock_bh(&pq->hold_queue.lock);
 }
 
@@ -1787,8 +1790,9 @@ static void xfrm_policy_queue_process(unsigned long arg)
 			goto purge_queue;
 
 		pq->timeout = pq->timeout << 1;
-		mod_timer(&pq->hold_timer, jiffies + pq->timeout);
-		return;
+		if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
+			xfrm_pol_hold(pol);
+	goto out;
 	}
 
 	dst_release(dst);
@@ -1819,11 +1823,14 @@ static void xfrm_policy_queue_process(unsigned long arg)
 		err = dst_output(skb);
 	}
 
+out:
+	xfrm_pol_put(pol);
 	return;
 
 purge_queue:
 	pq->timeout = 0;
 	xfrm_queue_purge(&pq->hold_queue);
+	xfrm_pol_put(pol);
 }
 
 static int xdst_queue_output(struct sk_buff *skb)
@@ -1831,7 +1838,8 @@ static int xdst_queue_output(struct sk_buff *skb)
 	unsigned long sched_next;
 	struct dst_entry *dst = skb_dst(skb);
 	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
-	struct xfrm_policy_queue *pq = &xdst->pols[0]->polq;
+	struct xfrm_policy *pol = xdst->pols[0];
+	struct xfrm_policy_queue *pq = &pol->polq;
 
 	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
 		kfree_skb(skb);
@@ -1850,10 +1858,12 @@ static int xdst_queue_output(struct sk_buff *skb)
 	if (del_timer(&pq->hold_timer)) {
 		if (time_before(pq->hold_timer.expires, sched_next))
 			sched_next = pq->hold_timer.expires;
+		xfrm_pol_put(pol);
 	}
 
 	__skb_queue_tail(&pq->hold_queue, skb);
-	mod_timer(&pq->hold_timer, sched_next);
+	if (!mod_timer(&pq->hold_timer, sched_next))
+		xfrm_pol_hold(pol);
 
 	spin_unlock_bh(&pq->hold_queue.lock);
 
-- 
cgit v1.2.1


From 2bb53e2557964c2c5368a0392cf3b3b63a288cd0 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 8 Oct 2013 10:49:51 +0200
Subject: xfrm: check for a vaild skb in xfrm_policy_queue_process

We might dreference a NULL pointer if the hold_queue is empty,
so add a check to avoid this.

Bug was introduced with git commit a0073fe18 ("xfrm: Add a state
resolution packet queue")

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f9be976770e..76e1873811d4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1772,6 +1772,10 @@ static void xfrm_policy_queue_process(unsigned long arg)
 
 	spin_lock(&pq->hold_queue.lock);
 	skb = skb_peek(&pq->hold_queue);
+	if (!skb) {
+		spin_unlock(&pq->hold_queue.lock);
+		goto out;
+	}
 	dst = skb_dst(skb);
 	sk = skb->sk;
 	xfrm_decode_session(skb, &fl, dst->ops->family);
-- 
cgit v1.2.1


From 7adac1ec8198e9f5c802918e58a967b92db4a66f Mon Sep 17 00:00:00 2001
From: Alan Ott <alan@signal11.us>
Date: Sat, 5 Oct 2013 23:15:18 -0400
Subject: 6lowpan: Only make 6lowpan links to IEEE802154 devices

Refuse to create 6lowpan links if the actual hardware interface is
of any type other than ARPHRD_IEEE802154.

Signed-off-by: Alan Ott <alan@signal11.us>
Suggested-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index c85e71e0c7ff..8f56b2b63bfb 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1372,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 	real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
 	if (!real_dev)
 		return -ENODEV;
+	if (real_dev->type != ARPHRD_IEEE802154)
+		return -EINVAL;
 
 	lowpan_dev_info(dev)->real_dev = real_dev;
 	lowpan_dev_info(dev)->fragment_tag = 0;
-- 
cgit v1.2.1


From ab2d95df9c19f45805977be1c869ba3df9a10835 Mon Sep 17 00:00:00 2001
From: Alan Ott <alan@signal11.us>
Date: Sat, 5 Oct 2013 23:15:19 -0400
Subject: 6lowpan: Sync default hardware address of lowpan links to their wpan

When a lowpan link to a wpan device is created, set the hardware address
of the lowpan link to that of the wpan device.

Signed-off-by: Alan Ott <alan@signal11.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 8f56b2b63bfb..ff41b4d60d30 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1388,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
 
 	entry->ldev = dev;
 
+	/* Set the lowpan harware address to the wpan hardware address. */
+	memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
+
 	mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
 	INIT_LIST_HEAD(&entry->list);
 	list_add_tail(&entry->list, &lowpan_devices);
-- 
cgit v1.2.1


From 8d8a51e26a6d415e1470759f2cf5f3ee3ee86196 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 8 Oct 2013 15:44:26 -0400
Subject: l2tp: Fix build warning with ipv6 disabled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/l2tp/l2tp_core.c: In function ‘l2tp_verify_udp_checksum’:
net/l2tp/l2tp_core.c:499:22: warning: unused variable ‘tunnel’ [-Wunused-variable]

Create a helper "l2tp_tunnel()" to facilitate this, and as a side
effect get rid of a bunch of unnecessary void pointer casts.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index aedaa2cd4237..b076e8309bc2 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -115,6 +115,11 @@ struct l2tp_net {
 static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 
+static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
+{
+	return sk->sk_user_data;
+}
+
 static inline struct l2tp_net *l2tp_pernet(struct net *net)
 {
 	BUG_ON(!net);
@@ -496,7 +501,6 @@ out:
 static inline int l2tp_verify_udp_checksum(struct sock *sk,
 					   struct sk_buff *skb)
 {
-	struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
 	struct udphdr *uh = udp_hdr(skb);
 	u16 ulen = ntohs(uh->len);
 	__wsum psum;
@@ -505,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
 		return 0;
 
 #if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) {
+	if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) {
 		if (!uh->check) {
 			LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
 			return 1;
@@ -1305,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
  */
 static void l2tp_tunnel_destruct(struct sock *sk)
 {
-	struct l2tp_tunnel *tunnel;
+	struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
 	struct l2tp_net *pn;
 
-	tunnel = sk->sk_user_data;
 	if (tunnel == NULL)
 		goto end;
 
@@ -1676,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	}
 
 	/* Check if this socket has already been prepped */
-	tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
+	tunnel = l2tp_tunnel(sk);
 	if (tunnel != NULL) {
 		/* This socket has already been prepped */
 		err = -EBUSY;
-- 
cgit v1.2.1


From 0e719e3a53cc03bc1bafbb2cae1ddf99a0d73ab0 Mon Sep 17 00:00:00 2001
From: Oussama Ghorbel <ou.ghorbel@gmail.com>
Date: Mon, 7 Oct 2013 18:50:05 +0100
Subject: ipv6: Fix the upper MTU limit in GRE tunnel

Unlike ipv4, the struct member hlen holds the length of the GRE and ipv6
headers. This length is also counted in dev->hard_header_len.
Perhaps, it's more clean to modify the hlen to count only the GRE header
without ipv6 header as the variable name suggest, but the simple way to fix
this without regression risk is simply modify the calculation of the limit
in ip6gre_tunnel_change_mtu function.
Verified in kernel version v3.11.

Signed-off-by: Oussama Ghorbel <ou.ghorbel@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7bb5446b9d73..1ef1fa2b22a6 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1173,9 +1173,8 @@ done:
 
 static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct ip6_tnl *tunnel = netdev_priv(dev);
 	if (new_mtu < 68 ||
-	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+	    new_mtu > 0xFFF8 - dev->hard_header_len)
 		return -EINVAL;
 	dev->mtu = new_mtu;
 	return 0;
-- 
cgit v1.2.1


From ede869cd0f45488195d56267d0c09ed511611d66 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 7 Oct 2013 12:50:18 -0700
Subject: pkt_sched: fq: fix typo for initial_quantum

TCA_FQ_INITIAL_QUANTUM should set q->initial_quantum

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a2fef8b10b96..48501a2baf75 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -656,7 +656,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
 
 	if (tb[TCA_FQ_INITIAL_QUANTUM])
-		q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
 
 	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
 		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
-- 
cgit v1.2.1


From c33a39c575068c2ea9bffb22fd6de2df19c74b89 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 7 Oct 2013 23:19:58 +0200
Subject: net: vlan: fix nlmsg size calculation in vlan_get_size()

This patch fixes the calculation of the nlmsg size, by adding the missing
nla_total_size().

Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 309129732285..c7e634af8516 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev)
 
 	return nla_total_size(2) +	/* IFLA_VLAN_PROTOCOL */
 	       nla_total_size(2) +	/* IFLA_VLAN_ID */
-	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
+	       nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
 }
-- 
cgit v1.2.1


From 7eec4174ff29cd42f2acfae8112f51c228545d40 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Oct 2013 15:16:00 -0700
Subject: pkt_sched: fq: fix non TCP flows pacing

Steinar reported FQ pacing was not working for UDP flows.

It looks like the initial sk->sk_pacing_rate value of 0 was
a wrong choice. We should init it to ~0U (unlimited)

Then, TCA_FQ_FLOW_DEFAULT_RATE should be removed because it makes
no real sense. The default rate is really unlimited, and we
need to avoid a zero divide.

Reported-by: Steinar H. Gunderson <sesse@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c    |  1 +
 net/sched/sch_fq.c | 20 +++++++++-----------
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 5b6beba494a3..0b39e7ae4383 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2319,6 +2319,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif
 
+	sk->sk_pacing_rate = ~0U;
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 48501a2baf75..a9dfdda9ed1d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -472,20 +472,16 @@ begin:
 	if (f->credit > 0 || !q->rate_enable)
 		goto out;
 
-	if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
-		rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
+	rate = q->flow_max_rate;
+	if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
+		rate = min(skb->sk->sk_pacing_rate, rate);
 
-		rate = min(rate, q->flow_max_rate);
-	} else {
-		rate = q->flow_max_rate;
-		if (rate == ~0U)
-			goto out;
-	}
-	if (rate) {
+	if (rate != ~0U) {
 		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
 		u64 len = (u64)plen * NSEC_PER_SEC;
 
-		do_div(len, rate);
+		if (likely(rate))
+			do_div(len, rate);
 		/* Since socket rate can change later,
 		 * clamp the delay to 125 ms.
 		 * TODO: maybe segment the too big skb, as in commit
@@ -735,12 +731,14 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 
+	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore,
+	 * do not bother giving its value
+	 */
 	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
 	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
 	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
 	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
-- 
cgit v1.2.1


From a754055a1296fcbe6f32de3a5eaca6efb2fd1865 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 16 Sep 2013 11:12:07 +0300
Subject: mac80211: correctly close cancelled scans

__ieee80211_scan_completed is called from a worker. This
means that the following flow is possible.

 * driver calls ieee80211_scan_completed
 * mac80211 cancels the scan (that is already complete)
 * __ieee80211_scan_completed runs

When scan_work will finally run, it will see that the scan
hasn't been aborted and might even trigger another scan on
another band. This leads to a situation where cfg80211's
scan is not done and no further scan can be issued.

Fix this by setting a new flag when a HW scan is being
cancelled so that no other scan will be triggered.

Cc: stable@vger.kernel.org
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  3 +++
 net/mac80211/scan.c        | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b6186517ec56..611abfcfb5eb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -893,6 +893,8 @@ struct tpt_led_trigger {
  *	that the scan completed.
  * @SCAN_ABORTED: Set for our scan work function when the driver reported
  *	a scan complete for an aborted scan.
+ * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
+ *	cancelled.
  */
 enum {
 	SCAN_SW_SCANNING,
@@ -900,6 +902,7 @@ enum {
 	SCAN_ONCHANNEL_SCANNING,
 	SCAN_COMPLETED,
 	SCAN_ABORTED,
+	SCAN_HW_CANCELLED,
 };
 
 /**
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 08afe74b98f4..d2d17a449224 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -238,6 +238,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
 	enum ieee80211_band band;
 	int i, ielen, n_chans;
 
+	if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
+		return false;
+
 	do {
 		if (local->hw_scan_band == IEEE80211_NUM_BANDS)
 			return false;
@@ -940,7 +943,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
 	if (!local->scan_req)
 		goto out;
 
+	/*
+	 * We have a scan running and the driver already reported completion,
+	 * but the worker hasn't run yet or is stuck on the mutex - mark it as
+	 * cancelled.
+	 */
+	if (test_bit(SCAN_HW_SCANNING, &local->scanning) &&
+	    test_bit(SCAN_COMPLETED, &local->scanning)) {
+		set_bit(SCAN_HW_CANCELLED, &local->scanning);
+		goto out;
+	}
+
 	if (test_bit(SCAN_HW_SCANNING, &local->scanning)) {
+		/*
+		 * Make sure that __ieee80211_scan_completed doesn't trigger a
+		 * scan on another band.
+		 */
+		set_bit(SCAN_HW_CANCELLED, &local->scanning);
 		if (local->ops->cancel_hw_scan)
 			drv_cancel_hw_scan(local,
 				rcu_dereference_protected(local->scan_sdata,
-- 
cgit v1.2.1


From f38dd58ccca0d612e62509f75e99952dcf316cb2 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 17 Sep 2013 15:20:13 +0300
Subject: cfg80211: don't add p2p device while in RFKILL

Since P2P device doesn't have a netdev associated to it,
we cannot prevent the user to start it when in RFKILL.
So refuse to even add it when in RFKILL.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 2 --
 net/wireless/core.h | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index fe8d4f2be49b..aff959e5a1b3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -958,8 +958,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 	case NETDEV_PRE_UP:
 		if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
 			return notifier_from_errno(-EOPNOTSUPP);
-		if (rfkill_blocked(rdev->rfkill))
-			return notifier_from_errno(-ERFKILL);
 		ret = cfg80211_can_add_interface(rdev, wdev->iftype);
 		if (ret)
 			return notifier_from_errno(ret);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 9ad43c619c54..3159e9c284c5 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -411,6 +411,9 @@ static inline int
 cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
 			   enum nl80211_iftype iftype)
 {
+	if (rfkill_blocked(rdev->rfkill))
+		return -ERFKILL;
+
 	return cfg80211_can_change_interface(rdev, NULL, iftype);
 }
 
-- 
cgit v1.2.1


From cb03db9d0e964568407fb08ea46cc2b6b7f67587 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Sat, 5 Oct 2013 17:56:59 -0300
Subject: net: secure_seq: Fix warning when CONFIG_IPV6 and CONFIG_INET are not
 selected

net_secret() is only used when CONFIG_IPV6 or CONFIG_INET are selected.

Building a defconfig with both of these symbols unselected (Using the ARM
at91sam9rl_defconfig, for example) leads to the following build warning:

$ make at91sam9rl_defconfig
#
# configuration written to .config
#

$ make net/core/secure_seq.o
scripts/kconfig/conf --silentoldconfig Kconfig
  CHK     include/config/kernel.release
  CHK     include/generated/uapi/linux/version.h
  CHK     include/generated/utsrelease.h
make[1]: `include/generated/mach-types.h' is up to date.
  CALL    scripts/checksyscalls.sh
  CC      net/core/secure_seq.o
net/core/secure_seq.c:17:13: warning: 'net_secret_init' defined but not used [-Wunused-function]

Fix this warning by protecting the definition of net_secret() with these
symbols.

Reported-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/secure_seq.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 3f1ec1586ae1..8d9d05edd2eb 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,6 +10,7 @@
 
 #include <net/secure_seq.h>
 
+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
 #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
 static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
@@ -29,6 +30,7 @@ static void net_secret_init(void)
 		cmpxchg(&net_secret[--i], 0, tmp);
 	}
 }
+#endif
 
 #ifdef CONFIG_INET
 static u32 seq_scale(u32 seq)
-- 
cgit v1.2.1


From f144febd93d5ee534fdf23505ab091b2b9088edc Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Thu, 10 Oct 2013 15:57:59 -0400
Subject: bridge: update mdb expiration timer upon reports.

commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b
	bridge: only expire the mdb entry when query is received
changed the mdb expiration timer to be armed only when QUERY is
received.  Howerver, this causes issues in an environment where
the multicast server socket comes and goes very fast while a client
is trying to send traffic to it.

The root cause is a race where a sequence of LEAVE followed by REPORT
messages can race against QUERY messages generated in response to LEAVE.
The QUERY ends up starting the expiration timer, and that timer can
potentially expire after the new REPORT message has been received signaling
the new join operation.  This leads to a significant drop in multicast
traffic and possible complete stall.

The solution is to have REPORT messages update the expiration timer
on entries that already exist.

CC: Cong Wang <xiyou.wangcong@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d1c578630678..1085f2180f3a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -611,6 +611,9 @@ rehash:
 		break;
 
 	default:
+		/* If we have an existing entry, update it's expire timer */
+		mod_timer(&mp->timer,
+			  jiffies + br->multicast_membership_interval);
 		goto out;
 	}
 
@@ -680,8 +683,12 @@ static int br_multicast_add_group(struct net_bridge *br,
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
-		if (p->port == port)
+		if (p->port == port) {
+			/* We already have a portgroup, update the timer.  */
+			mod_timer(&p->timer,
+				  jiffies + br->multicast_membership_interval);
 			goto out;
+		}
 		if ((unsigned long)p->port < (unsigned long)port)
 			break;
 	}
-- 
cgit v1.2.1


From 7263a5187f9e9de45fcb51349cf0e031142c19a1 Mon Sep 17 00:00:00 2001
From: Christophe Gouault <christophe.gouault@6wind.com>
Date: Tue, 8 Oct 2013 17:21:22 +0200
Subject: vti: get rid of nf mark rule in prerouting

This patch fixes and improves the use of vti interfaces (while
lightly changing the way of configuring them).

Currently:

- it is necessary to identify and mark inbound IPsec
  packets destined to each vti interface, via netfilter rules in
  the mangle table at prerouting hook.

- the vti module cannot retrieve the right tunnel in input since
  commit b9959fd3: vti tunnels all have an i_key, but the tunnel lookup
  is done with flag TUNNEL_NO_KEY, so there no chance to retrieve them.

- the i_key is used by the outbound processing as a mark to lookup
  for the right SP and SA bundle.

This patch uses the o_key to store the vti mark (instead of i_key) and
enables:

- to avoid the need for previously marking the inbound skbuffs via a
  netfilter rule.
- to properly retrieve the right tunnel in input, only based on the IPsec
  packet outer addresses.
- to properly perform an inbound policy check (using the tunnel o_key
  as a mark).
- to properly perform an outbound SPD and SAD lookup (using the tunnel
  o_key as a mark).
- to keep the current mark of the skbuff. The skbuff mark is neither
  used nor changed by the vti interface. Only the vti interface o_key
  is used.

SAs have a wildcard mark.
SPs have a mark equal to the vti interface o_key.

The vti interface must be created as follows (i_key = 0, o_key = mark):

   ip link add vti1 mode vti local 1.1.1.1 remote 2.2.2.2 okey 1

The SPs attached to vti1 must be created as follows (mark = vti1 o_key):

   ip xfrm policy add dir out mark 1 tmpl src 1.1.1.1 dst 2.2.2.2 \
      proto esp mode tunnel
   ip xfrm policy add dir in  mark 1 tmpl src 2.2.2.2 dst 1.1.1.1 \
      proto esp mode tunnel

The SAs are created with the default wildcard mark. There is no
distinction between global vs. vti SAs. Just their addresses will
possibly link them to a vti interface:

   ip xfrm state add src 1.1.1.1 dst 2.2.2.2 proto esp spi 1000 mode tunnel \
                 enc "cbc(aes)" "azertyuiopqsdfgh"

   ip xfrm state add src 2.2.2.2 dst 1.1.1.1 proto esp spi 2000 mode tunnel \
                 enc "cbc(aes)" "sqbdhgqsdjqjsdfh"

To avoid matching "global" (not vti) SPs in vti interfaces, global SPs
should no use the default wildcard mark, but explicitly match mark 0.

To avoid a double SPD lookup in input and output (in global and vti SPDs),
the NOPOLICY and NOXFRM options should be set on the vti interfaces:

   echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_policy
   echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_xfrm

The outgoing traffic is steered to vti1 by a route via the vti interface:

   ip route add 192.168.0.0/16 dev vti1

The incoming IPsec traffic is steered to vti1 because its outer addresses
match the vti1 tunnel configuration.

Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_vti.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e805e7b3030e..6e87f853d033 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -125,8 +125,17 @@ static int vti_rcv(struct sk_buff *skb)
 				  iph->saddr, iph->daddr, 0);
 	if (tunnel != NULL) {
 		struct pcpu_tstats *tstats;
+		u32 oldmark = skb->mark;
+		int ret;
 
-		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+
+		/* temporarily mark the skb with the tunnel o_key, to
+		 * only match policies with this mark.
+		 */
+		skb->mark = be32_to_cpu(tunnel->parms.o_key);
+		ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
+		skb->mark = oldmark;
+		if (!ret)
 			return -1;
 
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -135,7 +144,6 @@ static int vti_rcv(struct sk_buff *skb)
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		skb->mark = 0;
 		secpath_reset(skb);
 		skb->dev = tunnel->dev;
 		return 1;
@@ -167,7 +175,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	memset(&fl4, 0, sizeof(fl4));
 	flowi4_init_output(&fl4, tunnel->parms.link,
-			   be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
+			   be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
 			   RT_SCOPE_UNIVERSE,
 			   IPPROTO_IPIP, 0,
 			   dst, tiph->saddr, 0, 0);
-- 
cgit v1.2.1


From 455cc32bf128e114455d11ad919321ab89a2c312 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2013 06:30:09 -0700
Subject: l2tp: must disable bh before calling l2tp_xmit_skb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

François Cachereul made a very nice bug report and suspected
the bh_lock_sock() / bh_unlok_sock() pair used in l2tp_xmit_skb() from
process context was not good.

This problem was added by commit 6af88da14ee284aaad6e4326da09a89191ab6165
("l2tp: Fix locking in l2tp_core.c").

l2tp_eth_dev_xmit() runs from BH context, so we must disable BH
from other l2tp_xmit_skb() users.

[  452.060011] BUG: soft lockup - CPU#1 stuck for 23s! [accel-pppd:6662]
[  452.061757] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppoe pppox
ppp_generic slhc ipv6 ext3 mbcache jbd virtio_balloon xfs exportfs dm_mod
virtio_blk ata_generic virtio_net floppy ata_piix libata virtio_pci virtio_ring virtio [last unloaded: scsi_wait_scan]
[  452.064012] CPU 1
[  452.080015] BUG: soft lockup - CPU#2 stuck for 23s! [accel-pppd:6643]
[  452.080015] CPU 2
[  452.080015]
[  452.080015] Pid: 6643, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs
[  452.080015] RIP: 0010:[<ffffffff81059f6c>]  [<ffffffff81059f6c>] do_raw_spin_lock+0x17/0x1f
[  452.080015] RSP: 0018:ffff88007125fc18  EFLAGS: 00000293
[  452.080015] RAX: 000000000000aba9 RBX: ffffffff811d0703 RCX: 0000000000000000
[  452.080015] RDX: 00000000000000ab RSI: ffff8800711f6896 RDI: ffff8800745c8110
[  452.080015] RBP: ffff88007125fc18 R08: 0000000000000020 R09: 0000000000000000
[  452.080015] R10: 0000000000000000 R11: 0000000000000280 R12: 0000000000000286
[  452.080015] R13: 0000000000000020 R14: 0000000000000240 R15: 0000000000000000
[  452.080015] FS:  00007fdc0cc24700(0000) GS:ffff8800b6f00000(0000) knlGS:0000000000000000
[  452.080015] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  452.080015] CR2: 00007fdb054899b8 CR3: 0000000074404000 CR4: 00000000000006a0
[  452.080015] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  452.080015] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  452.080015] Process accel-pppd (pid: 6643, threadinfo ffff88007125e000, task ffff8800b27e6dd0)
[  452.080015] Stack:
[  452.080015]  ffff88007125fc28 ffffffff81256559 ffff88007125fc98 ffffffffa01b2bd1
[  452.080015]  ffff88007125fc58 000000000000000c 00000000029490d0 0000009c71dbe25e
[  452.080015]  000000000000005c 000000080000000e 0000000000000000 ffff880071170600
[  452.080015] Call Trace:
[  452.080015]  [<ffffffff81256559>] _raw_spin_lock+0xe/0x10
[  452.080015]  [<ffffffffa01b2bd1>] l2tp_xmit_skb+0x189/0x4ac [l2tp_core]
[  452.080015]  [<ffffffffa01c2d36>] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp]
[  452.080015]  [<ffffffff811c7872>] __sock_sendmsg_nosec+0x22/0x24
[  452.080015]  [<ffffffff811c83bd>] sock_sendmsg+0xa1/0xb6
[  452.080015]  [<ffffffff81254e88>] ? __schedule+0x5c1/0x616
[  452.080015]  [<ffffffff8103c7c6>] ? __dequeue_signal+0xb7/0x10c
[  452.080015]  [<ffffffff810bbd21>] ? fget_light+0x75/0x89
[  452.080015]  [<ffffffff811c8444>] ? sockfd_lookup_light+0x20/0x56
[  452.080015]  [<ffffffff811c9b34>] sys_sendto+0x10c/0x13b
[  452.080015]  [<ffffffff8125cac2>] system_call_fastpath+0x16/0x1b
[  452.080015] Code: 81 48 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 <8a> 07 eb f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3
[  452.080015] Call Trace:
[  452.080015]  [<ffffffff81256559>] _raw_spin_lock+0xe/0x10
[  452.080015]  [<ffffffffa01b2bd1>] l2tp_xmit_skb+0x189/0x4ac [l2tp_core]
[  452.080015]  [<ffffffffa01c2d36>] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp]
[  452.080015]  [<ffffffff811c7872>] __sock_sendmsg_nosec+0x22/0x24
[  452.080015]  [<ffffffff811c83bd>] sock_sendmsg+0xa1/0xb6
[  452.080015]  [<ffffffff81254e88>] ? __schedule+0x5c1/0x616
[  452.080015]  [<ffffffff8103c7c6>] ? __dequeue_signal+0xb7/0x10c
[  452.080015]  [<ffffffff810bbd21>] ? fget_light+0x75/0x89
[  452.080015]  [<ffffffff811c8444>] ? sockfd_lookup_light+0x20/0x56
[  452.080015]  [<ffffffff811c9b34>] sys_sendto+0x10c/0x13b
[  452.080015]  [<ffffffff8125cac2>] system_call_fastpath+0x16/0x1b
[  452.064012]
[  452.064012] Pid: 6662, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs
[  452.064012] RIP: 0010:[<ffffffff81059f6e>]  [<ffffffff81059f6e>] do_raw_spin_lock+0x19/0x1f
[  452.064012] RSP: 0018:ffff8800b6e83ba0  EFLAGS: 00000297
[  452.064012] RAX: 000000000000aaa9 RBX: ffff8800b6e83b40 RCX: 0000000000000002
[  452.064012] RDX: 00000000000000aa RSI: 000000000000000a RDI: ffff8800745c8110
[  452.064012] RBP: ffff8800b6e83ba0 R08: 000000000000c802 R09: 000000000000001c
[  452.064012] R10: ffff880071096c4e R11: 0000000000000006 R12: ffff8800b6e83b18
[  452.064012] R13: ffffffff8125d51e R14: ffff8800b6e83ba0 R15: ffff880072a589c0
[  452.064012] FS:  00007fdc0b81e700(0000) GS:ffff8800b6e80000(0000) knlGS:0000000000000000
[  452.064012] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  452.064012] CR2: 0000000000625208 CR3: 0000000074404000 CR4: 00000000000006a0
[  452.064012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  452.064012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  452.064012] Process accel-pppd (pid: 6662, threadinfo ffff88007129a000, task ffff8800744f7410)
[  452.064012] Stack:
[  452.064012]  ffff8800b6e83bb0 ffffffff81256559 ffff8800b6e83bc0 ffffffff8121c64a
[  452.064012]  ffff8800b6e83bf0 ffffffff8121ec7a ffff880072a589c0 ffff880071096c62
[  452.064012]  0000000000000011 ffffffff81430024 ffff8800b6e83c80 ffffffff8121f276
[  452.064012] Call Trace:
[  452.064012]  <IRQ>
[  452.064012]  [<ffffffff81256559>] _raw_spin_lock+0xe/0x10
[  452.064012]  [<ffffffff8121c64a>] spin_lock+0x9/0xb
[  452.064012]  [<ffffffff8121ec7a>] udp_queue_rcv_skb+0x186/0x269
[  452.064012]  [<ffffffff8121f276>] __udp4_lib_rcv+0x297/0x4ae
[  452.064012]  [<ffffffff8121c178>] ? raw_rcv+0xe9/0xf0
[  452.064012]  [<ffffffff8121f4a7>] udp_rcv+0x1a/0x1c
[  452.064012]  [<ffffffff811fe385>] ip_local_deliver_finish+0x12b/0x1a5
[  452.064012]  [<ffffffff811fe54e>] ip_local_deliver+0x53/0x84
[  452.064012]  [<ffffffff811fe1d0>] ip_rcv_finish+0x2bc/0x2f3
[  452.064012]  [<ffffffff811fe78f>] ip_rcv+0x210/0x269
[  452.064012]  [<ffffffff8101911e>] ? kvm_clock_get_cycles+0x9/0xb
[  452.064012]  [<ffffffff811d88cd>] __netif_receive_skb+0x3a5/0x3f7
[  452.064012]  [<ffffffff811d8eba>] netif_receive_skb+0x57/0x5e
[  452.064012]  [<ffffffff811cf30f>] ? __netdev_alloc_skb+0x1f/0x3b
[  452.064012]  [<ffffffffa0049126>] virtnet_poll+0x4ba/0x5a4 [virtio_net]
[  452.064012]  [<ffffffff811d9417>] net_rx_action+0x73/0x184
[  452.064012]  [<ffffffffa01b2cc2>] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core]
[  452.064012]  [<ffffffff810343b9>] __do_softirq+0xc3/0x1a8
[  452.064012]  [<ffffffff81013b56>] ? ack_APIC_irq+0x10/0x12
[  452.064012]  [<ffffffff81256559>] ? _raw_spin_lock+0xe/0x10
[  452.064012]  [<ffffffff8125e0ac>] call_softirq+0x1c/0x26
[  452.064012]  [<ffffffff81003587>] do_softirq+0x45/0x82
[  452.064012]  [<ffffffff81034667>] irq_exit+0x42/0x9c
[  452.064012]  [<ffffffff8125e146>] do_IRQ+0x8e/0xa5
[  452.064012]  [<ffffffff8125676e>] common_interrupt+0x6e/0x6e
[  452.064012]  <EOI>
[  452.064012]  [<ffffffff810b82a1>] ? kfree+0x8a/0xa3
[  452.064012]  [<ffffffffa01b2cc2>] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core]
[  452.064012]  [<ffffffffa01b2c25>] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core]
[  452.064012]  [<ffffffffa01c2d36>] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp]
[  452.064012]  [<ffffffff811c7872>] __sock_sendmsg_nosec+0x22/0x24
[  452.064012]  [<ffffffff811c83bd>] sock_sendmsg+0xa1/0xb6
[  452.064012]  [<ffffffff81254e88>] ? __schedule+0x5c1/0x616
[  452.064012]  [<ffffffff8103c7c6>] ? __dequeue_signal+0xb7/0x10c
[  452.064012]  [<ffffffff810bbd21>] ? fget_light+0x75/0x89
[  452.064012]  [<ffffffff811c8444>] ? sockfd_lookup_light+0x20/0x56
[  452.064012]  [<ffffffff811c9b34>] sys_sendto+0x10c/0x13b
[  452.064012]  [<ffffffff8125cac2>] system_call_fastpath+0x16/0x1b
[  452.064012] Code: 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 8a 07 <eb> f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 55 48
[  452.064012] Call Trace:
[  452.064012]  <IRQ>  [<ffffffff81256559>] _raw_spin_lock+0xe/0x10
[  452.064012]  [<ffffffff8121c64a>] spin_lock+0x9/0xb
[  452.064012]  [<ffffffff8121ec7a>] udp_queue_rcv_skb+0x186/0x269
[  452.064012]  [<ffffffff8121f276>] __udp4_lib_rcv+0x297/0x4ae
[  452.064012]  [<ffffffff8121c178>] ? raw_rcv+0xe9/0xf0
[  452.064012]  [<ffffffff8121f4a7>] udp_rcv+0x1a/0x1c
[  452.064012]  [<ffffffff811fe385>] ip_local_deliver_finish+0x12b/0x1a5
[  452.064012]  [<ffffffff811fe54e>] ip_local_deliver+0x53/0x84
[  452.064012]  [<ffffffff811fe1d0>] ip_rcv_finish+0x2bc/0x2f3
[  452.064012]  [<ffffffff811fe78f>] ip_rcv+0x210/0x269
[  452.064012]  [<ffffffff8101911e>] ? kvm_clock_get_cycles+0x9/0xb
[  452.064012]  [<ffffffff811d88cd>] __netif_receive_skb+0x3a5/0x3f7
[  452.064012]  [<ffffffff811d8eba>] netif_receive_skb+0x57/0x5e
[  452.064012]  [<ffffffff811cf30f>] ? __netdev_alloc_skb+0x1f/0x3b
[  452.064012]  [<ffffffffa0049126>] virtnet_poll+0x4ba/0x5a4 [virtio_net]
[  452.064012]  [<ffffffff811d9417>] net_rx_action+0x73/0x184
[  452.064012]  [<ffffffffa01b2cc2>] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core]
[  452.064012]  [<ffffffff810343b9>] __do_softirq+0xc3/0x1a8
[  452.064012]  [<ffffffff81013b56>] ? ack_APIC_irq+0x10/0x12
[  452.064012]  [<ffffffff81256559>] ? _raw_spin_lock+0xe/0x10
[  452.064012]  [<ffffffff8125e0ac>] call_softirq+0x1c/0x26
[  452.064012]  [<ffffffff81003587>] do_softirq+0x45/0x82
[  452.064012]  [<ffffffff81034667>] irq_exit+0x42/0x9c
[  452.064012]  [<ffffffff8125e146>] do_IRQ+0x8e/0xa5
[  452.064012]  [<ffffffff8125676e>] common_interrupt+0x6e/0x6e
[  452.064012]  <EOI>  [<ffffffff810b82a1>] ? kfree+0x8a/0xa3
[  452.064012]  [<ffffffffa01b2cc2>] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core]
[  452.064012]  [<ffffffffa01b2c25>] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core]
[  452.064012]  [<ffffffffa01c2d36>] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp]
[  452.064012]  [<ffffffff811c7872>] __sock_sendmsg_nosec+0x22/0x24
[  452.064012]  [<ffffffff811c83bd>] sock_sendmsg+0xa1/0xb6
[  452.064012]  [<ffffffff81254e88>] ? __schedule+0x5c1/0x616
[  452.064012]  [<ffffffff8103c7c6>] ? __dequeue_signal+0xb7/0x10c
[  452.064012]  [<ffffffff810bbd21>] ? fget_light+0x75/0x89
[  452.064012]  [<ffffffff811c8444>] ? sockfd_lookup_light+0x20/0x56
[  452.064012]  [<ffffffff811c9b34>] sys_sendto+0x10c/0x13b
[  452.064012]  [<ffffffff8125cac2>] system_call_fastpath+0x16/0x1b

Reported-by: François Cachereul <f.cachereul@alphalink.fr>
Tested-by: François Cachereul <f.cachereul@alphalink.fr>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5ebee2ded9e9..8c46b271064a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 		goto error_put_sess_tun;
 	}
 
+	local_bh_disable();
 	l2tp_xmit_skb(session, skb, session->hdr_len);
+	local_bh_enable();
 
 	sock_put(ps->tunnel_sock);
 	sock_put(sk);
@@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	skb->data[0] = ppph[0];
 	skb->data[1] = ppph[1];
 
+	local_bh_disable();
 	l2tp_xmit_skb(session, skb, session->hdr_len);
+	local_bh_enable();
 
 	sock_put(sk_tun);
 	sock_put(sk);
-- 
cgit v1.2.1


From 638a52b801e40ed276ceb69b73579ad99365361a Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Sun, 6 Oct 2013 15:15:33 -0700
Subject: netem: update backlog after drop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When packet is dropped from rb-tree netem the backlog statistic should
also be updated.

Reported-by: Сергеев Сергей <adron@yapic.net>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a6d788d45216..6b75b6733e8e 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -520,6 +520,7 @@ static unsigned int netem_drop(struct Qdisc *sch)
 			skb->next = NULL;
 			skb->prev = NULL;
 			len = qdisc_pkt_len(skb);
+			sch->qstats.backlog -= len;
 			kfree_skb(skb);
 		}
 	}
-- 
cgit v1.2.1


From ff704050f2fc0f3382b5a70bba56a51a3feca79d Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Sun, 6 Oct 2013 15:16:49 -0700
Subject: netem: free skb's in tree on reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Netem can leak memory because packets get stored in red-black
tree and it is not cleared on reset.

Reported by: Сергеев Сергей <adron@yapic.net>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6b75b6733e8e..b87e83d07478 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -358,6 +358,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 	return PSCHED_NS2TICKS(ticks);
 }
 
+static void tfifo_reset(struct Qdisc *sch)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p;
+
+	while ((p = rb_first(&q->t_root))) {
+		struct sk_buff *skb = netem_rb_to_skb(p);
+
+		rb_erase(p, &q->t_root);
+		skb->next = NULL;
+		skb->prev = NULL;
+		kfree_skb(skb);
+	}
+}
+
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -610,6 +625,7 @@ static void netem_reset(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_reset_queue(sch);
+	tfifo_reset(sch);
 	if (q->qdisc)
 		qdisc_reset(q->qdisc);
 	qdisc_watchdog_cancel(&q->watchdog);
-- 
cgit v1.2.1


From bf58175954f2d390300df00f96070312d1b15096 Mon Sep 17 00:00:00 2001
From: Oussama Ghorbel <ou.ghorbel@gmail.com>
Date: Thu, 10 Oct 2013 18:50:27 +0100
Subject: ipv6: Initialize ip6_tnl.hlen in gre tunnel even if no route is found

The ip6_tnl.hlen (gre and ipv6 headers length) is independent from the
outgoing interface, so it would be better to initialize it even when no
route is found, otherwise its value will be zero.
While I'm not sure if this could happen in real life, but doing that
will avoid to call the skb_push function with a zero in ip6gre_header
function.

Suggested-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Oussama Ghorbel <ou.ghorbel@gmail.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1ef1fa2b22a6..bf4a9a084de5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -976,6 +976,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 		if (t->parms.o_flags&GRE_SEQ)
 			addend += 4;
 	}
+	t->hlen = addend;
 
 	if (p->flags & IP6_TNL_F_CAP_XMIT) {
 		int strict = (ipv6_addr_type(&p->raddr) &
@@ -1002,8 +1003,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 		}
 		ip6_rt_put(rt);
 	}
-
-	t->hlen = addend;
 }
 
 static int ip6gre_tnl_change(struct ip6_tnl *t,
-- 
cgit v1.2.1


From f5563318ff1bde15b10e736e97ffce13be08bc1a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 11 Oct 2013 14:47:05 +0200
Subject: wireless: radiotap: fix parsing buffer overrun

When parsing an invalid radiotap header, the parser can overrun
the buffer that is passed in because it doesn't correctly check
 1) the minimum radiotap header size
 2) the space for extended bitmaps

The first issue doesn't affect any in-kernel user as they all
check the minimum size before calling the radiotap function.
The second issue could potentially affect the kernel if an skb
is passed in that consists only of the radiotap header with a
lot of extended bitmaps that extend past the SKB. In that case
a read-only buffer overrun by at most 4 bytes is possible.

Fix this by adding the appropriate checks to the parser.

Cc: stable@vger.kernel.org
Reported-by: Evan Huus <eapache@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/radiotap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 7d604c06c3dc..a271c27fac77 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init(
 	struct ieee80211_radiotap_header *radiotap_header,
 	int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns)
 {
+	/* check the radiotap header can actually be present */
+	if (max_length < sizeof(struct ieee80211_radiotap_header))
+		return -EINVAL;
+
 	/* Linux only supports version 0 radiotap format */
 	if (radiotap_header->it_version)
 		return -EINVAL;
@@ -131,7 +135,8 @@ int ieee80211_radiotap_iterator_init(
 			 */
 
 			if ((unsigned long)iterator->_arg -
-			    (unsigned long)iterator->_rtheader >
+			    (unsigned long)iterator->_rtheader +
+			    sizeof(uint32_t) >
 			    (unsigned long)iterator->_max_length)
 				return -EINVAL;
 		}
-- 
cgit v1.2.1


From d86aa4f8ca58898ec6a94c0635da20b948171ed7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 11 Oct 2013 15:47:06 +0200
Subject: mac80211: fix crash if bitrate calculation goes wrong

If a frame's timestamp is calculated, and the bitrate
calculation goes wrong and returns zero, the system
will attempt to divide by zero and crash. Catch this
case and print the rate information that the driver
reported when this happens.

Cc: stable@vger.kernel.org
Reported-by: Thomas Lindroth <thomas.lindroth@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9c3200bcfc02..69e4ef5348a0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2238,6 +2238,10 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
 	}
 
 	rate = cfg80211_calculate_bitrate(&ri);
+	if (WARN_ONCE(!rate,
+		      "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n",
+		      status->flag, status->rate_idx, status->vht_nss))
+		return 0;
 
 	/* rewind from end of MPDU */
 	if (status->flag & RX_FLAG_MACTIME_END)
-- 
cgit v1.2.1


From 27127a82561a2a3ed955ce207048e1b066a80a2a Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@windriver.com>
Date: Tue, 15 Oct 2013 22:01:30 -0400
Subject: sctp: Use software crc32 checksum when xfrm transform will happen.

igb/ixgbe have hardware sctp checksum support, when this feature is enabled
and also IPsec is armed to protect sctp traffic, ugly things happened as
xfrm_output checks CHECKSUM_PARTIAL to do checksum operation(sum every thing
up and pack the 16bits result in the checksum field). The result is fail
establishment of sctp communication.

Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Fan Du <fan.du@windriver.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0ac3a65daccb..d35b54cb3020 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -536,7 +536,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
 	 */
 	if (!sctp_checksum_disable) {
-		if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) {
+		if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
+		    (dst_xfrm(dst) != NULL)) {
 			__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
 
 			/* 3) Put the resultant value into the checksum field in the
-- 
cgit v1.2.1


From d2dbbba77e95dff4b4f901fee236fef6d9552072 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 15 Oct 2013 22:01:31 -0400
Subject: sctp: Perform software checksum if packet has to be fragmented.

IP/IPv6 fragmentation knows how to compute only TCP/UDP checksum.
This causes problems if SCTP packets has to be fragmented and
ipsummed has been set to PARTIAL due to checksum offload support.
This condition can happen when retransmitting after MTU discover,
or when INIT or other control chunks are larger then MTU.
Check for the rare fragmentation condition in SCTP and use software
checksum calculation in this case.

CC: Fan Du <fan.du@windriver.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/output.c b/net/sctp/output.c
index d35b54cb3020..319137340d15 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -537,7 +537,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 */
 	if (!sctp_checksum_disable) {
 		if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
-		    (dst_xfrm(dst) != NULL)) {
+		    (dst_xfrm(dst) != NULL) || packet->ipfragok) {
 			__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
 
 			/* 3) Put the resultant value into the checksum field in the
-- 
cgit v1.2.1


From 031afe4990a7c9dbff41a3a742c44d3e740ea0a1 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sat, 12 Oct 2013 10:16:27 -0700
Subject: tcp: fix incorrect ca_state in tail loss probe

On receiving an ACK that covers the loss probe sequence, TLP
immediately sets the congestion state to Open, even though some packets
are not recovered and retransmisssion are on the way.  The later ACks
may trigger a WARN_ON check in step D of tcp_fastretrans_alert(), e.g.,
https://bugzilla.redhat.com/show_bug.cgi?id=989251

The fix is to follow the similar procedure in recovery by calling
tcp_try_keep_open(). The sender switches to Open state if no packets
are retransmissted. Otherwise it goes to Disorder and let subsequent
ACKs move the state to Recovery or Open.

Reported-By: Michael Sterrett <michael@sterretts.net>
Tested-By: Dormando <dormando@rydia.net>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 113dc5f17d47..53974c7c04fe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3291,7 +3291,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
 			tcp_init_cwnd_reduction(sk, true);
 			tcp_set_ca_state(sk, TCP_CA_CWR);
 			tcp_end_cwnd_reduction(sk);
-			tcp_set_ca_state(sk, TCP_CA_Open);
+			tcp_try_keep_open(sk);
 			NET_INC_STATS_BH(sock_net(sk),
 					 LINUX_MIB_TCPLOSSPROBERECOVERY);
 		}
-- 
cgit v1.2.1


From c52e2421f7368fd36cbe330d2cf41b10452e39a9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Oct 2013 11:54:30 -0700
Subject: tcp: must unclone packets before mangling them

TCP stack should make sure it owns skbs before mangling them.

We had various crashes using bnx2x, and it turned out gso_size
was cleared right before bnx2x driver was populating TC descriptor
of the _previous_ packet send. TCP stack can sometime retransmit
packets that are still in Qdisc.

Of course we could make bnx2x driver more robust (using
ACCESS_ONCE(shinfo->gso_size) for example), but the bug is TCP stack.

We have identified two points where skb_unclone() was needed.

This patch adds a WARN_ON_ONCE() to warn us if we missed another
fix of this kind.

Kudos to Neal for finding the root cause of this bug. Its visible
using small MSS.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c6f01f2cdb32..8fad1c155688 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -986,6 +986,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
+	/* Make sure we own this skb before messing gso_size/gso_segs */
+	WARN_ON_ONCE(skb_cloned(skb));
+
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
 	    skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
@@ -1067,9 +1070,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
@@ -2344,6 +2345,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		int oldpcount = tcp_skb_pcount(skb);
 
 		if (unlikely(oldpcount > 1)) {
+			if (skb_unclone(skb, GFP_ATOMIC))
+				return -ENOMEM;
 			tcp_init_tso_segs(sk, skb, cur_mss);
 			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
 		}
-- 
cgit v1.2.1


From 8f26fb1c1ed81c33f5d87c5936f4d9d1b4118918 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Oct 2013 12:24:54 -0700
Subject: tcp: remove the sk_can_gso() check from tcp_set_skb_tso_segs()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sk_can_gso() should only be used as a hint in tcp_sendmsg() to build GSO
packets in the first place. (As a performance hint)

Once we have GSO packets in write queue, we can not decide they are no
longer GSO only because flow now uses a route which doesn't handle
TSO/GSO.

Core networking stack handles the case very well for us, all we need
is keeping track of packet counts in MSS terms, regardless of
segmentation done later (in GSO or hardware)

Right now, if  tcp_fragment() splits a GSO packet in two parts,
@left and @right, and route changed through a non GSO device,
both @left and @right have pcount set to 1, which is wrong,
and leads to incorrect packet_count tracking.

This problem was added in commit d5ac99a648 ("[TCP]: skb pcount with MTU
discovery")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8fad1c155688..d46f2143305c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -989,8 +989,7 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 	/* Make sure we own this skb before messing gso_size/gso_segs */
 	WARN_ON_ONCE(skb_cloned(skb));
 
-	if (skb->len <= mss_now || !sk_can_gso(sk) ||
-	    skb->ip_summed == CHECKSUM_NONE) {
+	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
-- 
cgit v1.2.1


From 4b6c7879d84ad06a2ac5b964808ed599187a188d Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Tue, 15 Oct 2013 14:57:45 -0400
Subject: bridge: Correctly clamp MAX forward_delay when enabling STP

Commit be4f154d5ef0ca147ab6bcd38857a774133f5450
	bridge: Clamp forward_delay when enabling STP
had a typo when attempting to clamp maximum forward delay.

It is possible to set bridge_forward_delay to be higher then
permitted maximum when STP is off.  When turning STP on, the
higher then allowed delay has to be clamed down to max value.

CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Reviewed-by: Veaceslav Falico <vfalico@redhat.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 108084a04671..656a6f3e40de 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br)
 
 	if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY)
 		__br_set_forward_delay(br, BR_MIN_FORWARD_DELAY);
-	else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY)
+	else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
 		__br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
 
 	if (r == 0) {
-- 
cgit v1.2.1


From 8adff41c3d259eb5e313b7b04669eee545925154 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Wed, 16 Oct 2013 17:07:13 +0900
Subject: bridge: Don't use VID 0 and 4095 in vlan filtering

IEEE 802.1Q says that:
- VID 0 shall not be configured as a PVID, or configured in any Filtering
Database entry.
- VID 4095 shall not be configured as a PVID, or transmitted in a tag
header. This VID value may be used to indicate a wildcard match for the VID
in management operations or Filtering Database entries.
(See IEEE 802.1Q-2011 6.9.1 and Table 9-2)

Don't accept adding these VIDs in the vlan_filtering implementation.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c     |  4 +-
 net/bridge/br_netlink.c |  2 +-
 net/bridge/br_vlan.c    | 97 +++++++++++++++++++++++--------------------------
 3 files changed, 49 insertions(+), 54 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ffd5874f2592..33e8f23acddd 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -700,7 +700,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 
 		vid = nla_get_u16(tb[NDA_VLAN]);
 
-		if (vid >= VLAN_N_VID) {
+		if (!vid || vid >= VLAN_VID_MASK) {
 			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
 				vid);
 			return -EINVAL;
@@ -794,7 +794,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 
 		vid = nla_get_u16(tb[NDA_VLAN]);
 
-		if (vid >= VLAN_N_VID) {
+		if (!vid || vid >= VLAN_VID_MASK) {
 			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
 				vid);
 			return -EINVAL;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e74ddc1c29a8..f75d92e4f96b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -243,7 +243,7 @@ static int br_afspec(struct net_bridge *br,
 
 		vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
 
-		if (vinfo->vid >= VLAN_N_VID)
+		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
 			return -EINVAL;
 
 		switch (cmd) {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 9a9ffe7e4019..21b6d217872b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -45,37 +45,34 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
 		return 0;
 	}
 
-	if (vid) {
-		if (v->port_idx) {
-			p = v->parent.port;
-			br = p->br;
-			dev = p->dev;
-		} else {
-			br = v->parent.br;
-			dev = br->dev;
-		}
-		ops = dev->netdev_ops;
-
-		if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
-			/* Add VLAN to the device filter if it is supported.
-			 * Stricly speaking, this is not necessary now, since
-			 * devices are made promiscuous by the bridge, but if
-			 * that ever changes this code will allow tagged
-			 * traffic to enter the bridge.
-			 */
-			err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
-						       vid);
-			if (err)
-				return err;
-		}
-
-		err = br_fdb_insert(br, p, dev->dev_addr, vid);
-		if (err) {
-			br_err(br, "failed insert local address into bridge "
-			       "forwarding table\n");
-			goto out_filt;
-		}
+	if (v->port_idx) {
+		p = v->parent.port;
+		br = p->br;
+		dev = p->dev;
+	} else {
+		br = v->parent.br;
+		dev = br->dev;
+	}
+	ops = dev->netdev_ops;
+
+	if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
+		/* Add VLAN to the device filter if it is supported.
+		 * Stricly speaking, this is not necessary now, since
+		 * devices are made promiscuous by the bridge, but if
+		 * that ever changes this code will allow tagged
+		 * traffic to enter the bridge.
+		 */
+		err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
+					       vid);
+		if (err)
+			return err;
+	}
 
+	err = br_fdb_insert(br, p, dev->dev_addr, vid);
+	if (err) {
+		br_err(br, "failed insert local address into bridge "
+		       "forwarding table\n");
+		goto out_filt;
 	}
 
 	set_bit(vid, v->vlan_bitmap);
@@ -98,7 +95,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
 	__vlan_delete_pvid(v, vid);
 	clear_bit(vid, v->untagged_bitmap);
 
-	if (v->port_idx && vid) {
+	if (v->port_idx) {
 		struct net_device *dev = v->parent.port->dev;
 		const struct net_device_ops *ops = dev->netdev_ops;
 
@@ -248,7 +245,9 @@ bool br_allowed_egress(struct net_bridge *br,
 	return false;
 }
 
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
 int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
 {
 	struct net_port_vlans *pv = NULL;
@@ -278,7 +277,9 @@ out:
 	return err;
 }
 
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
 int br_vlan_delete(struct net_bridge *br, u16 vid)
 {
 	struct net_port_vlans *pv;
@@ -289,14 +290,9 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
 	if (!pv)
 		return -EINVAL;
 
-	if (vid) {
-		/* If the VID !=0 remove fdb for this vid. VID 0 is special
-		 * in that it's the default and is always there in the fdb.
-		 */
-		spin_lock_bh(&br->hash_lock);
-		fdb_delete_by_addr(br, br->dev->dev_addr, vid);
-		spin_unlock_bh(&br->hash_lock);
-	}
+	spin_lock_bh(&br->hash_lock);
+	fdb_delete_by_addr(br, br->dev->dev_addr, vid);
+	spin_unlock_bh(&br->hash_lock);
 
 	__vlan_del(pv, vid);
 	return 0;
@@ -329,7 +325,9 @@ unlock:
 	return 0;
 }
 
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
 int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
 {
 	struct net_port_vlans *pv = NULL;
@@ -363,7 +361,9 @@ clean_up:
 	return err;
 }
 
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
 int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
 {
 	struct net_port_vlans *pv;
@@ -374,14 +374,9 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
 	if (!pv)
 		return -EINVAL;
 
-	if (vid) {
-		/* If the VID !=0 remove fdb for this vid. VID 0 is special
-		 * in that it's the default and is always there in the fdb.
-		 */
-		spin_lock_bh(&port->br->hash_lock);
-		fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
-		spin_unlock_bh(&port->br->hash_lock);
-	}
+	spin_lock_bh(&port->br->hash_lock);
+	fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
+	spin_unlock_bh(&port->br->hash_lock);
 
 	return __vlan_del(pv, vid);
 }
-- 
cgit v1.2.1


From b90356ce17c2b199cd55530cb9c3cfabe18dbdc3 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Wed, 16 Oct 2013 17:07:14 +0900
Subject: bridge: Apply the PVID to priority-tagged frames

IEEE 802.1Q says that when we receive priority-tagged (VID 0) frames
use the PVID for the port as its VID.
(See IEEE 802.1Q-2011 6.9.1 and Table 9-2)

Apply the PVID to not only untagged frames but also priority-tagged frames.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 21b6d217872b..5a9c44a0c306 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -189,6 +189,8 @@ out:
 bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 			struct sk_buff *skb, u16 *vid)
 {
+	int err;
+
 	/* If VLAN filtering is disabled on the bridge, all packets are
 	 * permitted.
 	 */
@@ -201,20 +203,31 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 	if (!v)
 		return false;
 
-	if (br_vlan_get_tag(skb, vid)) {
+	err = br_vlan_get_tag(skb, vid);
+	if (!*vid) {
 		u16 pvid = br_get_pvid(v);
 
-		/* Frame did not have a tag.  See if pvid is set
-		 * on this port.  That tells us which vlan untagged
-		 * traffic belongs to.
+		/* Frame had a tag with VID 0 or did not have a tag.
+		 * See if pvid is set on this port.  That tells us which
+		 * vlan untagged or priority-tagged traffic belongs to.
 		 */
 		if (pvid == VLAN_N_VID)
 			return false;
 
-		/* PVID is set on this port.  Any untagged ingress
-		 * frame is considered to belong to this vlan.
+		/* PVID is set on this port.  Any untagged or priority-tagged
+		 * ingress frame is considered to belong to this vlan.
 		 */
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+		if (likely(err))
+			/* Untagged Frame. */
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+		else
+			/* Priority-tagged Frame.
+			 * At this point, We know that skb->vlan_tci had
+			 * VLAN_TAG_PRESENT bit and its VID field was 0x000.
+			 * We update only VID field and preserve PCP field.
+			 */
+			skb->vlan_tci |= pvid;
+
 		return true;
 	}
 
-- 
cgit v1.2.1


From d1c6c708c4da9d104e0b7c116654cb449bff9b5f Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Wed, 16 Oct 2013 17:07:15 +0900
Subject: bridge: Fix the way the PVID is referenced

We are using the VLAN_TAG_PRESENT bit to detect whether the PVID is
set or not at br_get_pvid(), while we don't care about the bit in
adding/deleting the PVID, which makes it impossible to forward any
incomming untagged frame with vlan_filtering enabled.

Since vid 0 cannot be used for the PVID, we can use vid 0 to indicate
that the PVID is not set, which is slightly more efficient than using
the VLAN_TAG_PRESENT.

Fix the problem by getting rid of using the VLAN_TAG_PRESENT.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index efb57d911569..7ca2ae45b2d5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -643,9 +643,7 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v)
 	 * vid wasn't set
 	 */
 	smp_rmb();
-	return (v->pvid & VLAN_TAG_PRESENT) ?
-			(v->pvid & ~VLAN_TAG_PRESENT) :
-			VLAN_N_VID;
+	return v->pvid ?: VLAN_N_VID;
 }
 
 #else
-- 
cgit v1.2.1


From dfb5fa32c66496a53ec6a45302d902416b51ade2 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Wed, 16 Oct 2013 17:07:16 +0900
Subject: bridge: Fix updating FDB entries when the PVID is applied

We currently set the value that variable vid is pointing, which will be
used in FDB later, to 0 at br_allowed_ingress() when we receive untagged
or priority-tagged frames, even though the PVID is valid.
This leads to FDB updates in such a wrong way that they are learned with
VID 0.
Update the value to that of PVID if the PVID is applied.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 5a9c44a0c306..53f0990eab58 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -217,6 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
 		/* PVID is set on this port.  Any untagged or priority-tagged
 		 * ingress frame is considered to belong to this vlan.
 		 */
+		*vid = pvid;
 		if (likely(err))
 			/* Untagged Frame. */
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
-- 
cgit v1.2.1


From 90c6bd34f884cd9cee21f1d152baf6c18bcac949 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 17 Oct 2013 22:51:31 +0200
Subject: net: unix: inherit SOCK_PASS{CRED, SEC} flags from socket to fix race

In the case of credentials passing in unix stream sockets (dgram
sockets seem not affected), we get a rather sparse race after
commit 16e5726 ("af_unix: dont send SCM_CREDENTIALS by default").

We have a stream server on receiver side that requests credential
passing from senders (e.g. nc -U). Since we need to set SO_PASSCRED
on each spawned/accepted socket on server side to 1 first (as it's
not inherited), it can happen that in the time between accept() and
setsockopt() we get interrupted, the sender is being scheduled and
continues with passing data to our receiver. At that time SO_PASSCRED
is neither set on sender nor receiver side, hence in cmsg's
SCM_CREDENTIALS we get eventually pid:0, uid:65534, gid:65534
(== overflow{u,g}id) instead of what we actually would like to see.

On the sender side, here nc -U, the tests in maybe_add_creds()
invoked through unix_stream_sendmsg() would fail, as at that exact
time, as mentioned, the sender has neither SO_PASSCRED on his side
nor sees it on the server side, and we have a valid 'other' socket
in place. Thus, sender believes it would just look like a normal
connection, not needing/requesting SO_PASSCRED at that time.

As reverting 16e5726 would not be an option due to the significant
performance regression reported when having creds always passed,
one way/trade-off to prevent that would be to set SO_PASSCRED on
the listener socket and allow inheriting these flags to the spawned
socket on server side in accept(). It seems also logical to do so
if we'd tell the listener socket to pass those flags onwards, and
would fix the race.

Before, strace:

recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}],
        msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET,
        cmsg_type=SCM_CREDENTIALS{pid=0, uid=65534, gid=65534}},
        msg_flags=0}, 0) = 5

After, strace:

recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}],
        msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET,
        cmsg_type=SCM_CREDENTIALS{pid=11580, uid=1000, gid=1000}},
        msg_flags=0}, 0) = 5

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 86de99ad2976..c1f403bed683 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	return 0;
 }
 
+static void unix_sock_inherit_flags(const struct socket *old,
+				    struct socket *new)
+{
+	if (test_bit(SOCK_PASSCRED, &old->flags))
+		set_bit(SOCK_PASSCRED, &new->flags);
+	if (test_bit(SOCK_PASSSEC, &old->flags))
+		set_bit(SOCK_PASSSEC, &new->flags);
+}
+
 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
 {
 	struct sock *sk = sock->sk;
@@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
 	/* attach accepted sock to socket */
 	unix_state_lock(tsk);
 	newsock->state = SS_CONNECTED;
+	unix_sock_inherit_flags(sock, newsock);
 	sock_graft(tsk, newsock);
 	unix_state_unlock(tsk);
 	return 0;
-- 
cgit v1.2.1


From e36d3ff91130002c7c2d1d6a55556991da1daecc Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Sat, 19 Oct 2013 12:29:15 +0200
Subject: udp6: respect IPV6_DONTFRAG sockopt in case there are pending frames

if up->pending != 0 dontfrag is left with default value -1. That
causes that application that do:
sendto len>mtu flag MSG_MORE
sendto len>mtu flag 0
will receive EMSGSIZE errno as the result of the second sendto.

This patch fixes it by respecting IPV6_DONTFRAG socket option.

introduced by:
commit 4b340ae20d0e2366792abe70f46629e576adaf5e "IPv6: Complete IPV6_DONTFRAG support"

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 72b7eaaf3ca0..18786098fd41 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1225,9 +1225,6 @@ do_udp_sendmsg:
 	if (tclass < 0)
 		tclass = np->tclass;
 
-	if (dontfrag < 0)
-		dontfrag = np->dontfrag;
-
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
 back_from_confirm:
@@ -1246,6 +1243,8 @@ back_from_confirm:
 	up->pending = AF_INET6;
 
 do_append_data:
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
 	up->len += ulen;
 	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
 	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
-- 
cgit v1.2.1


From c547dbf55d5f8cf615ccc0e7265e98db27d3fb8b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Sat, 19 Oct 2013 12:29:16 +0200
Subject: ip6_output: do skb ufo init for peeked non ufo skb as well

Now, if user application does:
sendto len<mtu flag MSG_MORE
sendto len>mtu flag 0
The skb is not treated as fragmented one because it is not initialized
that way. So move the initialization to fix this.

introduced by:
commit e89e9cf539a28df7d0eb1d0a545368e9920b34ac "[IPv4/IPv6]: UFO Scatter-gather approach"

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a54c45ce4a48..975624b8d2ea 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1008,6 +1008,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 
 {
 	struct sk_buff *skb;
+	struct frag_hdr fhdr;
 	int err;
 
 	/* There is support for UDP large send offload by network
@@ -1015,8 +1016,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	 * udp datagram
 	 */
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
-		struct frag_hdr fhdr;
-
 		skb = sock_alloc_send_skb(sk,
 			hh_len + fragheaderlen + transhdrlen + 20,
 			(flags & MSG_DONTWAIT), &err);
@@ -1036,20 +1035,24 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb->transport_header = skb->network_header + fragheaderlen;
 
 		skb->protocol = htons(ETH_P_IPV6);
-		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 
-		/* Specify the length of each IPv6 datagram fragment.
-		 * It has to be a multiple of 8.
-		 */
-		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
-					     sizeof(struct frag_hdr)) & ~7;
-		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-		ipv6_select_ident(&fhdr, rt);
-		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
 		__skb_queue_tail(&sk->sk_write_queue, skb);
+	} else if (skb_is_gso(skb)) {
+		goto append;
 	}
 
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	/* Specify the length of each IPv6 datagram fragment.
+	 * It has to be a multiple of 8.
+	 */
+	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+				     sizeof(struct frag_hdr)) & ~7;
+	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+	ipv6_select_ident(&fhdr, rt);
+	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+
+append:
 	return skb_append_datato_frags(sk, skb, getfrag, from,
 				       (length - transhdrlen));
 }
-- 
cgit v1.2.1


From e93b7d748be887cd7639b113ba7d7ef792a7efb9 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Sat, 19 Oct 2013 12:29:17 +0200
Subject: ip_output: do skb ufo init for peeked non ufo skb as well

Now, if user application does:
sendto len<mtu flag MSG_MORE
sendto len>mtu flag 0
The skb is not treated as fragmented one because it is not initialized
that way. So move the initialization to fix this.

introduced by:
commit e89e9cf539a28df7d0eb1d0a545368e9920b34ac "[IPv4/IPv6]: UFO Scatter-gather approach"

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a04d872c54f9..3982eabf61e1 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -772,15 +772,20 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		/* initialize protocol header pointer */
 		skb->transport_header = skb->network_header + fragheaderlen;
 
-		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
 
-		/* specify the length of each IP datagram fragment */
-		skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
-		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+
 		__skb_queue_tail(queue, skb);
+	} else if (skb_is_gso(skb)) {
+		goto append;
 	}
 
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	/* specify the length of each IP datagram fragment */
+	skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
+	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+
+append:
 	return skb_append_datato_frags(sk, skb, getfrag, from,
 				       (length - transhdrlen));
 }
-- 
cgit v1.2.1


From 550bab42f83308c9d6ab04a980cc4333cef1c8fa Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 20 Oct 2013 15:43:04 +0300
Subject: ipv6: fill rt6i_gateway with nexthop address

Make sure rt6i_gateway contains nexthop information in
all routes returned from lookup or when routes are directly
attached to skb for generated ICMP packets.

The effect of this patch should be a faster version of
rt6_nexthop() and the consideration of local addresses as
nexthop.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 ++--
 net/ipv6/route.c      | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 975624b8d2ea..91fb4e8212f5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+	nexthop = rt6_nexthop((struct rt6_info *)dst);
 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 	if (unlikely(!neigh))
 		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 	 */
 	rt = (struct rt6_info *) *dst;
 	rcu_read_lock_bh();
-	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
 	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
 	rcu_read_unlock_bh();
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c979dd96d82a..c1ee3813e1ae 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -851,7 +851,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 			if (ort->rt6i_dst.plen != 128 &&
 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 				rt->rt6i_flags |= RTF_ANYCAST;
-			rt->rt6i_gateway = *daddr;
 		}
 
 		rt->rt6i_flags |= RTF_CACHE;
@@ -1338,6 +1337,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->dst.flags |= DST_HOST;
 	rt->dst.output  = ip6_output;
 	atomic_set(&rt->dst.__refcnt, 1);
+	rt->rt6i_gateway  = fl6->daddr;
 	rt->rt6i_dst.addr = fl6->daddr;
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_idev     = idev;
@@ -1873,7 +1873,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
 			in6_dev_hold(rt->rt6i_idev);
 		rt->dst.lastuse = jiffies;
 
-		rt->rt6i_gateway = ort->rt6i_gateway;
+		if (ort->rt6i_flags & RTF_GATEWAY)
+			rt->rt6i_gateway = ort->rt6i_gateway;
+		else
+			rt->rt6i_gateway = *dest;
 		rt->rt6i_flags = ort->rt6i_flags;
 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
 		    (RTF_DEFAULT | RTF_ADDRCONF))
@@ -2160,6 +2163,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	else
 		rt->rt6i_flags |= RTF_LOCAL;
 
+	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
-- 
cgit v1.2.1


From 56e42441ed54b092d6c7411138ce60d049e7c731 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 20 Oct 2013 15:43:05 +0300
Subject: netfilter: nf_conntrack: fix rt6i_gateway checks for H.323 helper

Now when rt6_nexthop() can return nexthop address we can use it
for proper nexthop comparison of directly connected destinations.
For more information refer to commit bbb5823cf742a7
("netfilter: nf_conntrack: fix rt_gateway checks for H.323 helper").

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_h323_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bdebd03bc8cd..70866d192efc 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 				   flowi6_to_flowi(&fl1), false)) {
 			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
 					   flowi6_to_flowi(&fl2), false)) {
-				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
-					    sizeof(rt1->rt6i_gateway)) &&
+				if (ipv6_addr_equal(rt6_nexthop(rt1),
+						    rt6_nexthop(rt2)) &&
 				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
 				dst_release(&rt2->dst);
-- 
cgit v1.2.1


From c2f17e827b419918c856131f592df9521e1a38e3 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 21 Oct 2013 06:17:15 +0200
Subject: ipv6: probe routes asynchronous in rt6_probe

Routes need to be probed asynchronous otherwise the call stack gets
exhausted when the kernel attemps to deliver another skb inline, like
e.g. xt_TEE does, and we probe at the same time.

We update neigh->updated still at once, otherwise we would send to
many probes.

Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c1ee3813e1ae..f54e3a101098 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -476,6 +476,24 @@ out:
 }
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work {
+	struct work_struct work;
+	struct in6_addr target;
+	struct net_device *dev;
+};
+
+static void rt6_probe_deferred(struct work_struct *w)
+{
+	struct in6_addr mcaddr;
+	struct __rt6_probe_work *work =
+		container_of(w, struct __rt6_probe_work, work);
+
+	addrconf_addr_solict_mult(&work->target, &mcaddr);
+	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+	dev_put(work->dev);
+	kfree(w);
+}
+
 static void rt6_probe(struct rt6_info *rt)
 {
 	struct neighbour *neigh;
@@ -499,17 +517,23 @@ static void rt6_probe(struct rt6_info *rt)
 
 	if (!neigh ||
 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
-		struct in6_addr mcaddr;
-		struct in6_addr *target;
+		struct __rt6_probe_work *work;
+
+		work = kmalloc(sizeof(*work), GFP_ATOMIC);
 
-		if (neigh) {
+		if (neigh && work)
 			neigh->updated = jiffies;
+
+		if (neigh)
 			write_unlock(&neigh->lock);
-		}
 
-		target = (struct in6_addr *)&rt->rt6i_gateway;
-		addrconf_addr_solict_mult(target, &mcaddr);
-		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
+		if (work) {
+			INIT_WORK(&work->work, rt6_probe_deferred);
+			work->target = rt->rt6i_gateway;
+			dev_hold(rt->dst.dev);
+			work->dev = rt->dst.dev;
+			schedule_work(&work->work);
+		}
 	} else {
 out:
 		write_unlock(&neigh->lock);
-- 
cgit v1.2.1


From 02cf4ebd82ff0ac7254b88e466820a290ed8289a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 21 Oct 2013 15:40:19 -0400
Subject: tcp: initialize passive-side sk_pacing_rate after 3WHS

For passive TCP connections, upon receiving the ACK that completes the
3WHS, make sure we set our pacing rate after we get our first RTT
sample.

On passive TCP connections, when we receive the ACK completing the
3WHS we do not take an RTT sample in tcp_ack(), but rather in
tcp_synack_rtt_meas(). So upon receiving the ACK that completes the
3WHS, tcp_ack() leaves sk_pacing_rate at its initial value.

Originally the initial sk_pacing_rate value was 0, so passive-side
connections defaulted to sysctl_tcp_min_tso_segs (2 segs) in skbuffs
made in the first RTT. With a default initial cwnd of 10 packets, this
happened to be correct for RTTs 5ms or bigger, so it was hard to
see problems in WAN or emulated WAN testing.

Since 7eec4174ff ("pkt_sched: fq: fix non TCP flows pacing"), the
initial sk_pacing_rate is 0xffffffff. So after that change, passive
TCP connections were keeping this value (and using large numbers of
segments per skbuff) until receiving an ACK for data.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53974c7c04fe..a16b01b537ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5712,6 +5712,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		} else
 			tcp_init_metrics(sk);
 
+		tcp_update_pacing_rate(sk);
+
 		/* Prevent spurious tcp_cwnd_restart() on first data packet */
 		tp->lsndtime = tcp_time_stamp;
 
-- 
cgit v1.2.1


From 454594f3b93a49ef568cd190c5af31376b105a7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Sun, 20 Oct 2013 00:58:57 +0200
Subject: Revert "bridge: only expire the mdb entry when query is received"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While this commit was a good attempt to fix issues occuring when no
multicast querier is present, this commit still has two more issues:

1) There are cases where mdb entries do not expire even if there is a
querier present. The bridge will unnecessarily continue flooding
multicast packets on the according ports.

2) Never removing an mdb entry could be exploited for a Denial of
Service by an attacker on the local link, slowly, but steadily eating up
all memory.

Actually, this commit became obsolete with
"bridge: disable snooping if there is no querier" (b00589af3b)
which included fixes for a few more cases.

Therefore reverting the following commits (the commit stated in the
commit message plus three of its follow up fixes):

====================
Revert "bridge: update mdb expiration timer upon reports."
This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc.
Revert "bridge: do not call setup_timer() multiple times"
This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1.
Revert "bridge: fix some kernel warning in multicast timer"
This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1.
Revert "bridge: only expire the mdb entry when query is received"
This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b.
====================

CC: Cong Wang <amwang@redhat.com>
Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Reviewed-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c       |  2 +-
 net/bridge/br_multicast.c | 47 +++++++++++++++++++++++++++--------------------
 net/bridge/br_private.h   |  1 -
 3 files changed, 28 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 85a09bb5ca51..b7b1914dfa25 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -453,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
 		call_rcu_bh(&p->rcu, br_multicast_free_pg);
 		err = 0;
 
-		if (!mp->ports && !mp->mglist && mp->timer_armed &&
+		if (!mp->ports && !mp->mglist &&
 		    netif_running(br->dev))
 			mod_timer(&mp->timer, jiffies);
 		break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 1085f2180f3a..8b0b610ca2c9 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -272,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
 		del_timer(&p->timer);
 		call_rcu_bh(&p->rcu, br_multicast_free_pg);
 
-		if (!mp->ports && !mp->mglist && mp->timer_armed &&
+		if (!mp->ports && !mp->mglist &&
 		    netif_running(br->dev))
 			mod_timer(&mp->timer, jiffies);
 
@@ -611,9 +611,6 @@ rehash:
 		break;
 
 	default:
-		/* If we have an existing entry, update it's expire timer */
-		mod_timer(&mp->timer,
-			  jiffies + br->multicast_membership_interval);
 		goto out;
 	}
 
@@ -623,7 +620,6 @@ rehash:
 
 	mp->br = br;
 	mp->addr = *group;
-
 	setup_timer(&mp->timer, br_multicast_group_expired,
 		    (unsigned long)mp);
 
@@ -663,6 +659,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group __rcu **pp;
+	unsigned long now = jiffies;
 	int err;
 
 	spin_lock(&br->multicast_lock);
@@ -677,18 +674,15 @@ static int br_multicast_add_group(struct net_bridge *br,
 
 	if (!port) {
 		mp->mglist = true;
+		mod_timer(&mp->timer, now + br->multicast_membership_interval);
 		goto out;
 	}
 
 	for (pp = &mp->ports;
 	     (p = mlock_dereference(*pp, br)) != NULL;
 	     pp = &p->next) {
-		if (p->port == port) {
-			/* We already have a portgroup, update the timer.  */
-			mod_timer(&p->timer,
-				  jiffies + br->multicast_membership_interval);
-			goto out;
-		}
+		if (p->port == port)
+			goto found;
 		if ((unsigned long)p->port < (unsigned long)port)
 			break;
 	}
@@ -699,6 +693,8 @@ static int br_multicast_add_group(struct net_bridge *br,
 	rcu_assign_pointer(*pp, p);
 	br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
 
+found:
+	mod_timer(&p->timer, now + br->multicast_membership_interval);
 out:
 	err = 0;
 
@@ -1198,9 +1194,6 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	if (!mp)
 		goto out;
 
-	mod_timer(&mp->timer, now + br->multicast_membership_interval);
-	mp->timer_armed = true;
-
 	max_delay *= br->multicast_last_member_count;
 
 	if (mp->mglist &&
@@ -1277,9 +1270,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	if (!mp)
 		goto out;
 
-	mod_timer(&mp->timer, now + br->multicast_membership_interval);
-	mp->timer_armed = true;
-
 	max_delay *= br->multicast_last_member_count;
 	if (mp->mglist &&
 	    (timer_pending(&mp->timer) ?
@@ -1365,7 +1355,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
 			call_rcu_bh(&p->rcu, br_multicast_free_pg);
 			br_mdb_notify(br->dev, port, group, RTM_DELMDB);
 
-			if (!mp->ports && !mp->mglist && mp->timer_armed &&
+			if (!mp->ports && !mp->mglist &&
 			    netif_running(br->dev))
 				mod_timer(&mp->timer, jiffies);
 		}
@@ -1377,12 +1367,30 @@ static void br_multicast_leave_group(struct net_bridge *br,
 		     br->multicast_last_member_interval;
 
 	if (!port) {
-		if (mp->mglist && mp->timer_armed &&
+		if (mp->mglist &&
 		    (timer_pending(&mp->timer) ?
 		     time_after(mp->timer.expires, time) :
 		     try_to_del_timer_sync(&mp->timer) >= 0)) {
 			mod_timer(&mp->timer, time);
 		}
+
+		goto out;
+	}
+
+	for (p = mlock_dereference(mp->ports, br);
+	     p != NULL;
+	     p = mlock_dereference(p->next, br)) {
+		if (p->port != port)
+			continue;
+
+		if (!hlist_unhashed(&p->mglist) &&
+		    (timer_pending(&p->timer) ?
+		     time_after(p->timer.expires, time) :
+		     try_to_del_timer_sync(&p->timer) >= 0)) {
+			mod_timer(&p->timer, time);
+		}
+
+		break;
 	}
 out:
 	spin_unlock(&br->multicast_lock);
@@ -1805,7 +1813,6 @@ void br_multicast_stop(struct net_bridge *br)
 		hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
 					  hlist[ver]) {
 			del_timer(&mp->timer);
-			mp->timer_armed = false;
 			call_rcu_bh(&mp->rcu, br_multicast_free_group);
 		}
 	}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7ca2ae45b2d5..e14c33b42f75 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,7 +126,6 @@ struct net_bridge_mdb_entry
 	struct timer_list		timer;
 	struct br_ip			addr;
 	bool				mglist;
-	bool				timer_armed;
 };
 
 struct net_bridge_mdb_htable
-- 
cgit v1.2.1