From c0d5f9db1c7d1b8a9e2f217706e8ea233bac2754 Mon Sep 17 00:00:00 2001
From: Jim Schutt
Date: Fri, 16 Sep 2011 08:27:31 -0600
Subject: libceph: initialize ack_stamp to avoid unnecessary connection reset

Commit 4cf9d544631c recorded when an outgoing ceph message was ACKed, in
order to avoid unnecessary connection resets when an OSD is busy.

However, ack_stamp is uninitialized, so there is a window between when
the message is sent and when it is ACKed in which handle_timeout()
interprets the uninitialized value as an expired timeout, and resets the
connection unnecessarily.

Close the window by initializing ack_stamp.

Signed-off-by: Jim Schutt
Signed-off-by: Sage Weil
---
 net/ceph/messenger.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c340e2e0765b..9918e9eb276e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2307,6 +2307,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	m->front_max = front_len;
 	m->front_is_vmalloc = false;
 	m->more_to_follow = false;
+	m->ack_stamp = 0;
 	m->pool = NULL;
 
 	/* middle */
--
cgit v1.2.1

From 1cad78932a0d139dceff78e68808e160a224d57a Mon Sep 17 00:00:00 2001
From: Noah Watkins
Date: Mon, 12 Sep 2011 14:51:53 -0700
Subject: libceph: fix parse options memory leak

ceph_destroy_options does not free opt->mon_addr that is allocated in
ceph_parse_options.

Signed-off-by: Noah Watkins
Signed-off-by: Sage Weil
---
 net/ceph/ceph_common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 132963abc266..2883ea01e680 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -232,6 +232,7 @@ void ceph_destroy_options(struct ceph_options *opt)
 		ceph_crypto_key_destroy(opt->key);
 		kfree(opt->key);
 	}
+	kfree(opt->mon_addr);
 	kfree(opt);
 }
 EXPORT_SYMBOL(ceph_destroy_options);
--
cgit v1.2.1
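A note on the ack_stamp fix above: the sketch below restates the race in
isolation. It is illustrative pseudologic under assumed names -- only
ack_stamp comes from the patch; the struct, helper, and timeout check
are invented stand-ins, not the actual libceph handle_timeout() code.

#include <linux/jiffies.h>
#include <linux/types.h>

struct example_msg {
	unsigned long ack_stamp;	/* jiffies of last ACK; 0 = never ACKed */
};

/* Hypothetical staleness check in the style of a keepalive timeout. */
static bool example_looks_stale(const struct example_msg *m,
				unsigned long timeout)
{
	/*
	 * If ack_stamp holds leftover heap garbage instead of 0, this can
	 * report an expired timeout for a message that was queued only a
	 * moment ago -- the spurious connection reset the patch prevents.
	 */
	return time_after(jiffies, m->ack_stamp + timeout);
}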
From 935b639a049053d0ccbcf7422f2f9cd221642f58 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 16 Sep 2011 11:13:17 -0700
Subject: libceph: fix linger request requeuing

The r_req_lru_item list node moves between several lists, and that cycle
is not directly related to (and does not begin with) __register_request().
Initialize it in the request constructor, not in __register_request().
This fixes later badness (below) when OSDs restart underneath an rbd
mount.

Crashes we've seen due to this include:

 [ 213.974288] kernel BUG at net/ceph/messenger.c:2193!

and

 [ 144.035274] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048
 [ 144.035278] IP: [] con_work+0x1463/0x2ce0 [libceph]

Signed-off-by: Sage Weil
---
 net/ceph/osd_client.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 16836a7df7a6..88ad8a2501b5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	INIT_LIST_HEAD(&req->r_unsafe_item);
 	INIT_LIST_HEAD(&req->r_linger_item);
 	INIT_LIST_HEAD(&req->r_linger_osd);
+	INIT_LIST_HEAD(&req->r_req_lru_item);
 	req->r_flags = flags;
 
 	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -816,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc,
 {
 	req->r_tid = ++osdc->last_tid;
 	req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
-	INIT_LIST_HEAD(&req->r_req_lru_item);
-
 	dout("__register_request %p tid %lld\n", req, req->r_tid);
 	__insert_request(osdc, req);
 	ceph_osdc_get_request(req);
 	osdc->num_requests++;
-
 	if (osdc->num_requests == 1) {
 		dout(" first request, scheduling timeout\n");
 		__schedule_osd_timeout(osdc);
--
cgit v1.2.1

From 8b267b312df9343fea3bd679c509b36214b5a854 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli
Date: Wed, 21 Sep 2011 16:06:42 +0200
Subject: batman-adv: do_bcast has to be true for broadcast packets only

This corrects a critical bug in the GW feature: it caused all unicast
packets destined to a GW to be sent as broadcast. The bug is present
even if the sender's GW feature is configured as OFF. It's an urgent
bug fix and should be committed as soon as possible.

This was a regression introduced by
43676ab590c3f8686fd047d34c3e33803eef71f0

Signed-off-by: Antonio Quartulli
Signed-off-by: Marek Lindner
---
 net/batman-adv/soft-interface.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 3e2f91ffa4e2..05dd35114a27 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -565,7 +565,7 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
 	struct orig_node *orig_node = NULL;
 	int data_len = skb->len, ret;
 	short vid = -1;
-	bool do_bcast = false;
+	bool do_bcast;
 
 	if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
 		goto dropped;
@@ -598,15 +598,15 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
 		tt_local_add(soft_iface, ethhdr->h_source);
 
 	orig_node = transtable_search(bat_priv, ethhdr->h_dest);
-	if (is_multicast_ether_addr(ethhdr->h_dest) ||
-	    (orig_node && orig_node->gw_flags)) {
+	do_bcast = is_multicast_ether_addr(ethhdr->h_dest);
+	if (do_bcast || (orig_node && orig_node->gw_flags)) {
 		ret = gw_is_target(bat_priv, skb, orig_node);
 
 		if (ret < 0)
 			goto dropped;
 
-		if (ret == 0)
-			do_bcast = true;
+		if (ret)
+			do_bcast = false;
 	}
 
 	/* ethernet packet should be broadcasted */
--
cgit v1.2.1
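A note on the do_bcast fix above: pulled out of interface_tx(), the
corrected decision reduces to the sketch below. The helper name and
parameters are invented for illustration; gw_claims stands in for a
positive gw_is_target() result.

#include <linux/types.h>

/*
 * Broadcast only genuinely multicast frames that the gateway code does
 * not claim. The old code could also take the broadcast path for
 * unicast frames whose destination happened to be a gateway.
 */
static bool example_should_broadcast(bool is_multicast, bool gw_claims)
{
	return is_multicast && !gw_claims;
}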
From 2015de5fe2a47086a3260802275932bfd810884e Mon Sep 17 00:00:00 2001
From: Ben Greear
Date: Tue, 27 Sep 2011 15:16:08 -0400
Subject: ipv6-multicast: Fix memory leak in input path.

We have to free the skb before returning if the fib lookup fails.

Signed-off-by: Ben Greear
Signed-off-by: David S. Miller
---
 net/ipv6/ip6mr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 705c82886281..825d02fa6586 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2052,8 +2052,10 @@ int ip6_mr_input(struct sk_buff *skb)
 	int err;
 
 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt,
--
cgit v1.2.1

From d4cae56219755ccf8acfc8e2c1927009ff29d8c6 Mon Sep 17 00:00:00 2001
From: Madalin Bucur
Date: Mon, 26 Sep 2011 07:04:36 +0000
Subject: net: check return value for dst_alloc

The return value of dst_alloc must be checked before use.

Signed-off-by: Madalin Bucur
Signed-off-by: David S. Miller
---
 net/xfrm/xfrm_policy.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 94fdcc7f1030..552df27dcf53 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1349,14 +1349,16 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		BUG();
 	}
 	xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
-	memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry));
-	xfrm_policy_put_afinfo(afinfo);
 
-	if (likely(xdst))
+	if (likely(xdst)) {
+		memset(&xdst->u.rt6.rt6i_table, 0,
+		       sizeof(*xdst) - sizeof(struct dst_entry));
 		xdst->flo.ops = &xfrm_bundle_fc_ops;
-	else
+	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
+	xfrm_policy_put_afinfo(afinfo);
+
 	return xdst;
 }
--
cgit v1.2.1

From fbe58186901155c0cb5398dd343337be0c456c04 Mon Sep 17 00:00:00 2001
From: Madalin Bucur
Date: Mon, 26 Sep 2011 07:04:56 +0000
Subject: ipv6: check return value for dst_alloc

The return value of dst_alloc must be checked before use.

Signed-off-by: Madalin Bucur
Signed-off-by: David S. Miller
---
 net/ipv6/route.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1250f9020670..fb545edef6ea 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -244,7 +244,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
 {
 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
-	memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
+	if (rt != NULL)
+		memset(&rt->rt6i_table, 0,
+		       sizeof(*rt) - sizeof(struct dst_entry));
 
 	return rt;
 }
--
cgit v1.2.1

From 67928c4041606f02725f3c95c4c0404e4532df1b Mon Sep 17 00:00:00 2001
From: Ben Greear
Date: Fri, 23 Sep 2011 13:11:01 +0000
Subject: ipv6-multicast: Fix memory leak in IPv6 multicast.

If reg_vif_xmit cannot find a routing entry, be sure to free the skb
before returning the error.

Signed-off-by: Ben Greear
Signed-off-by: David S. Miller
---
 net/ipv6/ip6mr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 825d02fa6586..def0538e2413 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 	int err;
 
 	err = ip6mr_fib_lookup(net, &fl6, &mrt);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
--
cgit v1.2.1
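A note on the two dst_alloc patches above: both apply the same rule --
never touch an allocation before the NULL check. A minimal sketch of the
shape of the ipv6 fix, with a stub type and invented names rather than
the tree code:

#include <linux/slab.h>
#include <linux/string.h>

struct example_rt {
	int table;	/* stand-in for the fields cleared after allocation */
};

static struct example_rt *example_dst_alloc(gfp_t gfp)
{
	struct example_rt *rt = kmalloc(sizeof(*rt), gfp);

	if (rt != NULL)			/* check first ...     */
		memset(rt, 0, sizeof(*rt));	/* ... then initialize */

	return rt;			/* caller must still handle NULL */
}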
From 782e182e91e97f529a1edb30fdece9f1bef90ecc Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 28 Sep 2011 10:08:27 -0700
Subject: libceph: fix pg_temp mapping calculation

We need to apply the modulo pg_num calculation before looking up a pgid
in the pg_temp mapping rbtree. This fixes pg_temp mappings, and fixes
(some) misdirected requests that result in messages like

 [WRN] client4104 10.0.1.219:0/275025290 misdirected client4104.1:129 0.1 to osd0 not [1,0] in e11/11

on the server, and stalls that make the client block without getting a
reply (at least until the pg_temp mapping goes away, but that can take
a long, long time).

Reorder calc_pg_raw() a bit to make more sense.

Signed-off-by: Sage Weil
---
 net/ceph/osdmap.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index e97c3588c3ec..eceb8d56703c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1046,10 +1046,25 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	struct ceph_pg_mapping *pg;
 	struct ceph_pg_pool_info *pool;
 	int ruleno;
-	unsigned poolid, ps, pps;
+	unsigned poolid, ps, pps, t;
 	int preferred;
 
+	poolid = le32_to_cpu(pgid.pool);
+	ps = le16_to_cpu(pgid.ps);
+	preferred = (s16)le16_to_cpu(pgid.preferred);
+
+	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
+	if (!pool)
+		return NULL;
+
 	/* pg_temp? */
+	if (preferred >= 0)
+		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
+				    pool->lpgp_num_mask);
+	else
+		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
+				    pool->pgp_num_mask);
+	pgid.ps = cpu_to_le16(t);
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		*num = pg->len;
@@ -1057,18 +1072,6 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	}
 
 	/* crush */
-	poolid = le32_to_cpu(pgid.pool);
-	ps = le16_to_cpu(pgid.ps);
-	preferred = (s16)le16_to_cpu(pgid.preferred);
-
-	/* don't forcefeed bad device ids to crush */
-	if (preferred >= osdmap->max_osd ||
-	    preferred >= osdmap->crush->max_devices)
-		preferred = -1;
-
-	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
-	if (!pool)
-		return NULL;
 	ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
 				 pool->v.type, pool->v.size);
 	if (ruleno < 0) {
@@ -1078,6 +1081,11 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 		return NULL;
 	}
 
+	/* don't forcefeed bad device ids to crush */
+	if (preferred >= osdmap->max_osd ||
+	    preferred >= osdmap->crush->max_devices)
+		preferred = -1;
+
 	if (preferred >= 0)
 		pps = ceph_stable_mod(ps,
 				      le32_to_cpu(pool->v.lpgp_num),
--
cgit v1.2.1
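A note on the modulo step above: ceph_stable_mod() is the fold the patch
moves in front of the pg_temp lookup. The definition below is copied, to
the best of my knowledge, from the ceph osdmap header of that era; the
worked example is mine.

/* Stable modulo: b is the bucket count, bmask the next power of two
 * minus one. Most inputs keep their low bits when b grows. */
static int example_stable_mod(int x, int b, int bmask)
{
	if ((x & bmask) < b)
		return x & bmask;
	else
		return x & (bmask >> 1);
}

/*
 * Worked example with b = 12 placement groups (bmask = 15): x = 13
 * gives 13 & 15 = 13, which is not < 12, so the result is 13 & 7 = 5.
 * Before the fix, the rbtree was searched for the raw ps value (13),
 * a key that was never inserted -- hence the missed pg_temp mappings.
 */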
From 8adc8b3d780363d5df0dd6ace10336e3d7e331a1 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 28 Sep 2011 10:11:04 -0700
Subject: libceph: fix pg_temp mapping update

The incremental map updates have a record for each pg_temp mapping that
is to be added/updated (len > 0) or removed (len == 0). The old code was
written as if the updates were a complete enumeration; that was just
wrong. Update the code to remove 0-length entries and drop the rbtree
traversal.

This avoids misdirected (and hung) requests that manifest as server
errors like

 [WRN] client4104 10.0.1.219:0/275025290 misdirected client4104.1:129 0.1 to osd0 not [1,0] in e11/11

Signed-off-by: Sage Weil
---
 net/ceph/osdmap.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 24 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index eceb8d56703c..fd863fe76934 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
 	struct ceph_pg_mapping *pg = NULL;
 	int c;
 
+	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
 	while (*p) {
 		parent = *p;
 		pg = rb_entry(parent, struct ceph_pg_mapping, node);
@@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
 	while (n) {
 		pg = rb_entry(n, struct ceph_pg_mapping, node);
 		c = pgid_cmp(pgid, pg->pgid);
-		if (c < 0)
+		if (c < 0) {
 			n = n->rb_left;
-		else if (c > 0)
+		} else if (c > 0) {
 			n = n->rb_right;
-		else
+		} else {
+			dout("__lookup_pg_mapping %llx got %p\n",
+			     *(u64 *)&pgid, pg);
 			return pg;
+		}
 	}
 	return NULL;
 }
 
+static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
+{
+	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
+
+	if (pg) {
+		dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
+		rb_erase(&pg->node, root);
+		kfree(pg);
+		return 0;
+	}
+	dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
+	return -ENOENT;
+}
+
 /*
  * rbtree of pg pool info
  */
@@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	void *start = *p;
 	int err = -EINVAL;
 	u16 version;
-	struct rb_node *rbp;
 
 	ceph_decode_16_safe(p, end, version, bad);
 	if (version > CEPH_OSDMAP_INC_VERSION) {
@@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_pg_temp */
-	rbp = rb_first(&map->pg_temp);
 	ceph_decode_32_safe(p, end, len, bad);
 	while (len--) {
 		struct ceph_pg_mapping *pg;
@@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		ceph_decode_copy(p, &pgid, sizeof(pgid));
 		pglen = ceph_decode_32(p);
 
-		/* remove any? */
-		while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
-						node)->pgid, pgid) <= 0) {
-			struct ceph_pg_mapping *cur =
-				rb_entry(rbp, struct ceph_pg_mapping, node);
-
-			rbp = rb_next(rbp);
-			dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
-			rb_erase(&cur->node, &map->pg_temp);
-			kfree(cur);
-		}
-
 		if (pglen) {
 			/* insert */
 			ceph_decode_need(p, end, pglen*sizeof(u32), bad);
@@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			}
 			dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
 			     pglen);
+		} else {
+			/* remove */
+			__remove_pg_mapping(&map->pg_temp, pgid);
 		}
 	}
-	while (rbp) {
-		struct ceph_pg_mapping *cur =
-			rb_entry(rbp, struct ceph_pg_mapping, node);
-
-		rbp = rb_next(rbp);
-		dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
-		rb_erase(&cur->node, &map->pg_temp);
-		kfree(cur);
-	}
 
 	/* ignore the rest */
 	*p = end;
--
cgit v1.2.1
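A note on the decode loop above: the contract the fix restores can be
restated compactly. All names below are invented stand-ins for the
decode and rbtree calls in the patch; this sketches the semantics only.

/* Each new_pg_temp record is self-contained: a non-empty OSD list adds
 * or replaces the mapping, an empty one removes it. Mappings not
 * mentioned in the incremental are left alone. */
struct example_record {
	unsigned long long pgid;
	unsigned int nr_osds;		/* 0 means "remove this mapping" */
};

static void example_apply(const struct example_record *rec,
			  void (*upsert)(unsigned long long),
			  void (*remove)(unsigned long long))
{
	if (rec->nr_osds)
		upsert(rec->pgid);	/* add or update */
	else
		remove(rec->pgid);	/* the case the old range-prune mishandled */
}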
From aabdcb0b553b9c9547b1a506b34d55a764745870 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp
Date: Fri, 23 Sep 2011 08:23:47 +0000
Subject: can bcm: fix tx_setup off-by-one errors

This patch fixes two off-by-one errors that canceled each other out.

Checking for the same condition two times in bcm_tx_timeout_tsklet()
reduced the count of frames to be sent by one. This did not show up the
first time tx_setup was invoked, as an additional frame is sent due to
TX_ANNOUNCE. Invoking a second tx_setup on the same item led to a
reduced (by one) number of sent frames.

Reported-by: Andre Naujoks
Signed-off-by: Oliver Hartkopp
Signed-off-by: David S. Miller
---
 net/can/bcm.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index d6c8ae5b2e6a..c9cdb8d78e70 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -365,9 +365,6 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 
 			bcm_send_to_user(op, &msg_head, NULL, 0);
 		}
-	}
-
-	if (op->kt_ival1.tv64 && (op->count > 0)) {
 
 		/* send (next) frame */
 		bcm_can_tx(op);
@@ -970,8 +967,9 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		/* spec: send can_frame when starting timer */
 		op->flags |= TX_ANNOUNCE;
 
-		if (op->kt_ival1.tv64 && (op->count > 0)) {
-			/* op->count-- is done in bcm_tx_timeout_handler */
+		/* only start timer when having more frames than sent below */
+		if (op->kt_ival1.tv64 && (op->count > 1)) {
+			/* op->count-- is done in bcm_tx_timeout_tsklet */
 			hrtimer_start(&op->timer, op->kt_ival1,
 				      HRTIMER_MODE_REL);
 		} else
@@ -979,8 +977,11 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 				      HRTIMER_MODE_REL);
 	}
 
-	if (op->flags & TX_ANNOUNCE)
+	if (op->flags & TX_ANNOUNCE) {
 		bcm_can_tx(op);
+		if (op->kt_ival1.tv64 && (op->count > 0))
+			op->count--;
+	}
 
 	return msg_head->nframes * CFSIZ + MHSIZ;
 }
--
cgit v1.2.1

From 676a1184e8afd4fed7948232df1ff91517400859 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Sun, 25 Sep 2011 02:21:30 +0000
Subject: ipv6: nullify ipv6_ac_list and ipv6_fl_list when creating new socket

ipv6_ac_list and ipv6_fl_list from the listening socket are
inadvertently shared with the new socket created for a connection.

Signed-off-by: Zheng Yan
Signed-off-by: David S. Miller
---
 net/ipv6/tcp_ipv6.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3c9fa618b69d..79cc6469508d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1383,6 +1383,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
 #endif
 
+		newnp->ipv6_ac_list = NULL;
+		newnp->ipv6_fl_list = NULL;
 		newnp->pktoptions = NULL;
 		newnp->opt = NULL;
 		newnp->mcast_oif = inet6_iif(skb);
@@ -1447,6 +1449,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	   First: no IPv4 options.
 	 */
 	newinet->inet_opt = NULL;
+	newnp->ipv6_ac_list = NULL;
 	newnp->ipv6_fl_list = NULL;
 
 	/* Clone RX bits */
--
cgit v1.2.1
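A note on the two NULL assignments above: the accept path creates the
child socket as a wholesale copy of the listener, so pointer-valued
per-socket state must be severed explicitly. A sketch with invented
names, not the tcp_ipv6.c code:

struct example_np {
	void *ipv6_ac_list;	/* anycast memberships; must not be shared */
	void *ipv6_fl_list;	/* flow labels; must not be shared */
};

static void example_clone(struct example_np *child,
			  const struct example_np *listener)
{
	*child = *listener;	/* the bulk copy the accept path performs */

	/*
	 * Without this scrub, parent and child would manipulate and
	 * eventually free the same lists -- the sharing bug fixed above.
	 */
	child->ipv6_ac_list = NULL;
	child->ipv6_fl_list = NULL;
}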
From 85a64889492b45f931ddac87ec09d84aa7347ee1 Mon Sep 17 00:00:00 2001
From: Jonathan Lallinger
Date: Thu, 29 Sep 2011 07:58:41 +0000
Subject: RDSRDMA: Fix cleanup of rds_iw_mr_pool

In the rds_iw_mr_pool struct the free_pinned field keeps track of memory
pinned by free MRs. While this field is incremented properly upon
allocation, it is never decremented upon unmapping. This would cause the
rds_rdma module to crash the kernel upon unloading, by triggering the
BUG_ON in the rds_iw_destroy_mr_pool function.

This change keeps track of the MRs that become unpinned, so that
free_pinned can be decremented appropriately.

Signed-off-by: Jonathan Lallinger
Signed-off-by: Steve Wise
Signed-off-by: David S. Miller
---
 net/rds/iw_rdma.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 8b77edbab272..4e1de171866c 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
 static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 			struct list_head *unmap_list,
-			struct list_head *kill_list);
+			struct list_head *kill_list,
+			int *unpinned);
 static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 
 static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
@@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(kill_list);
 	unsigned long flags;
-	unsigned int nfreed = 0, ncleaned = 0, free_goal;
+	unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal;
 	int ret = 0;
 
 	rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 	 * will be destroyed by the unmap function.
 	 */
 	if (!list_empty(&unmap_list)) {
-		ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list);
+		ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list,
+						     &kill_list, &unpinned);
 		/* If we've been asked to destroy all MRs, move those
 		 * that were simply cleaned to the kill list */
 		if (free_all)
@@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 		spin_unlock_irqrestore(&pool->list_lock, flags);
 	}
 
+	atomic_sub(unpinned, &pool->free_pinned);
 	atomic_sub(ncleaned, &pool->dirty_count);
 	atomic_sub(nfreed, &pool->item_count);
 
@@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
 
 static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 				struct list_head *unmap_list,
-				struct list_head *kill_list)
+				struct list_head *kill_list,
+				int *unpinned)
 {
 	struct rds_iw_mapping *mapping, *next;
 	unsigned int ncleaned = 0;
@@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 
 	spin_lock_irqsave(&pool->list_lock, flags);
 	list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
+		*unpinned += mapping->m_sg.len;
 		list_move(&mapping->m_list, &laundered);
 		ncleaned++;
 	}
--
cgit v1.2.1
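A note on the failure mode above: the pool destructor asserts that
nothing is still accounted as pinned, which is how a bookkeeping leak
became a crash on module unload. A hedged sketch of the invariant, with
simplified, invented names:

#include <linux/atomic.h>
#include <linux/bug.h>

struct example_pool {
	atomic_t free_pinned;	/* pages owned by free but still-mapped MRs */
};

static void example_destroy_pool(struct example_pool *pool)
{
	/*
	 * Mirrors the BUG_ON in rds_iw_destroy_mr_pool(): without the
	 * atomic_sub() of the unpinned total, this counter never returns
	 * to zero and unloading rds_rdma dies here.
	 */
	BUG_ON(atomic_read(&pool->free_pinned));
}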
From 12d0d0d3a7349daa95dbfd5d7df8146255bc7c67 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp
Date: Thu, 29 Sep 2011 15:33:47 -0400
Subject: can bcm: fix incomplete tx_setup fix

The commit aabdcb0b553b9c9547b1a506b34d55a764745870 ("can bcm: fix
tx_setup off-by-one errors") fixed only a part of the original problem
reported by Andre Naujoks. It turned out that the original code needed
to be re-ordered to reduce complexity and to finally fix the reported
frame counting issues.

Signed-off-by: Oliver Hartkopp
Signed-off-by: David S. Miller
---
 net/can/bcm.c | 48 +++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index c9cdb8d78e70..c84963d2dee6 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
 	}
 }
 
+static void bcm_tx_start_timer(struct bcm_op *op)
+{
+	if (op->kt_ival1.tv64 && op->count)
+		hrtimer_start(&op->timer,
+			      ktime_add(ktime_get(), op->kt_ival1),
+			      HRTIMER_MODE_ABS);
+	else if (op->kt_ival2.tv64)
+		hrtimer_start(&op->timer,
+			      ktime_add(ktime_get(), op->kt_ival2),
+			      HRTIMER_MODE_ABS);
+}
+
 static void bcm_tx_timeout_tsklet(unsigned long data)
 {
 	struct bcm_op *op = (struct bcm_op *)data;
@@ -365,23 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 
 			bcm_send_to_user(op, &msg_head, NULL, 0);
 		}
-
-		/* send (next) frame */
 		bcm_can_tx(op);
-		hrtimer_start(&op->timer,
-			      ktime_add(ktime_get(), op->kt_ival1),
-			      HRTIMER_MODE_ABS);
 
-	} else {
-		if (op->kt_ival2.tv64) {
+	} else if (op->kt_ival2.tv64)
+		bcm_can_tx(op);
 
-			/* send (next) frame */
-			bcm_can_tx(op);
-			hrtimer_start(&op->timer,
-				      ktime_add(ktime_get(), op->kt_ival2),
-				      HRTIMER_MODE_ABS);
-		}
-	}
+	bcm_tx_start_timer(op);
 }
 
 /*
@@ -961,28 +962,21 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 			hrtimer_cancel(&op->timer);
 	}
 
-	if ((op->flags & STARTTIMER) &&
-	    ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) {
-
+	if (op->flags & STARTTIMER) {
+		hrtimer_cancel(&op->timer);
 		/* spec: send can_frame when starting timer */
 		op->flags |= TX_ANNOUNCE;
-
-		/* only start timer when having more frames than sent below */
-		if (op->kt_ival1.tv64 && (op->count > 1)) {
-			/* op->count-- is done in bcm_tx_timeout_tsklet */
-			hrtimer_start(&op->timer, op->kt_ival1,
-				      HRTIMER_MODE_REL);
-		} else
-			hrtimer_start(&op->timer, op->kt_ival2,
-				      HRTIMER_MODE_REL);
 	}
 
 	if (op->flags & TX_ANNOUNCE) {
 		bcm_can_tx(op);
-		if (op->kt_ival1.tv64 && (op->count > 0))
+		if (op->count)
 			op->count--;
 	}
 
+	if (op->flags & STARTTIMER)
+		bcm_tx_start_timer(op);
+
 	return msg_head->nframes * CFSIZ + MHSIZ;
 }
--
cgit v1.2.1
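A note on the tx_setup semantics the two can-bcm patches converge on,
seen from userspace: with count = 3 and both intervals set, exactly
three frames go out at ival1 spacing (the first immediately), then the
cycle continues at ival2. A hedged sketch -- the field layout follows
linux/can/bcm.h as I recall it from that era, and error handling is
omitted:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/can.h>
#include <linux/can/bcm.h>

struct bcm_tx_msg {
	struct bcm_msg_head head;
	struct can_frame frame;
};

int main(void)
{
	struct sockaddr_can addr = { .can_family = AF_CAN };
	struct bcm_tx_msg msg;
	int s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);

	/* in practice, addr.can_ifindex must name a real CAN interface */
	connect(s, (struct sockaddr *)&addr, sizeof(addr));

	memset(&msg, 0, sizeof(msg));
	msg.head.opcode       = TX_SETUP;
	msg.head.flags        = SETTIMER | STARTTIMER;
	msg.head.count        = 3;	/* three frames at ival1 spacing ... */
	msg.head.ival1.tv_sec = 1;
	msg.head.ival2.tv_sec = 5;	/* ... then repeat every ival2 */
	msg.head.can_id       = 0x42;
	msg.head.nframes      = 1;
	msg.frame.can_id      = 0x42;
	msg.frame.can_dlc     = 1;
	msg.frame.data[0]     = 0xab;

	/* the off-by-one fixes make the on-wire count match "count" */
	write(s, &msg, sizeof(msg));
	return 0;
}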
From 7091fbd82cd5686444ffe9935ed6a8190101fe9d Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Fri, 30 Sep 2011 10:38:28 +0000
Subject: make PACKET_STATISTICS getsockopt report consistently between ring and non-ring

This is a minor change.

Up until kernel 2.6.32, getsockopt(fd, SOL_PACKET, PACKET_STATISTICS,
...) would return total and dropped packets since its last invocation.
The introduction of socket queue overflow reporting [1] changed the
drop rate calculation in the normal packet socket path, but not when
using a packet ring. As a result, the getsockopt now returns different
statistics depending on the reception method used. With a ring, it
still returns the count since the last call, as counts are incremented
in tpacket_rcv and reset in getsockopt. Without a ring, it returns 0 if
no drops occurred since the last getsockopt and the total drops over
the lifespan of the socket otherwise.

The culprit is this line in packet_rcv, executed on a drop:

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

As it shows, the new drop number is taken from the socket drop counter,
which is not reset at getsockopt.

I put together a small example that demonstrates the issue [2]. It runs
for 10 seconds and overflows the queue/ring on every odd second. The
reported drop rates are:

  ring:     16, 0, 16, 0, 16, ...
  non-ring: 0, 15, 0, 30, 0, 46, 0, 60, 0, 74.

Note how the even non-ring counts monotonically increase. Because the
getsockopt adds tp_drops to tp_packets, total counts are similarly
reported cumulatively.

Long story short, reinstating the original code, as the patch below
does, fixes the issue at the cost of additional per-packet cycles.
Another solution that does not introduce per-packet overhead is to keep
the current data path, record the value of sk_drops at getsockopt() at
call N in a new field in struct packet_sock, and subtract that when
reporting at call N+1. I'll be happy to code that instead; it's just
more messy.

[1] http://patchwork.ozlabs.org/patch/35665/
[2] http://kernel.googlecode.com/files/test-packetsock-getstatistics.c

Signed-off-by: Willem de Bruijn
Signed-off-by: David S. Miller
---
 net/packet/af_packet.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c698cec0a445..fabb4fafa281 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -961,7 +961,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	return 0;
 
 drop_n_acct:
-	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
+	spin_lock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops++;
+	atomic_inc(&sk->sk_drops);
+	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
--
cgit v1.2.1
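A note on the userspace view of the statistics this patch makes
consistent again. A minimal sketch with no error handling; after the
fix, both fields are deltas since the previous call, and tp_packets
includes tp_drops, as the message above notes:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

static void print_deltas(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	/* Reads and resets both counters under the receive-queue lock. */
	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("%u packets (%u dropped) since the last call\n",
		       st.tp_packets, st.tp_drops);
}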
From 260fcbeb1ae9e768a44c9925338fbacb0d7e5ba9 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Thu, 29 Sep 2011 17:10:10 +0000
Subject: tcp: properly handle md5sig_pool references

tcp_v4_clear_md5_list() assumes that multiple tcp md5sig peers only
hold one reference to md5sig_pool, but tcp_v4_md5_do_add() increases
the use count of md5sig_pool for each peer. This patch makes
tcp_v4_md5_do_add() increase the use count only for the first tcp
md5sig peer.

Signed-off-by: Zheng Yan
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_ipv4.c | 11 +++++++----
 net/ipv6/tcp_ipv6.c |  8 +++++---
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c34f01513945..7963e03f1068 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -927,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 			}
 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
-		if (tcp_alloc_md5sig_pool(sk) == NULL) {
+
+		md5sig = tp->md5sig_info;
+		if (md5sig->entries4 == 0 &&
+		    tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
 			return -ENOMEM;
 		}
-		md5sig = tp->md5sig_info;
 
 		if (md5sig->alloced4 == md5sig->entries4) {
 			keys = kmalloc((sizeof(*keys) *
 					(md5sig->entries4 + 1)), GFP_ATOMIC);
 			if (!keys) {
 				kfree(newkey);
-				tcp_free_md5sig_pool();
+				if (md5sig->entries4 == 0)
+					tcp_free_md5sig_pool();
 				return -ENOMEM;
 			}
 
@@ -982,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 				kfree(tp->md5sig_info->keys4);
 				tp->md5sig_info->keys4 = NULL;
 				tp->md5sig_info->alloced4 = 0;
+				tcp_free_md5sig_pool();
 			} else if (tp->md5sig_info->entries4 != i) {
 				/* Need to do some manipulation */
 				memmove(&tp->md5sig_info->keys4[i],
@@ -989,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 					(tp->md5sig_info->entries4 - i) *
 					sizeof(struct tcp4_md5sig_key));
 			}
-			tcp_free_md5sig_pool();
 			return 0;
 		}
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 79cc6469508d..7b8fc5794352 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -591,7 +591,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
 			}
 			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
-		if (tcp_alloc_md5sig_pool(sk) == NULL) {
+		if (tp->md5sig_info->entries6 == 0 &&
+		    tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
 			return -ENOMEM;
 		}
@@ -600,8 +601,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
 				      (tp->md5sig_info->entries6 + 1)),
 				      GFP_ATOMIC);
 			if (!keys) {
-				tcp_free_md5sig_pool();
 				kfree(newkey);
+				if (tp->md5sig_info->entries6 == 0)
+					tcp_free_md5sig_pool();
 				return -ENOMEM;
 			}
 
@@ -647,6 +649,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
 				kfree(tp->md5sig_info->keys6);
 				tp->md5sig_info->keys6 = NULL;
 				tp->md5sig_info->alloced6 = 0;
+				tcp_free_md5sig_pool();
 			} else {
 				/* shrink the database */
 				if (tp->md5sig_info->entries6 != i)
@@ -655,7 +658,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
 					(tp->md5sig_info->entries6 - i)
 					* sizeof (tp->md5sig_info->keys6[0]));
 			}
-			tcp_free_md5sig_pool();
 			return 0;
 		}
 	}
--
cgit v1.2.1
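A note for context on how the md5sig pool is reached from userspace: the
first key added on a socket is what triggers tcp_alloc_md5sig_pool(),
and deleting the last one is what may now free it. A hedged sketch of
adding a TCP-MD5 (RFC 2385) key, error handling omitted:

#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>

static int example_add_md5_key(int fd, const struct sockaddr_in *peer)
{
	struct tcp_md5sig md5;

	memset(&md5, 0, sizeof(md5));
	memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
	md5.tcpm_keylen = 6;
	memcpy(md5.tcpm_key, "secret", 6);

	/* The first key on the socket allocates the shared md5sig pool. */
	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}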
From 1e5289e121372a3494402b1b131b41bfe1cf9b7f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Sun, 2 Oct 2011 04:21:50 +0000
Subject: tcp: properly update lost_cnt_hint during shifting

lost_skb_hint is used by tcp_mark_head_lost() to mark the first
unhandled skb. lost_cnt_hint is the number of packets or sacked packets
before the lost_skb_hint.

When shifting a skb that is before the lost_skb_hint: if tcp_is_fack()
is true, the skb has already been counted in the lost_cnt_hint; if
tcp_is_fack() is false, tcp_sacktag_one() will increase the
lost_cnt_hint. So tcp_shifted_skb() does not need to adjust the
lost_cnt_hint by itself.

When shifting a skb that is equal to lost_skb_hint, the shifted packets
will not be counted by tcp_mark_head_lost(). So tcp_shifted_skb() should
adjust the lost_cnt_hint even if tcp_is_fack(tp) is true.

Signed-off-by: Zheng Yan
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_input.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 21fab3edb92c..d73aab3fbfc0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1389,9 +1389,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Tweak before seqno plays */
-	if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
-	    !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
--
cgit v1.2.1

From 3458e21c0d384ca04b27a2ea24d9314c1b57530f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 5 Oct 2011 03:24:43 +0000
Subject: netfilter: Use proper rwlock init function

Replace the open-coded initialization with the init function.

Signed-off-by: Thomas Gleixner
Acked-by: Hans Schillstrom
Signed-off-by: David S. Miller
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 2b771dc708a3..5290ac353a5e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3679,7 +3679,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+	rwlock_init(&ipvs->rs_lock);
 
 	/* Initialize rs_table */
 	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
--
cgit v1.2.1

From b64b73d7d0c480f75684519c6134e79d50c1b341 Mon Sep 17 00:00:00 2001
From: stephen hemminger
Date: Mon, 3 Oct 2011 18:14:45 +0000
Subject: bridge: leave carrier on for empty bridge

This resolves a regression seen by some users of bridging. Some users
use the bridge like a dummy device. They expect to be able to put an
IPv6 address on the device with no ports attached. Although there are
better ways of doing this, there is no reason not to allow it.

Note: the bridge will still reflect the state of its ports if any are
added.

Signed-off-by: Stephen Hemminger
Signed-off-by: David S. Miller
---
 net/bridge/br_device.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 32b8f9f7f79e..ff3ed6086ce1 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	netif_carrier_off(dev);
 	netdev_update_features(dev);
 	netif_start_queue(dev);
 	br_stp_enable_bridge(br);
@@ -108,8 +107,6 @@ static int br_dev_stop(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
-	netif_carrier_off(dev);
-
 	br_stp_disable_bridge(br);
 	br_multicast_stop(br);
--
cgit v1.2.1
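A closing note on the rwlock patch above: the difference between the two
initializations only shows up on debugging kernels. A sketch with
invented names, not the ipvs code:

#include <linux/spinlock.h>

struct example_ns {
	rwlock_t lock;
};

static void example_init(struct example_ns *ns)
{
	/*
	 * rwlock_init() performs the full dynamic setup; with lockdep
	 * enabled it registers a lock class for this object. Assigning
	 * __RW_LOCK_UNLOCKED(...) merely copies an initializer meant for
	 * static definitions and skips that bookkeeping.
	 */
	rwlock_init(&ns->lock);
}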