14 files changed, 527 insertions, 219 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index d5e42d13bd67..eb999003bbb7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2101,26 +2101,23 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
 	struct net_device_stats *stats = dev->get_stats(dev);
 
-	if (stats) {
-		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
-				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
-			   dev->name, stats->rx_bytes, stats->rx_packets,
-			   stats->rx_errors,
-			   stats->rx_dropped + stats->rx_missed_errors,
-			   stats->rx_fifo_errors,
-			   stats->rx_length_errors + stats->rx_over_errors +
-			     stats->rx_crc_errors + stats->rx_frame_errors,
-			   stats->rx_compressed, stats->multicast,
-			   stats->tx_bytes, stats->tx_packets,
-			   stats->tx_errors, stats->tx_dropped,
-			   stats->tx_fifo_errors, stats->collisions,
-			   stats->tx_carrier_errors +
-			     stats->tx_aborted_errors +
-			     stats->tx_window_errors +
-			     stats->tx_heartbeat_errors,
-			   stats->tx_compressed);
-	} else
-		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+		   dev->name, stats->rx_bytes, stats->rx_packets,
+		   stats->rx_errors,
+		   stats->rx_dropped + stats->rx_missed_errors,
+		   stats->rx_fifo_errors,
+		   stats->rx_length_errors + stats->rx_over_errors +
+		    stats->rx_crc_errors + stats->rx_frame_errors,
+		   stats->rx_compressed, stats->multicast,
+		   stats->tx_bytes, stats->tx_packets,
+		   stats->tx_errors, stats->tx_dropped,
+		   stats->tx_fifo_errors, stats->collisions,
+		   stats->tx_carrier_errors +
+		    stats->tx_aborted_errors +
+		    stats->tx_window_errors +
+		    stats->tx_heartbeat_errors,
+		   stats->tx_compressed);
 }
 
 /*
@@ -3257,11 +3254,9 @@ out:
 	mutex_unlock(&net_todo_run_mutex);
 }
 
-static struct net_device_stats *maybe_internal_stats(struct net_device *dev)
+static struct net_device_stats *internal_stats(struct net_device *dev)
 {
-	if (dev->features & NETIF_F_INTERNAL_STATS)
-		return &dev->stats;
-	return NULL;
+	return &dev->stats;
 }
 
 /**
@@ -3299,7 +3294,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 	if (sizeof_priv)
 		dev->priv = netdev_priv(dev);
 
-	dev->get_stats = maybe_internal_stats;
+	dev->get_stats = internal_stats;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 324e7e0fdb2a..97069399d864 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -329,6 +329,7 @@ drop:
 static inline int ip_rcv_finish(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
 
 	/*
 	 *	Initialise the virtual path cache for the packet. It describes
@@ -340,6 +341,8 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
 		if (unlikely(err)) {
 			if (err == -EHOSTUNREACH)
 				IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			else if (err == -ENETUNREACH)
+				IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
 			goto drop;
 		}
 	}
@@ -358,6 +361,12 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
 	if (iph->ihl > 5 && ip_rcv_options(skb))
 		goto drop;
 
+	rt = (struct rtable*)skb->dst;
+	if (rt->rt_type == RTN_MULTICAST)
+		IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+	else if (rt->rt_type == RTN_BROADCAST)
+		IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+
 	return dst_input(skb);
 
 drop:
@@ -414,7 +423,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto inhdr_error;
 
 	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < (iph->ihl*4))
+	if (skb->len < len) {
+		IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	} else if (len < (iph->ihl*4))
 		goto inhdr_error;
 
 	/* Our transport medium may have padded the buffer out. Now we know it
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 534650cad3a8..d6427d918512 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -160,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
+	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	int hh_len = LL_RESERVED_SPACE(dev);
 
+	if (rt->rt_type == RTN_MULTICAST)
+		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	else if (rt->rt_type == RTN_BROADCAST)
+		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+
 	/* Be paranoid, rather than too clever. */
 	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
 		struct sk_buff *skb2;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2cf9a898ce50..d6e488668171 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1573,14 +1573,12 @@ void tcp_close(struct sock *sk, long timeout)
 
 	sk_stream_mem_reclaim(sk);
 
-	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
-	 * 3.10, we send a RST here because data was lost.  To
-	 * witness the awful effects of the old behavior of always
-	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
-	 * a bulk GET in an FTP client, suspend the process, wait
-	 * for the client to advertise a zero window, then kill -9
-	 * the FTP client, wheee...  Note: timeout is always zero
-	 * in such a case.
+	/* As outlined in RFC 2525, section 2.17, we send a RST here because
+	 * data was lost. To witness the awful effects of the old behavior of
+	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
+	 * GET in an FTP client, suspend the process, wait for the client to
+	 * advertise a zero window, then kill -9 the FTP client, wheee...
+	 * Note: timeout is always zero in such a case.
 	 */
 	if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 051f0f815f17..7641b2761a14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1265,20 +1265,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
-/* F-RTO can only be used if these conditions are satisfied:
- *  - there must be some unsent new data
- *  - the advertised window should allow sending it
- *  - TCP has never retransmitted anything other than head (SACK enhanced
- *    variant from Appendix B of RFC4138 is more robust here)
+/* F-RTO can only be used if TCP has never retransmitted anything other than
+ * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
  */
 int tcp_use_frto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!sysctl_tcp_frto || !tcp_send_head(sk) ||
-		after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
-		      tp->snd_una + tp->snd_wnd))
+	if (!sysctl_tcp_frto)
 		return 0;
 
 	if (IsSackFrto())
@@ -2642,7 +2637,9 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
  *                  algorithm is not part of the F-RTO detection algorithm
  *                  given in RFC4138 but can be selected separately).
  * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery.
+ * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
+ * of Nagle, this is done using frto_counter states 2 and 3, when a new data
+ * segment of any size sent during F-RTO, state 2 is upgraded to 3.
  *
  * Rationale: if the RTO was spurious, new ACKs should arrive from the
  * original window even after we transmit two new data segments.
@@ -2671,7 +2668,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		inet_csk(sk)->icsk_retransmits = 0;
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
 		return 1;
 	}
 
@@ -2697,7 +2694,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 			return 1;
 		}
 
-		if ((tp->frto_counter == 2) &&
+		if ((tp->frto_counter >= 2) &&
 		    (!(flag&FLAG_FORWARD_PROGRESS) ||
 		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
 			/* RFC4138 shortcoming (see comment above) */
@@ -2710,10 +2707,19 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	}
 
 	if (tp->frto_counter == 1) {
+		/* Sending of the next skb must be allowed or no FRTO */
+		if (!tcp_send_head(sk) ||
+		    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
+				     tp->snd_una + tp->snd_wnd)) {
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
+					    flag);
+			return 1;
+		}
+
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
 		return 1;
-	} else /* frto_counter == 2 */ {
+	} else {
 		switch (sysctl_tcp_frto_response) {
 		case 2:
 			tcp_undo_spur_to_response(sk, flag);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e70a6840cb64..0faacf9c419d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1035,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	if (nonagle & TCP_NAGLE_PUSH)
 		return 1;
 
-	/* Don't use the nagle rule for urgent data (or for the final FIN).  */
-	if (tp->urg_mode ||
+	/* Don't use the nagle rule for urgent data (or for the final FIN).
+	 * Nagle can be ignored during F-RTO too (see RFC4138).
+	 */
+	if (tp->urg_mode || (tp->frto_counter == 2) ||
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
 		return 1;
 
@@ -2035,7 +2037,7 @@ void tcp_send_fin(struct sock *sk)
 /* We get here when a process closes a file descriptor (either due to
  * an explicit close() or as a byproduct of exit()'ing) and there
  * was unread data in the receive queue.  This behavior is recommended
- * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
+ * by RFC 2525, section 2.17.  -DaveM
  */
 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cec0f2cc49b7..144970704c2c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock);
 
 static int udp_port_rover;
 
-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
+/*
+ * Note about this hash function :
+ * Typical use is probably daddr = 0, only dport is going to vary hash
+ */
+static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr)
+{
+	addr ^= addr >> 16;
+	addr ^= addr >> 8;
+	return port ^ addr;
+}
+
+static inline int __udp_lib_port_inuse(unsigned int hash, int port,
+	__be32 daddr, struct hlist_head udptable[])
 {
 	struct sock *sk;
 	struct hlist_node *node;
+	struct inet_sock *inet;
 
-	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
-		if (sk->sk_hash == num)
+	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+		if (sk->sk_hash != hash)
+			continue;
+		inet = inet_sk(sk);
+		if (inet->num != port)
+			continue;
+		if (inet->rcv_saddr == daddr)
 			return 1;
+	}
 	return 0;
 }
 
@@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct sock *sk2;
+	unsigned int hash;
 	int    error = 1;
 
 	write_lock_bh(&udp_hash_lock);
@@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
 			int size;
 
-			head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+			hash = hash_port_and_addr(result,
+					inet_sk(sk)->rcv_saddr);
+			head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -181,7 +203,10 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			if (! __udp_lib_lport_inuse(result, udptable))
+			hash = hash_port_and_addr(result,
+					inet_sk(sk)->rcv_saddr);
+			if (! __udp_lib_port_inuse(hash, result,
+				inet_sk(sk)->rcv_saddr, udptable))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -189,11 +214,13 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
 		*port_rover = snum = result;
 	} else {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		hash = hash_port_and_addr(snum, inet_sk(sk)->rcv_saddr);
+		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 
 		sk_for_each(sk2, node, head)
-			if (sk2->sk_hash == snum                             &&
+			if (sk2->sk_hash == hash                             &&
 			    sk2 != sk                                        &&
+			    inet_sk(sk2)->num == snum	                     &&
 			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
 			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
@@ -201,9 +228,9 @@ gotit:
 				goto fail;
 	}
 	inet_sk(sk)->num = snum;
-	sk->sk_hash = snum;
+	sk->sk_hash = hash;
 	if (sk_unhashed(sk)) {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
@@ -242,63 +269,78 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 {
 	struct sock *sk, *result = NULL;
 	struct hlist_node *node;
-	unsigned short hnum = ntohs(dport);
-	int badness = -1;
+	unsigned int hash, hashwild;
+	int score, best = -1;
+
+	hash = hash_port_and_addr(ntohs(dport), daddr);
+	hashwild = hash_port_and_addr(ntohs(dport), 0);
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+
+lookup:
+
+	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
-			int score = (sk->sk_family == PF_INET ? 1 : 0);
-			if (inet->rcv_saddr) {
-				if (inet->rcv_saddr != daddr)
-					continue;
-				score+=2;
-			}
-			if (inet->daddr) {
-				if (inet->daddr != saddr)
-					continue;
-				score+=2;
-			}
-			if (inet->dport) {
-				if (inet->dport != sport)
-					continue;
-				score+=2;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score+=2;
-			}
-			if (score == 9) {
-				result = sk;
-				break;
-			} else if (score > badness) {
-				result = sk;
-				badness = score;
-			}
+		if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
+			inet->num != dport)
+			continue;
+
+		score = (sk->sk_family == PF_INET ? 1 : 0);
+		if (inet->rcv_saddr) {
+			if (inet->rcv_saddr != daddr)
+				continue;
+			score+=2;
+		}
+		if (inet->daddr) {
+			if (inet->daddr != saddr)
+				continue;
+			score+=2;
+		}
+		if (inet->dport) {
+			if (inet->dport != sport)
+				continue;
+			score+=2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				continue;
+			score+=2;
+		}
+		if (score == 9) {
+			result = sk;
+			goto found;
+		} else if (score > best) {
+			result = sk;
+			best = score;
 		}
 	}
+
+	if (hash != hashwild) {
+		hash = hashwild;
+		goto lookup;
+	}
+found:
 	if (result)
 		sock_hold(result);
 	read_unlock(&udp_hash_lock);
 	return result;
 }
 
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
-					     __be16 loc_port, __be32 loc_addr,
-					     __be16 rmt_port, __be32 rmt_addr,
-					     int dif)
+static inline struct sock *udp_v4_mcast_next(
+			struct sock *sk,
+			unsigned int hnum, __be16 loc_port, __be32 loc_addr,
+			__be16 rmt_port, __be32 rmt_addr,
+			int dif)
 {
 	struct hlist_node *node;
 	struct sock *s = sk;
-	unsigned short hnum = ntohs(loc_port);
 
 	sk_for_each_from(s, node) {
 		struct inet_sock *inet = inet_sk(s);
 
 		if (s->sk_hash != hnum					||
+		    inet->num != loc_port				||
 		    (inet->daddr && inet->daddr != rmt_addr)		||
 		    (inet->dport != rmt_port && inet->dport)		||
 		    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)	||
@@ -1129,29 +1171,44 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 				    __be32 saddr, __be32 daddr,
 				    struct hlist_head udptable[])
 {
-	struct sock *sk;
+	struct sock *sk, *skw, *sknext;
 	int dif;
+	unsigned int hash = hash_port_and_addr(ntohs(uh->dest), daddr);
+	unsigned int hashwild = hash_port_and_addr(ntohs(uh->dest), 0);
 
-	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
 	dif = skb->dev->ifindex;
-	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (sk) {
-		struct sock *sknext = NULL;
 
+	read_lock(&udp_hash_lock);
+
+	sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
+	skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
+
+	sk = udp_v4_mcast_next(sk, hash, uh->dest, daddr, uh->source, saddr, dif);
+	if (!sk) {
+		hash = hashwild;
+		sk = udp_v4_mcast_next(skw, hash, uh->dest, daddr, uh->source,
+			saddr, dif);
+	}
+	if (sk) {
 		do {
 			struct sk_buff *skb1 = skb;
-
-			sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
-						   uh->source, saddr, dif);
+			sknext = udp_v4_mcast_next(sk_next(sk), hash, uh->dest,
+				daddr, uh->source, saddr, dif);
+			if (!sknext && hash != hashwild) {
+				hash = hashwild;
+				sknext = udp_v4_mcast_next(skw, hash, uh->dest,
+					daddr, uh->source, saddr, dif);
+			}
 			if (sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
 				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
-					/* we should probably re-process instead
-					 * of dropping packets here. */
+					/*
+					 * we should probably re-process
+					 * instead of dropping packets here.
+					 */
 					kfree_skb(skb1);
 			}
 			sk = sknext;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e04e49373505..3452433cbc96 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2359,8 +2359,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		break;
 
 	case NETDEV_CHANGENAME:
-#ifdef CONFIG_SYSCTL
 		if (idev) {
+			snmp6_unregister_dev(idev);
+#ifdef CONFIG_SYSCTL
 			addrconf_sysctl_unregister(&idev->cnf);
 			neigh_sysctl_unregister(idev->nd_parms);
 			neigh_sysctl_register(dev, idev->nd_parms,
@@ -2368,8 +2369,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					      &ndisc_ifinfo_sysctl_change,
 					      NULL);
 			addrconf_sysctl_register(idev, &idev->cnf);
-		}
 #endif
+			snmp6_register_dev(idev);
+		}
 		break;
 	}
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index acb306a5dd56..920dc9cf6a84 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -223,6 +223,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
 		return -EINVAL;
 	remove_proc_entry(idev->stats.proc_dir_entry->name,
 			  proc_net_devsnmp6);
+	idev->stats.proc_dir_entry = NULL;
 	return 0;
 }
 
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 538499a89975..5502cc948dfb 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -261,7 +261,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(skb, spi);
+	return xfrm6_rcv_spi(skb, spi) > 0 ? : 0;
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 60f293842a39..903bdb6eaaa1 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -90,20 +90,43 @@ struct iucv_irq_data {
 	u32 res2[8];
 };
 
-struct iucv_work {
+struct iucv_irq_list {
 	struct list_head list;
 	struct iucv_irq_data data;
 };
 
-static LIST_HEAD(iucv_work_queue);
-static DEFINE_SPINLOCK(iucv_work_lock);
-
 static struct iucv_irq_data *iucv_irq_data;
 static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE;
 static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE;
 
-static void iucv_tasklet_handler(unsigned long);
-static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0);
+/*
+ * Queue of interrupt buffers lock for delivery via the tasklet
+ * (fast but can't call smp_call_function).
+ */
+static LIST_HEAD(iucv_task_queue);
+
+/*
+ * The tasklet for fast delivery of iucv interrupts.
+ */
+static void iucv_tasklet_fn(unsigned long);
+static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn,0);
+
+/*
+ * Queue of interrupt buffers for delivery via a work queue
+ * (slower but can call smp_call_function).
+ */
+static LIST_HEAD(iucv_work_queue);
+
+/*
+ * The work element to deliver path pending interrupts.
+ */
+static void iucv_work_fn(struct work_struct *work);
+static DECLARE_WORK(iucv_work, iucv_work_fn);
+
+/*
+ * Spinlock protecting task and work queue.
+ */
+static DEFINE_SPINLOCK(iucv_queue_lock);
 
 enum iucv_command_codes {
 	IUCV_QUERY = 0,
@@ -147,10 +170,10 @@ static unsigned long iucv_max_pathid;
 static DEFINE_SPINLOCK(iucv_table_lock);
 
 /*
- * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet.
- * Needed for iucv_path_sever called from tasklet.
+ * iucv_active_cpu: contains the number of the cpu executing the tasklet
+ * or the work handler. Needed for iucv_path_sever called from tasklet.
  */
-static int iucv_tasklet_cpu = -1;
+static int iucv_active_cpu = -1;
 
 /*
  * Mutex and wait queue for iucv_register/iucv_unregister.
@@ -449,17 +472,19 @@ static void iucv_setmask_mp(void)
 {
 	int cpu;
 
+	preempt_disable();
 	for_each_online_cpu(cpu)
 		/* Enable all cpus with a declared buffer. */
 		if (cpu_isset(cpu, iucv_buffer_cpumask) &&
 		    !cpu_isset(cpu, iucv_irq_cpumask))
 			smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu);
+	preempt_enable();
 }
 
 /**
  * iucv_setmask_up
  *
- * Allow iucv interrupts on a single cpus.
+ * Allow iucv interrupts on a single cpu.
  */
 static void iucv_setmask_up(void)
 {
@@ -493,8 +518,10 @@ static int iucv_enable(void)
 		goto out;
 	/* Declare per cpu buffers. */
 	rc = -EIO;
+	preempt_disable();
 	for_each_online_cpu(cpu)
 		smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu);
+	preempt_enable();
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
 		goto out_path;
@@ -584,48 +611,49 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
 	return iucv_call_b2f0(IUCV_SEVER, parm);
 }
 
+#ifdef CONFIG_SMP
 /**
- * __iucv_cleanup_pathid
+ * __iucv_cleanup_queue
  * @dummy: unused dummy argument
  *
  * Nop function called via smp_call_function to force work items from
  * pending external iucv interrupts to the work queue.
  */
-static void __iucv_cleanup_pathid(void *dummy)
+static void __iucv_cleanup_queue(void *dummy)
 {
 }
+#endif
 
 /**
- * iucv_cleanup_pathid
- * @pathid: 16 bit pathid
+ * iucv_cleanup_queue
  *
  * Function called after a path has been severed to find all remaining
  * work items for the now stale pathid. The caller needs to hold the
  * iucv_table_lock.
  */
-static void iucv_cleanup_pathid(u16 pathid)
+static void iucv_cleanup_queue(void)
 {
-	struct iucv_work *p, *n;
+	struct iucv_irq_list *p, *n;
 
 	/*
-	 * Path is severed, the pathid can be reused immediatly on
-	 * a iucv connect or a connection pending interrupt.
-	 * iucv_path_connect and connection pending interrupt will
-	 * wait until the iucv_table_lock is released before the
-	 * recycled pathid enters the system.
-	 * Force remaining interrupts to the work queue, then
-	 * scan the work queue for items of this path.
+	 * When a path is severed, the pathid can be reused immediatly
+	 * on a iucv connect or a connection pending interrupt. Remove
+	 * all entries from the task queue that refer to a stale pathid
+	 * (iucv_path_table[ix] == NULL). Only then do the iucv connect
+	 * or deliver the connection pending interrupt. To get all the
+	 * pending interrupts force them to the work queue by calling
+	 * an empty function on all cpus.
 	 */
-	smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1);
-	spin_lock_irq(&iucv_work_lock);
-	list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
-		/* Remove work items for pathid except connection pending */
-		if (p->data.ippathid == pathid && p->data.iptype != 0x01) {
+	smp_call_function(__iucv_cleanup_queue, NULL, 0, 1);
+	spin_lock_irq(&iucv_queue_lock);
+	list_for_each_entry_safe(p, n, &iucv_task_queue, list) {
+		/* Remove stale work items from the task queue. */
+		if (iucv_path_table[p->data.ippathid] == NULL) {
 			list_del(&p->list);
 			kfree(p);
 		}
 	}
-	spin_unlock_irq(&iucv_work_lock);
+	spin_unlock_irq(&iucv_queue_lock);
 }
 
 /**
@@ -684,7 +712,6 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
 		iucv_sever_pathid(p->pathid, NULL);
 		iucv_path_table[p->pathid] = NULL;
 		list_del(&p->list);
-		iucv_cleanup_pathid(p->pathid);
 		iucv_path_free(p);
 	}
 	spin_unlock_bh(&iucv_table_lock);
@@ -757,9 +784,9 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
 	union iucv_param *parm;
 	int rc;
 
-	preempt_disable();
-	if (iucv_tasklet_cpu != smp_processor_id())
-		spin_lock_bh(&iucv_table_lock);
+	BUG_ON(in_atomic());
+	spin_lock_bh(&iucv_table_lock);
+	iucv_cleanup_queue();
 	parm = percpu_ptr(iucv_param, smp_processor_id());
 	memset(parm, 0, sizeof(union iucv_param));
 	parm->ctrl.ipmsglim = path->msglim;
@@ -794,9 +821,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
 			rc = -EIO;
 		}
 	}
-	if (iucv_tasklet_cpu != smp_processor_id())
-		spin_unlock_bh(&iucv_table_lock);
-	preempt_enable();
+	spin_unlock_bh(&iucv_table_lock);
 	return rc;
 }
 
@@ -867,15 +892,14 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
 
 
 	preempt_disable();
-	if (iucv_tasklet_cpu != smp_processor_id())
+	if (iucv_active_cpu != smp_processor_id())
 		spin_lock_bh(&iucv_table_lock);
 	rc = iucv_sever_pathid(path->pathid, userdata);
 	if (!rc) {
 		iucv_path_table[path->pathid] = NULL;
 		list_del_init(&path->list);
-		iucv_cleanup_pathid(path->pathid);
 	}
-	if (iucv_tasklet_cpu != smp_processor_id())
+	if (iucv_active_cpu != smp_processor_id())
 		spin_unlock_bh(&iucv_table_lock);
 	preempt_enable();
 	return rc;
@@ -1244,8 +1268,7 @@ static void iucv_path_complete(struct iucv_irq_data *data)
 	struct iucv_path_complete *ipc = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipc->ippathid];
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->path_complete)
+	if (path && path->handler && path->handler->path_complete)
 		path->handler->path_complete(path, ipc->ipuser);
 }
 
@@ -1273,14 +1296,14 @@ static void iucv_path_severed(struct iucv_irq_data *data)
 	struct iucv_path_severed *ips = (void *) data;
 	struct iucv_path *path = iucv_path_table[ips->ippathid];
 
-	BUG_ON(!path || !path->handler);
+	if (!path || !path->handler)	/* Already severed */
+		return;
 	if (path->handler->path_severed)
 		path->handler->path_severed(path, ips->ipuser);
 	else {
 		iucv_sever_pathid(path->pathid, NULL);
 		iucv_path_table[path->pathid] = NULL;
 		list_del_init(&path->list);
-		iucv_cleanup_pathid(path->pathid);
 		iucv_path_free(path);
 	}
 }
@@ -1309,8 +1332,7 @@ static void iucv_path_quiesced(struct iucv_irq_data *data)
 	struct iucv_path_quiesced *ipq = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipq->ippathid];
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->path_quiesced)
+	if (path && path->handler && path->handler->path_quiesced)
 		path->handler->path_quiesced(path, ipq->ipuser);
 }
 
@@ -1338,8 +1360,7 @@ static void iucv_path_resumed(struct iucv_irq_data *data)
 	struct iucv_path_resumed *ipr = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipr->ippathid];
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->path_resumed)
+	if (path && path->handler && path->handler->path_resumed)
 		path->handler->path_resumed(path, ipr->ipuser);
 }
 
@@ -1371,8 +1392,7 @@ static void iucv_message_complete(struct iucv_irq_data *data)
 	struct iucv_path *path = iucv_path_table[imc->ippathid];
 	struct iucv_message msg;
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->message_complete) {
+	if (path && path->handler && path->handler->message_complete) {
 		msg.flags = imc->ipflags1;
 		msg.id = imc->ipmsgid;
 		msg.audit = imc->ipaudit;
@@ -1417,8 +1437,7 @@ static void iucv_message_pending(struct iucv_irq_data *data)
 	struct iucv_path *path = iucv_path_table[imp->ippathid];
 	struct iucv_message msg;
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->message_pending) {
+	if (path && path->handler && path->handler->message_pending) {
 		msg.flags = imp->ipflags1;
 		msg.id = imp->ipmsgid;
 		msg.class = imp->iptrgcls;
@@ -1433,17 +1452,16 @@ static void iucv_message_pending(struct iucv_irq_data *data)
 }
 
 /**
- * iucv_tasklet_handler:
+ * iucv_tasklet_fn:
  *
  * This tasklet loops over the queue of irq buffers created by
  * iucv_external_interrupt, calls the appropriate action handler
  * and then frees the buffer.
  */
-static void iucv_tasklet_handler(unsigned long ignored)
+static void iucv_tasklet_fn(unsigned long ignored)
 {
 	typedef void iucv_irq_fn(struct iucv_irq_data *);
 	static iucv_irq_fn *irq_fn[] = {
-		[0x01] = iucv_path_pending,
 		[0x02] = iucv_path_complete,
 		[0x03] = iucv_path_severed,
 		[0x04] = iucv_path_quiesced,
@@ -1453,38 +1471,70 @@ static void iucv_tasklet_handler(unsigned long ignored)
 		[0x08] = iucv_message_pending,
 		[0x09] = iucv_message_pending,
 	};
-	struct iucv_work *p;
+	struct list_head task_queue = LIST_HEAD_INIT(task_queue);
+	struct iucv_irq_list *p, *n;
 
 	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
 	spin_lock(&iucv_table_lock);
-	iucv_tasklet_cpu = smp_processor_id();
+	iucv_active_cpu = smp_processor_id();
 
-	spin_lock_irq(&iucv_work_lock);
-	while (!list_empty(&iucv_work_queue)) {
-		p = list_entry(iucv_work_queue.next, struct iucv_work, list);
+	spin_lock_irq(&iucv_queue_lock);
+	list_splice_init(&iucv_task_queue, &task_queue);
+	spin_unlock_irq(&iucv_queue_lock);
+
+	list_for_each_entry_safe(p, n, &task_queue, list) {
 		list_del_init(&p->list);
-		spin_unlock_irq(&iucv_work_lock);
 		irq_fn[p->data.iptype](&p->data);
 		kfree(p);
-		spin_lock_irq(&iucv_work_lock);
 	}
-	spin_unlock_irq(&iucv_work_lock);
 
-	iucv_tasklet_cpu = -1;
+	iucv_active_cpu = -1;
 	spin_unlock(&iucv_table_lock);
 }
 
 /**
+ * iucv_work_fn:
+ *
+ * This work function loops over the queue of path pending irq blocks
+ * created by iucv_external_interrupt, calls the appropriate action
+ * handler and then frees the buffer.
+ */
+static void iucv_work_fn(struct work_struct *work)
+{
+	typedef void iucv_irq_fn(struct iucv_irq_data *);
+	struct list_head work_queue = LIST_HEAD_INIT(work_queue);
+	struct iucv_irq_list *p, *n;
+
+	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
+	spin_lock_bh(&iucv_table_lock);
+	iucv_active_cpu = smp_processor_id();
+
+	spin_lock_irq(&iucv_queue_lock);
+	list_splice_init(&iucv_work_queue, &work_queue);
+	spin_unlock_irq(&iucv_queue_lock);
+
+	iucv_cleanup_queue();
+	list_for_each_entry_safe(p, n, &work_queue, list) {
+		list_del_init(&p->list);
+		iucv_path_pending(&p->data);
+		kfree(p);
+	}
+
+	iucv_active_cpu = -1;
+	spin_unlock_bh(&iucv_table_lock);
+}
+
+/**
  * iucv_external_interrupt
  * @code: irq code
  *
  * Handles external interrupts coming in from CP.
- * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler().
+ * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn().
  */
 static void iucv_external_interrupt(u16 code)
 {
 	struct iucv_irq_data *p;
-	struct iucv_work *work;
+	struct iucv_irq_list *work;
 
 	p = percpu_ptr(iucv_irq_data, smp_processor_id());
 	if (p->ippathid >= iucv_max_pathid) {
@@ -1498,16 +1548,23 @@ static void iucv_external_interrupt(u16 code)
 		printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
 		return;
 	}
-	work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC);
+	work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
 	if (!work) {
 		printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
 		return;
 	}
 	memcpy(&work->data, p, sizeof(work->data));
-	spin_lock(&iucv_work_lock);
-	list_add_tail(&work->list, &iucv_work_queue);
-	spin_unlock(&iucv_work_lock);
-	tasklet_schedule(&iucv_tasklet);
+	spin_lock(&iucv_queue_lock);
+	if (p->iptype == 0x01) {
+		/* Path pending interrupt. */
+		list_add_tail(&work->list, &iucv_work_queue);
+		schedule_work(&iucv_work);
+	} else {
+		/* The other interrupts. */
+		list_add_tail(&work->list, &iucv_task_queue);
+		tasklet_schedule(&iucv_tasklet);
+	}
+	spin_unlock(&iucv_queue_lock);
 }
 
 /**
@@ -1577,12 +1634,14 @@ out:
  */
 static void iucv_exit(void)
 {
-	struct iucv_work *p, *n;
+	struct iucv_irq_list *p, *n;
 
-	spin_lock_irq(&iucv_work_lock);
+	spin_lock_irq(&iucv_queue_lock);
+	list_for_each_entry_safe(p, n, &iucv_task_queue, list)
+		kfree(p);
 	list_for_each_entry_safe(p, n, &iucv_work_queue, list)
 		kfree(p);
-	spin_unlock_irq(&iucv_work_lock);
+	spin_unlock_irq(&iucv_queue_lock);
 	unregister_hotcpu_notifier(&iucv_cpu_notifier);
 	percpu_free(iucv_param);
 	percpu_free(iucv_irq_data);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 11938fb20395..2fc0a92caa78 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3987,7 +3987,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
 		memcpy(&temp, &from->ipaddr, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
-		if(space_left < addrlen)
+		if (space_left < addrlen)
 			return -ENOMEM;
 		if (copy_to_user(to, &temp, addrlen))
 			return -EFAULT;
@@ -4076,8 +4076,9 @@ done:
 /* Helper function that copies local addresses to user and returns the number
  * of addresses copied.
  */
-static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs,
-					void __user *to)
+static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
+					int max_addrs, void *to,
+					int *bytes_copied)
 {
 	struct list_head *pos, *next;
 	struct sctp_sockaddr_entry *addr;
@@ -4094,10 +4095,10 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
 								&temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if (copy_to_user(to, &temp, addrlen))
-			return -EFAULT;
+		memcpy(to, &temp, addrlen);
 
 		to += addrlen;
+		*bytes_copied += addrlen;
 		cnt ++;
 		if (cnt >= max_addrs) break;
 	}
@@ -4105,8 +4106,8 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
 	return cnt;
 }
 
-static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
-				    void __user **to, size_t space_left)
+static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
+			    size_t space_left, int *bytes_copied)
 {
 	struct list_head *pos, *next;
 	struct sctp_sockaddr_entry *addr;
@@ -4123,14 +4124,14 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
 								&temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if(space_left<addrlen)
+		if (space_left < addrlen)
 			return -ENOMEM;
-		if (copy_to_user(*to, &temp, addrlen))
-			return -EFAULT;
+		memcpy(to, &temp, addrlen);
 
-		*to += addrlen;
+		to += addrlen;
 		cnt ++;
 		space_left -= addrlen;
+		bytes_copied += addrlen;
 	}
 
 	return cnt;
@@ -4154,6 +4155,8 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	int addrlen;
 	rwlock_t *addr_lock;
 	int err = 0;
+	void *addrs;
+	int bytes_copied = 0;
 
 	if (len != sizeof(struct sctp_getaddrs_old))
 		return -EINVAL;
@@ -4181,6 +4184,15 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 
 	to = getaddrs.addrs;
 
+	/* Allocate space for a local instance of packed array to hold all
+	 * the data.  We store addresses here first and then put write them
+	 * to the user in one shot.
+	 */
+	addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num,
+			GFP_KERNEL);
+	if (!addrs)
+		return -ENOMEM;
+
 	sctp_read_lock(addr_lock);
 
 	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
@@ -4190,13 +4202,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
 		if (sctp_is_any(&addr->a)) {
-			cnt = sctp_copy_laddrs_to_user_old(sk, bp->port,
-							   getaddrs.addr_num,
-							   to);
-			if (cnt < 0) {
-				err = cnt;
-				goto unlock;
-			}
+			cnt = sctp_copy_laddrs_old(sk, bp->port,
+						   getaddrs.addr_num,
+						   addrs, &bytes_copied);
 			goto copy_getaddrs;
 		}
 	}
@@ -4206,22 +4214,29 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 		memcpy(&temp, &addr->a, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if (copy_to_user(to, &temp, addrlen)) {
-			err = -EFAULT;
-			goto unlock;
-		}
+		memcpy(addrs, &temp, addrlen);
 		to += addrlen;
+		bytes_copied += addrlen;
 		cnt ++;
 		if (cnt >= getaddrs.addr_num) break;
 	}
 
 copy_getaddrs:
+	sctp_read_unlock(addr_lock);
+
+	/* copy the entire address list into the user provided space */
+	if (copy_to_user(to, addrs, bytes_copied)) {
+		err = -EFAULT;
+		goto error;
+	}
+
+	/* copy the leading structure back to user */
 	getaddrs.addr_num = cnt;
 	if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old)))
 		err = -EFAULT;
 
-unlock:
-	sctp_read_unlock(addr_lock);
+error:
+	kfree(addrs);
 	return err;
 }
 
@@ -4241,7 +4256,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	rwlock_t *addr_lock;
 	int err = 0;
 	size_t space_left;
-	int bytes_copied;
+	int bytes_copied = 0;
+	void *addrs;
 
 	if (len <= sizeof(struct sctp_getaddrs))
 		return -EINVAL;
@@ -4269,6 +4285,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	to = optval + offsetof(struct sctp_getaddrs,addrs);
 	space_left = len - sizeof(struct sctp_getaddrs) -
 			 offsetof(struct sctp_getaddrs,addrs);
+	addrs = kmalloc(space_left, GFP_KERNEL);
+	if (!addrs)
+		return -ENOMEM;
 
 	sctp_read_lock(addr_lock);
 
@@ -4279,11 +4298,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
 		if (sctp_is_any(&addr->a)) {
-			cnt = sctp_copy_laddrs_to_user(sk, bp->port,
-						       &to, space_left);
+			cnt = sctp_copy_laddrs(sk, bp->port, addrs,
+						space_left, &bytes_copied);
 			if (cnt < 0) {
 				err = cnt;
-				goto unlock;
+				goto error;
 			}
 			goto copy_getaddrs;
 		}
@@ -4294,26 +4313,31 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 		memcpy(&temp, &addr->a, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if(space_left < addrlen)
-			return -ENOMEM; /*fixme: right error?*/
-		if (copy_to_user(to, &temp, addrlen)) {
-			err = -EFAULT;
-			goto unlock;
+		if (space_left < addrlen) {
+			err =  -ENOMEM; /*fixme: right error?*/
+			goto error;
 		}
+		memcpy(addrs, &temp, addrlen);
 		to += addrlen;
+		bytes_copied += addrlen;
 		cnt ++;
 		space_left -= addrlen;
 	}
 
 copy_getaddrs:
+	sctp_read_unlock(addr_lock);
+
+	if (copy_to_user(to, addrs, bytes_copied)) {
+		err = -EFAULT;
+		goto error;
+	}
 	if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num))
 		return -EFAULT;
-	bytes_copied = ((char __user *)to) - optval;
 	if (put_user(bytes_copied, optlen))
 		return -EFAULT;
 
-unlock:
-	sctp_read_unlock(addr_lock);
+error:
+	kfree(addrs);
 	return err;
 }
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 762926009c04..263e34e45265 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -579,8 +579,22 @@ static inline int xfrm_byidx_should_resize(int total)
 	return 0;
 }
 
-static DEFINE_MUTEX(hash_resize_mutex);
+void xfrm_spd_getinfo(struct xfrm_spdinfo *si)
+{
+	read_lock_bh(&xfrm_policy_lock);
+	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
+	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
+	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
+	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
+	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
+	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
+	si->spdhcnt = xfrm_idx_hmask;
+	si->spdhmcnt = xfrm_policy_hashmax;
+	read_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_spd_getinfo);
 
+static DEFINE_MUTEX(hash_resize_mutex);
 static void xfrm_hash_resize(struct work_struct *__unused)
 {
 	int dir, total;
@@ -1330,6 +1344,40 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
 	return err;
 }
 
+static int inline
+xfrm_dst_alloc_copy(void **target, void *src, int size)
+{
+	if (!*target) {
+		*target = kmalloc(size, GFP_ATOMIC);
+		if (!*target)
+			return -ENOMEM;
+	}
+	memcpy(*target, src, size);
+	return 0;
+}
+
+static int inline
+xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
+				   sel, sizeof(*sel));
+#else
+	return 0;
+#endif
+}
+
+static int inline
+xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
+#else
+	return 0;
+#endif
+}
 
 static int stale_bundle(struct dst_entry *dst);
 
@@ -1518,6 +1566,18 @@ restart:
 			err = -EHOSTUNREACH;
 			goto error;
 		}
+
+		if (npols > 1)
+			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+		else
+			err = xfrm_dst_update_origin(dst, fl);
+		if (unlikely(err)) {
+			write_unlock_bh(&policy->lock);
+			if (dst)
+				dst_free(dst);
+			goto error;
+		}
+
 		dst->next = policy->bundles;
 		policy->bundles = dst;
 		dst_hold(dst);
@@ -1933,6 +1993,15 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
 	    (dst->dev && !netif_running(dst->dev)))
 		return 0;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (fl) {
+		if (first->origin && !flow_cache_uli_match(first->origin, fl))
+			return 0;
+		if (first->partner &&
+		    !xfrm_selector_match(first->partner, fl, family))
+			return 0;
+	}
+#endif
 
 	last = NULL;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 69110fed64b6..4210d91624cd 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -672,6 +672,81 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
 	return skb;
 }
 
+static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+	struct xfrm_spdinfo si;
+	struct nlmsghdr *nlh;
+	u32 *f;
+
+	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
+	if (nlh == NULL) /* shouldnt really happen ... */
+		return -EMSGSIZE;
+
+	f = nlmsg_data(nlh);
+	*f = flags;
+	xfrm_spd_getinfo(&si);
+
+	if (flags & XFRM_SPD_HMASK)
+		NLA_PUT_U32(skb, XFRMA_SPDHMASK, si.spdhcnt);
+	if (flags & XFRM_SPD_HMAX)
+		NLA_PUT_U32(skb, XFRMA_SPDHMAX, si.spdhmcnt);
+	if (flags & XFRM_SPD_ICNT)
+		NLA_PUT_U32(skb, XFRMA_SPDICNT, si.incnt);
+	if (flags & XFRM_SPD_OCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDOCNT, si.outcnt);
+	if (flags & XFRM_SPD_FCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDFCNT, si.fwdcnt);
+	if (flags & XFRM_SPD_ISCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDISCNT, si.inscnt);
+	if (flags & XFRM_SPD_OSCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDOSCNT, si.inscnt);
+	if (flags & XFRM_SPD_FSCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDFSCNT, si.inscnt);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+		struct rtattr **xfrma)
+{
+	struct sk_buff *r_skb;
+	u32 *flags = NLMSG_DATA(nlh);
+	u32 spid = NETLINK_CB(skb).pid;
+	u32 seq = nlh->nlmsg_seq;
+	int len = NLMSG_LENGTH(sizeof(u32));
+
+
+	if (*flags & XFRM_SPD_HMASK)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_HMAX)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_ICNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_OCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_FCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_ISCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_OSCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_FSCNT)
+		len += RTA_SPACE(sizeof(u32));
+
+	r_skb = alloc_skb(len, GFP_ATOMIC);
+	if (r_skb == NULL)
+		return -ENOMEM;
+
+	if (build_spdinfo(r_skb, spid, seq, *flags) < 0)
+		BUG();
+
+	return nlmsg_unicast(xfrm_nl, r_skb, spid);
+}
+
 static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
 {
 	struct xfrm_sadinfo si;
@@ -1879,6 +1954,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_REPORT      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
 	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
+	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
 };
 
 #undef XMSGSIZE
@@ -1907,6 +1983,7 @@ static struct xfrm_link {
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = { .doit = xfrm_get_ae  },
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate    },
 	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo   },
+	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo   },
 };
 
 static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)