48 files changed, 709 insertions, 504 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaa150c33b04..d368cf249000 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -228,13 +228,14 @@ static int inet_create(struct socket *sock, int protocol)
 	unsigned char answer_flags;
 	char answer_no_check;
 	int try_loading_module = 0;
-	int err = -ESOCKTNOSUPPORT;
+	int err;
 
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
 	answer = NULL;
 lookup_protocol:
+	err = -ESOCKTNOSUPPORT;
 	rcu_read_lock();
 	list_for_each_rcu(p, &inetsw[sock->type]) {
 		answer = list_entry(p, struct inet_protosw, list);
@@ -252,6 +253,7 @@ lookup_protocol:
 			if (IPPROTO_IP == answer->protocol)
 				break;
 		}
+		err = -EPROTONOSUPPORT;
 		answer = NULL;
 	}
 
@@ -280,9 +282,6 @@ lookup_protocol:
 	err = -EPERM;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
-	err = -EPROTONOSUPPORT;
-	if (!protocol)
-		goto out_rcu_unlock;
 
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 4ec4b2ca6ab1..04a6fe3e95a2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -234,7 +234,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 			 int destroy)
 {
 	struct in_ifaddr *promote = NULL;
-	struct in_ifaddr *ifa1 = *ifap;
+	struct in_ifaddr *ifa, *ifa1 = *ifap;
+	struct in_ifaddr *last_prim = in_dev->ifa_list;
+	struct in_ifaddr *prev_prom = NULL;
+	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 
 	ASSERT_RTNL();
 
@@ -243,18 +246,22 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	 **/
 
 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
-		struct in_ifaddr *ifa;
 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 
 		while ((ifa = *ifap1) != NULL) {
+			if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 
+			    ifa1->ifa_scope <= ifa->ifa_scope)
+				last_prim = ifa;
+
 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 			    ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
 				ifap1 = &ifa->ifa_next;
+				prev_prom = ifa;
 				continue;
 			}
 
-			if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
+			if (!do_promote) {
 				*ifap1 = ifa->ifa_next;
 
 				rtmsg_ifa(RTM_DELADDR, ifa);
@@ -283,18 +290,31 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	 */
 	rtmsg_ifa(RTM_DELADDR, ifa1);
 	notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
-	if (destroy) {
-		inet_free_ifa(ifa1);
 
-		if (!in_dev->ifa_list)
-			inetdev_destroy(in_dev);
-	}
+	if (promote) {
+
+		if (prev_prom) {
+			prev_prom->ifa_next = promote->ifa_next;
+			promote->ifa_next = last_prim->ifa_next;
+			last_prim->ifa_next = promote;
+		}
 
-	if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
-		/* not sure if we should send a delete notify first? */
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
 		rtmsg_ifa(RTM_NEWADDR, promote);
 		notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
+		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+			if (ifa1->ifa_mask != ifa->ifa_mask ||
+			    !inet_ifa_match(ifa1->ifa_address, ifa))
+					continue;
+			fib_add_ifaddr(ifa);
+		}
+
+	}
+	if (destroy) {
+		inet_free_ifa(ifa1);
+
+		if (!in_dev->ifa_list)
+			inetdev_destroy(in_dev);
 	}
 }
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2267c1fad879..19b1b984d687 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -407,7 +407,7 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
 		tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
 }
 
-static void fib_add_ifaddr(struct in_ifaddr *ifa)
+void fib_add_ifaddr(struct in_ifaddr *ifa)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct net_device *dev = in_dev->dev;
@@ -544,12 +544,16 @@ static void nl_fib_input(struct sock *sk, int len)
 	struct sk_buff *skb = NULL;
         struct nlmsghdr *nlh = NULL;
 	struct fib_result_nl *frn;
-	int err;
 	u32 pid;     
 	struct fib_table *tb;
 	
-	skb = skb_recv_datagram(sk, 0, 0, &err);
+	skb = skb_dequeue(&sk->sk_receive_queue);
 	nlh = (struct nlmsghdr *)skb->data;
+	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
+	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
+		kfree_skb(skb);
+		return;
+	}
 	
 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
 	tb = fib_get_table(frn->tb_id_in);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 2a8c9afc3695..7ea0209cb169 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -975,7 +975,7 @@ static void fib_seq_stop(struct seq_file *seq, void *v)
 
 static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
 {
-	static unsigned type2flags[RTN_MAX + 1] = {
+	static const unsigned type2flags[RTN_MAX + 1] = {
 		[7] = RTF_REJECT, [8] = RTF_REJECT,
 	};
 	unsigned flags = type2flags[type];
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 186f20c4a45e..6d2a6ac070e3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -83,7 +83,7 @@ for (nhsel=0; nhsel < 1; nhsel++)
 #define endfor_nexthops(fi) }
 
 
-static struct 
+static const struct 
 {
 	int	error;
 	u8	scope;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 66247f38b371..705e3ce86df9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2378,6 +2378,7 @@ static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
  */
 static int fib_route_seq_show(struct seq_file *seq, void *v)
 {
+	const struct fib_trie_iter *iter = seq->private;
 	struct leaf *l = v;
 	int i;
 	char bf[128];
@@ -2389,6 +2390,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 	}
 
+	if (iter->trie == trie_local)
+		return 0;
 	if (IS_TNODE(l))
 		return 0;
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 175e093ec564..92e23b2ad4d2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -220,7 +220,7 @@ struct icmp_control {
 	short   error;		/* This ICMP is classed as an error message */
 };
 
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
 /*
  *	The ICMP socket(s). This is the most convenient way to flow control
@@ -934,11 +934,11 @@ int icmp_rcv(struct sk_buff *skb)
 	case CHECKSUM_HW:
 		if (!(u16)csum_fold(skb->csum))
 			break;
-		LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n");
+		/* fall through */
 	case CHECKSUM_NONE:
-		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb))
 			goto error;
-	default:;
 	}
 
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
@@ -994,7 +994,7 @@ error:
 /*
  *	This table is the definition of how we handle ICMP.
  */
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
 	[ICMP_ECHOREPLY] = {
 		.output_entry = ICMP_MIB_OUTECHOREPS,
 		.input_entry = ICMP_MIB_INECHOREPS,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c6247fc84060..4a195c724f01 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -872,11 +872,18 @@ int igmp_rcv(struct sk_buff *skb)
 		return 0;
 	}
 
-	if (!pskb_may_pull(skb, sizeof(struct igmphdr)) || 
-	    (u16)csum_fold(skb_checksum(skb, 0, len, 0))) {
-		in_dev_put(in_dev);
-		kfree_skb(skb);
-		return 0;
+	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
+		goto drop;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_HW:
+		if (!(u16)csum_fold(skb->csum))
+			break;
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb))
+			goto drop;
 	}
 
 	ih = skb->h.igmph;
@@ -890,7 +897,10 @@ int igmp_rcv(struct sk_buff *skb)
 		/* Is it our report looped back? */
 		if (((struct rtable*)skb->dst)->fl.iif == 0)
 			break;
-		igmp_heard_report(in_dev, ih->group);
+		/* don't rely on MC router hearing unicast reports */
+		if (skb->pkt_type == PACKET_MULTICAST ||
+		    skb->pkt_type == PACKET_BROADCAST)
+			igmp_heard_report(in_dev, ih->group);
 		break;
 	case IGMP_PIM:
 #ifdef CONFIG_IP_PIMSM_V1
@@ -906,6 +916,8 @@ int igmp_rcv(struct sk_buff *skb)
 	default:
 		NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
 	}
+
+drop:
 	in_dev_put(in_dev);
 	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e7d26d9943c2..8ce0ce2ee48e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -71,7 +71,7 @@ struct ipfrag_skb_cb
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
-	struct ipq	*next;		/* linked list pointers			*/
+	struct hlist_node list;
 	struct list_head lru_list;	/* lru list member 			*/
 	u32		user;
 	u32		saddr;
@@ -89,7 +89,6 @@ struct ipq {
 	spinlock_t	lock;
 	atomic_t	refcnt;
 	struct timer_list timer;	/* when will this queue expire?		*/
-	struct ipq	**pprev;
 	int		iif;
 	struct timeval	stamp;
 };
@@ -99,7 +98,7 @@ struct ipq {
 #define IPQ_HASHSZ	64
 
 /* Per-bucket lock is easy to add now. */
-static struct ipq *ipq_hash[IPQ_HASHSZ];
+static struct hlist_head ipq_hash[IPQ_HASHSZ];
 static DEFINE_RWLOCK(ipfrag_lock);
 static u32 ipfrag_hash_rnd;
 static LIST_HEAD(ipq_lru_list);
@@ -107,9 +106,7 @@ int ip_frag_nqueues = 0;
 
 static __inline__ void __ipq_unlink(struct ipq *qp)
 {
-	if(qp->next)
-		qp->next->pprev = qp->pprev;
-	*qp->pprev = qp->next;
+	hlist_del(&qp->list);
 	list_del(&qp->lru_list);
 	ip_frag_nqueues--;
 }
@@ -139,27 +136,18 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
 	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
 	for (i = 0; i < IPQ_HASHSZ; i++) {
 		struct ipq *q;
+		struct hlist_node *p, *n;
 
-		q = ipq_hash[i];
-		while (q) {
-			struct ipq *next = q->next;
+		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
 			unsigned int hval = ipqhashfn(q->id, q->saddr,
 						      q->daddr, q->protocol);
 
 			if (hval != i) {
-				/* Unlink. */
-				if (q->next)
-					q->next->pprev = q->pprev;
-				*q->pprev = q->next;
+				hlist_del(&q->list);
 
 				/* Relink to new hash chain. */
-				if ((q->next = ipq_hash[hval]) != NULL)
-					q->next->pprev = &q->next;
-				ipq_hash[hval] = q;
-				q->pprev = &ipq_hash[hval];
+				hlist_add_head(&q->list, &ipq_hash[hval]);
 			}
-
-			q = next;
 		}
 	}
 	write_unlock(&ipfrag_lock);
@@ -310,14 +298,16 @@ out:
 static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
 {
 	struct ipq *qp;
-
+#ifdef CONFIG_SMP
+	struct hlist_node *n;
+#endif
 	write_lock(&ipfrag_lock);
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
 	 * such entry could be created on other cpu, while we
 	 * promoted read lock to write lock.
 	 */
-	for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
 		if(qp->id == qp_in->id		&&
 		   qp->saddr == qp_in->saddr	&&
 		   qp->daddr == qp_in->daddr	&&
@@ -337,10 +327,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
 		atomic_inc(&qp->refcnt);
 
 	atomic_inc(&qp->refcnt);
-	if((qp->next = ipq_hash[hash]) != NULL)
-		qp->next->pprev = &qp->next;
-	ipq_hash[hash] = qp;
-	qp->pprev = &ipq_hash[hash];
+	hlist_add_head(&qp->list, &ipq_hash[hash]);
 	INIT_LIST_HEAD(&qp->lru_list);
 	list_add_tail(&qp->lru_list, &ipq_lru_list);
 	ip_frag_nqueues++;
@@ -392,9 +379,10 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	__u8 protocol = iph->protocol;
 	unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
 	struct ipq *qp;
+	struct hlist_node *n;
 
 	read_lock(&ipfrag_lock);
-	for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
 		if(qp->id == id		&&
 		   qp->saddr == saddr	&&
 		   qp->daddr == daddr	&&
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 896ce3f8f53a..46f9d9cf7a5f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -577,15 +577,16 @@ static int ipgre_rcv(struct sk_buff *skb)
 			goto drop_nolock;
 
 		if (flags&GRE_CSUM) {
-			if (skb->ip_summed == CHECKSUM_HW) {
+			switch (skb->ip_summed) {
+			case CHECKSUM_HW:
 				csum = (u16)csum_fold(skb->csum);
-				if (csum)
-					skb->ip_summed = CHECKSUM_NONE;
-			}
-			if (skb->ip_summed == CHECKSUM_NONE) {
-				skb->csum = skb_checksum(skb, 0, skb->len, 0);
+				if (!csum)
+					break;
+				/* fall through */
+			case CHECKSUM_NONE:
+				skb->csum = 0;
+				csum = __skb_checksum_complete(skb);
 				skb->ip_summed = CHECKSUM_HW;
-				csum = (u16)csum_fold(skb->csum);
 			}
 			offset += 4;
 		}
@@ -617,7 +618,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = __pskb_pull(skb, offset);
-		skb_postpull_rcsum(skb, skb->mac.raw, offset);
+		skb_postpull_rcsum(skb, skb->h.raw, offset);
 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -1216,7 +1217,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	return 0;
 }
 
-int __init ipgre_fb_tunnel_init(struct net_device *dev)
+static int __init ipgre_fb_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
 	struct iphdr *iph = &tunnel->parms.iph;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 11c2f68254f0..eba64e2bd397 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -690,7 +690,7 @@ csum_page(struct page *page, int offset, int copy)
 	return csum;
 }
 
-inline int ip_ufo_append_data(struct sock *sk,
+static inline int ip_ufo_append_data(struct sock *sk,
 			int getfrag(void *from, char *to, int offset, int len,
 			       int odd, struct sk_buff *skb),
 			void *from, int length, int hh_len, int fragheaderlen,
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index f828fa2eb7de..2a3a8c59c655 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -771,7 +771,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 	 * The drop rate array needs tuning for real environments.
 	 * Called from timer bh only => no locking
 	 */
-	static char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+	static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
 	static char todrop_counter[9] = {0};
 	int i;
 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 2d66848e7aa0..9bdcf31b760e 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1909,7 +1909,7 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 #define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
 #define MAX_ARG_LEN		SVCDEST_ARG_LEN
 
-static unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
+static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
 	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
 	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
 	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
@@ -2180,7 +2180,7 @@ __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
 #define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
 #define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
 
-static unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
 	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
 	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
 	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index c19408973c09..0e878fd6215c 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -251,7 +251,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 #define TCP_DIR_OUTPUT		4
 #define TCP_DIR_INPUT_ONLY	8
 
-static int tcp_state_off[IP_VS_DIR_LAST] = {
+static const int tcp_state_off[IP_VS_DIR_LAST] = {
 	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
 	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
 	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 9d3c8b5f327e..88a60650e6b8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -56,8 +56,8 @@ config IP_NF_CONNTRACK_MARK
 	  instead of the individual packets.
 	
 config IP_NF_CONNTRACK_EVENTS
-	bool "Connection tracking events"
-	depends on IP_NF_CONNTRACK
+	bool "Connection tracking events (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK
 	help
 	  If this option is enabled, the connection tracking code will
 	  provide a notifier chain that can be used by other kernel code
@@ -66,8 +66,8 @@ config IP_NF_CONNTRACK_EVENTS
 	  IF unsure, say `N'.
 
 config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface'
-	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
 	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
 	help
 	  This option enables support for a netlink-based userspace interface
@@ -440,7 +440,7 @@ config IP_NF_MATCH_COMMENT
 config IP_NF_MATCH_CONNMARK
 	tristate  'Connection mark match support'
 	depends on IP_NF_IPTABLES
-	depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
+	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
 	help
 	  This option adds a `connmark' match, which allows you to match the
 	  connection mark value previously set for the session by `CONNMARK'. 
@@ -452,7 +452,7 @@ config IP_NF_MATCH_CONNMARK
 config IP_NF_MATCH_CONNBYTES
 	tristate  'Connection byte/packet counter match support'
 	depends on IP_NF_IPTABLES
-	depends on IP_NF_CT_ACCT || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
+	depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
 	help
 	  This option adds a `connbytes' match, which allows you to match the
 	  number of bytes and/or packets for each direction within a connection.
@@ -767,7 +767,7 @@ config IP_NF_TARGET_TTL
 config IP_NF_TARGET_CONNMARK
 	tristate  'CONNMARK target support'
 	depends on IP_NF_MANGLE
-	depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
+	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
 	help
 	  This option adds a `CONNMARK' target, which allows one to manipulate
 	  the connection mark value.  Similar to the MARK target, but
@@ -779,8 +779,8 @@ config IP_NF_TARGET_CONNMARK
 
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
-	depends on IP_NF_IPTABLES && EXPERIMENTAL
-	depends on IP_NF_CONNTRACK_MARK || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
+	depends on IP_NF_MANGLE && EXPERIMENTAL
+	depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
 	help
 	  The CLUSTERIP target allows you to build load-balancing clusters of
 	  network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 058c48e258fc..d0a447e520a2 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -12,6 +12,7 @@ ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
 
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
+obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 
 # conntrack netlink interface
 obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
@@ -41,7 +42,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
 # matches
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index fa3f914117ec..e52847fa10f5 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -37,7 +37,7 @@ MODULE_LICENSE("GPL");
 module_param(master_timeout, int, 0600);
 MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
 
-static char *conns[] = { "DATA ", "MESG ", "INDEX " };
+static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
 
 /* This is slow, but it's simple. --RR */
 static char *amanda_buffer;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 422ab68ee7fb..84c66dbfedaf 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -1345,6 +1345,11 @@ static int kill_all(struct ip_conntrack *i, void *data)
 	return 1;
 }
 
+void ip_conntrack_flush(void)
+{
+	ip_ct_iterate_cleanup(kill_all, NULL);
+}
+
 static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 {
 	if (vmalloced)
@@ -1354,8 +1359,12 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
 			   get_order(sizeof(struct list_head) * size));
 }
 
-void ip_conntrack_flush()
+/* Mishearing the voices in his head, our hero wonders how he's
+   supposed to kill the mall. */
+void ip_conntrack_cleanup(void)
 {
+	ip_ct_attach = NULL;
+
 	/* This makes sure all current packets have passed through
            netfilter framework.  Roll on, two-stage module
            delete... */
@@ -1363,7 +1372,7 @@ void ip_conntrack_flush()
 
 	ip_ct_event_cache_flush();
  i_see_dead_people:
-	ip_ct_iterate_cleanup(kill_all, NULL);
+	ip_conntrack_flush();
 	if (atomic_read(&ip_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1371,14 +1380,7 @@ void ip_conntrack_flush()
 	/* wait until all references to ip_conntrack_untracked are dropped */
 	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
 		schedule();
-}
 
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
-	ip_ct_attach = NULL;
-	ip_conntrack_flush();
 	kmem_cache_destroy(ip_conntrack_cachep);
 	kmem_cache_destroy(ip_conntrack_expect_cachep);
 	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
@@ -1408,7 +1410,7 @@ static struct list_head *alloc_hashtable(int size, int *vmalloced)
 	return hash;
 }
 
-int set_hashsize(const char *val, struct kernel_param *kp)
+static int set_hashsize(const char *val, struct kernel_param *kp)
 {
 	int i, bucket, hashsize, vmalloced;
 	int old_vmalloced, old_size;
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index d77d6b3f5f80..68b173bcda60 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -29,9 +29,9 @@ static char *ftp_buffer;
 static DEFINE_SPINLOCK(ip_ftp_lock);
 
 #define MAX_PORTS 8
-static short ports[MAX_PORTS];
+static unsigned short ports[MAX_PORTS];
 static int ports_c;
-module_param_array(ports, short, &ports_c, 0400);
+module_param_array(ports, ushort, &ports_c, 0400);
 
 static int loose;
 module_param(loose, int, 0600);
@@ -55,7 +55,7 @@ static int try_rfc959(const char *, size_t, u_int32_t [], char);
 static int try_eprt(const char *, size_t, u_int32_t [], char);
 static int try_epsv_response(const char *, size_t, u_int32_t [], char);
 
-static struct ftp_search {
+static const struct ftp_search {
 	enum ip_conntrack_dir dir;
 	const char *pattern;
 	size_t plen;
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 15457415a4f3..d7c40421d0d1 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -34,7 +34,7 @@
 #include <linux/moduleparam.h>
 
 #define MAX_PORTS 8
-static short ports[MAX_PORTS];
+static unsigned short ports[MAX_PORTS];
 static int ports_c;
 static int max_dcc_channels = 8;
 static unsigned int dcc_timeout = 300;
@@ -52,14 +52,14 @@ EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
 MODULE_LICENSE("GPL");
-module_param_array(ports, short, &ports_c, 0400);
+module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "port numbers of IRC servers");
 module_param(max_dcc_channels, int, 0400);
 MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
 module_param(dcc_timeout, int, 0400);
 MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
 
-static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
+static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
 #define MINMATCHLEN	5
 
 #if 0
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index d2a4fec22862..91fe8f2e38ff 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -27,6 +27,7 @@
 #include <linux/errno.h>
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
+#include <linux/interrupt.h>
 #include <linux/notifier.h>
 
 #include <linux/netfilter.h>
@@ -59,11 +60,13 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb,
 
 	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
 
+	/* If no protocol helper is found, this function will return the
+	 * generic protocol helper, so proto won't *ever* be NULL */
 	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-	if (likely(proto && proto->tuple_to_nfattr)) {
+	if (likely(proto->tuple_to_nfattr))
 		ret = proto->tuple_to_nfattr(skb, tuple);
-		ip_conntrack_proto_put(proto);
-	}
+	
+	ip_conntrack_proto_put(proto);
 
 	return ret;
 
@@ -128,9 +131,11 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
 
 	struct nfattr *nest_proto;
 	int ret;
-	
-	if (!proto || !proto->to_nfattr)
+
+	if (!proto->to_nfattr) {
+		ip_conntrack_proto_put(proto);
 		return 0;
+	}
 	
 	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
 
@@ -467,7 +472,7 @@ out:
 }
 #endif
 
-static const int cta_min_ip[CTA_IP_MAX] = {
+static const size_t cta_min_ip[CTA_IP_MAX] = {
 	[CTA_IP_V4_SRC-1]	= sizeof(u_int32_t),
 	[CTA_IP_V4_DST-1]	= sizeof(u_int32_t),
 };
@@ -497,8 +502,8 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 	return 0;
 }
 
-static const int cta_min_proto[CTA_PROTO_MAX] = {
-	[CTA_PROTO_NUM-1]	= sizeof(u_int16_t),
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
 	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
 	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
@@ -523,14 +528,14 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
 
 	if (!tb[CTA_PROTO_NUM-1])
 		return -EINVAL;
-	tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
 
 	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
 
-	if (likely(proto && proto->nfattr_to_tuple)) {
+	if (likely(proto->nfattr_to_tuple))
 		ret = proto->nfattr_to_tuple(tb, tuple);
-		ip_conntrack_proto_put(proto);
-	}
+	
+	ip_conntrack_proto_put(proto);
 	
 	return ret;
 }
@@ -576,7 +581,7 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
 }
 
 #ifdef CONFIG_IP_NF_NAT_NEEDED
-static const int cta_min_protonat[CTA_PROTONAT_MAX] = {
+static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
 	[CTA_PROTONAT_PORT_MIN-1]	= sizeof(u_int16_t),
 	[CTA_PROTONAT_PORT_MAX-1]	= sizeof(u_int16_t),
 };
@@ -596,8 +601,6 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
 		return -EINVAL;
 
 	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-	if (!npt)
-		return 0;
 
 	if (!npt->nfattr_to_range) {
 		ip_nat_proto_put(npt);
@@ -614,6 +617,11 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
 	return 0;
 }
 
+static const size_t cta_min_nat[CTA_NAT_MAX] = {
+	[CTA_NAT_MINIP-1]       = sizeof(u_int32_t),
+	[CTA_NAT_MAXIP-1]       = sizeof(u_int32_t),
+};
+
 static inline int
 ctnetlink_parse_nat(struct nfattr *cda[],
 		    const struct ip_conntrack *ct, struct ip_nat_range *range)
@@ -627,6 +635,9 @@ ctnetlink_parse_nat(struct nfattr *cda[],
 	
 	nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
 
+	if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
+		return -EINVAL;
+
 	if (tb[CTA_NAT_MINIP-1])
 		range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
 
@@ -667,6 +678,14 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
 	return 0;
 }
 
+static const size_t cta_min[CTA_MAX] = {
+	[CTA_STATUS-1] 		= sizeof(u_int32_t),
+	[CTA_TIMEOUT-1] 	= sizeof(u_int32_t),
+	[CTA_MARK-1]		= sizeof(u_int32_t),
+	[CTA_USE-1]		= sizeof(u_int32_t),
+	[CTA_ID-1]		= sizeof(u_int32_t)
+};
+
 static int
 ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, 
 			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
@@ -678,6 +697,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
 	if (cda[CTA_TUPLE_ORIG-1])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
 	else if (cda[CTA_TUPLE_REPLY-1])
@@ -706,11 +728,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 		}
 	}	
-	if (del_timer(&ct->timeout)) {
-		ip_conntrack_put(ct);
+	if (del_timer(&ct->timeout))
 		ct->timeout.function((unsigned long)ct);
-		return 0;
-	}
+
 	ip_conntrack_put(ct);
 	DEBUGP("leaving\n");
 
@@ -760,6 +780,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		return 0;
 	}
 
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
 	if (cda[CTA_TUPLE_ORIG-1])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
 	else if (cda[CTA_TUPLE_REPLY-1])
@@ -852,7 +875,7 @@ ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
 		DEBUGP("NAT status: %lu\n", 
 		       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
 		
-		if (ip_nat_initialized(ct, hooknum))
+		if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
 			return -EEXIST;
 		ip_nat_setup_info(ct, &range, hooknum);
 
@@ -935,8 +958,6 @@ ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
 	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
 
 	proto = ip_conntrack_proto_find_get(npt);
-	if (!proto)
-		return -EINVAL;
 
 	if (proto->from_nfattr)
 		err = proto->from_nfattr(tb, ct);
@@ -1047,6 +1068,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
 	if (cda[CTA_TUPLE_ORIG-1]) {
 		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
 		if (err < 0)
@@ -1252,6 +1276,11 @@ out:
 	return skb->len;
 }
 
+static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
+	[CTA_EXPECT_TIMEOUT-1]          = sizeof(u_int32_t),
+	[CTA_EXPECT_ID-1]               = sizeof(u_int32_t)
+};
+
 static int
 ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, 
 		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
@@ -1263,6 +1292,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 
 	DEBUGP("entered %s\n", __FUNCTION__);
 
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct nfgenmsg *msg = NLMSG_DATA(nlh);
 		u32 rlen;
@@ -1333,6 +1365,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_conntrack_helper *h;
 	int err;
 
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
 	if (cda[CTA_EXPECT_TUPLE-1]) {
 		/* delete a single expect by tuple */
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
@@ -1462,6 +1497,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 
 	DEBUGP("entered %s\n", __FUNCTION__);	
 
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
 	if (!cda[CTA_EXPECT_TUPLE-1]
 	    || !cda[CTA_EXPECT_MASK-1]
 	    || !cda[CTA_EXPECT_MASTER-1])
@@ -1504,29 +1542,22 @@ static struct notifier_block ctnl_notifier_exp = {
 
 static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
 	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_MAX, },
 };
 
 static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
 	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_EXPECT_MAX, },
 	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_EXPECT_MAX, },
 	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .cap_required = CAP_NET_ADMIN },
+					    .attr_count = CTA_EXPECT_MAX, },
 };
 
 static struct nfnetlink_subsystem ctnl_subsys = {
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 5198f3a1e2cd..5f9925db608e 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -13,6 +13,7 @@
 #include <linux/in.h>
 #include <linux/icmp.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <linux/netfilter.h>
@@ -50,7 +51,7 @@ static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
 			     const struct ip_conntrack_tuple *orig)
 {
 	/* Add 1; spaces filled with 0. */
-	static u_int8_t invmap[]
+	static const u_int8_t invmap[]
 		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
 		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
 		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
@@ -109,7 +110,7 @@ static int icmp_packet(struct ip_conntrack *ct,
 	return NF_ACCEPT;
 }
 
-static u_int8_t valid_new[] = { 
+static const u_int8_t valid_new[] = { 
 	[ICMP_ECHO] = 1,
 	[ICMP_TIMESTAMP] = 1,
 	[ICMP_INFO_REQUEST] = 1,
@@ -230,19 +231,15 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	case CHECKSUM_HW:
 		if (!(u16)csum_fold(skb->csum)) 
 			break;
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: bad HW ICMP checksum ");
-		return -NF_ACCEPT;
+		/* fall through */
 	case CHECKSUM_NONE:
-		if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+		skb->csum = 0;
+		if (__skb_checksum_complete(skb)) {
 			if (LOG_INVALID(IPPROTO_ICMP))
 				nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 					      "ip_ct_icmp: bad ICMP checksum ");
 			return -NF_ACCEPT;
 		}
-	default:
-		break;
 	}
 
 checksum_skipped:
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 59a4a0111dd3..977fb59d4563 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -65,7 +65,7 @@ static unsigned long ip_ct_sctp_timeout_shutdown_sent     = 300 SECS / 1000;
 static unsigned long ip_ct_sctp_timeout_shutdown_recd     = 300 SECS / 1000;
 static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent =   3 SECS;
 
-static unsigned long * sctp_timeouts[]
+static const unsigned long * sctp_timeouts[]
 = { NULL,                                  /* SCTP_CONNTRACK_NONE  */
     &ip_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
     &ip_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
@@ -118,7 +118,7 @@ cookie echoed to closed.
 */
 
 /* SCTP conntrack state transitions */
-static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
 	{
 /*	ORIGINAL	*/
 /*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 468c6003b4c7..e7fa29e576dc 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -99,7 +99,7 @@ unsigned long ip_ct_tcp_timeout_close =        10 SECS;
    to ~13-30min depending on RTO. */
 unsigned long ip_ct_tcp_timeout_max_retrans =     5 MINS;
  
-static unsigned long * tcp_timeouts[]
+static const unsigned long * tcp_timeouts[]
 = { NULL,                              /*      TCP_CONNTRACK_NONE */
     &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
     &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
@@ -170,7 +170,7 @@ enum tcp_bit_set {
  *	if they are invalid
  *	or we do not support the request (simultaneous open)
  */
-static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	{
 /* ORIGINAL */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
@@ -272,9 +272,9 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sCL -> sCL
  */
 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
 /*
- *	sSS -> sIV	Might be a half-open connection.
+ *	sSS -> sIG	Might be a half-open connection.
  *	sSR -> sSR	Might answer late resent SYN.
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
@@ -341,9 +341,10 @@ static int tcp_print_conntrack(struct seq_file *s,
 static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
 			 const struct ip_conntrack *ct)
 {
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+	struct nfattr *nest_parms;
 	
 	read_lock_bh(&tcp_lock);
+	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
 	read_unlock_bh(&tcp_lock);
@@ -357,6 +358,10 @@ nfattr_failure:
 	return -1;
 }
 
+static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
+	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+};
+
 static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
 {
 	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
@@ -369,6 +374,9 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
 
         nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
 
+	if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
+		return -EINVAL;
+
 	if (!tb[CTA_PROTOINFO_TCP_STATE-1])
 		return -EINVAL;
 
@@ -810,10 +818,11 @@ void ip_conntrack_tcp_update(struct sk_buff *skb,
 #define	TH_CWR	0x80
 
 /* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
 {
 	[TH_SYN]			= 1,
 	[TH_SYN|TH_ACK]			= 1,
+	[TH_SYN|TH_PUSH]		= 1,
 	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
 	[TH_RST]			= 1,
 	[TH_RST|TH_ACK]			= 1,
@@ -909,8 +918,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 
 	switch (new_state) {
 	case TCP_CONNTRACK_IGNORE:
-		/* Either SYN in ORIGINAL
-		 * or SYN/ACK in REPLY. */
+		/* Ignored packets:
+		 * 
+		 * a) SYN in ORIGINAL
+		 * b) SYN/ACK in REPLY
+		 * c) ACK in reply direction after initial SYN in original.
+		 */
 		if (index == TCP_SYNACK_SET
 		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
 		    && conntrack->proto.tcp.last_dir != dir
@@ -977,13 +990,20 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 		}
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
-		    && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
+		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+		         && conntrack->proto.tcp.last_index == TCP_SYN_SET)
+		        || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+		            && conntrack->proto.tcp.last_index == TCP_ACK_SET))
 		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
-			/* RST sent to invalid SYN we had let trough
-			 * SYN was in window then, tear down connection.
+			/* RST sent to invalid SYN or ACK we had let trough
+			 * at a) and c) above:
+			 *
+			 * a) SYN was in window then
+			 * c) we hold a half-open connection.
+			 *
+			 * Delete our connection entry.
 			 * We skip window checking, because packet might ACK
-			 * segments we ignored in the SYN. */
+			 * segments we ignored. */
 			goto in_window;
 		}
 		/* Just fall trough */
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index a78736b8525d..d3c5a371f993 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -26,9 +26,9 @@ MODULE_DESCRIPTION("tftp connection tracking helper");
 MODULE_LICENSE("GPL");
 
 #define MAX_PORTS 8
-static short ports[MAX_PORTS];
+static unsigned short ports[MAX_PORTS];
 static int ports_c;
-module_param_array(ports, short, &ports_c, 0400);
+module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "port numbers of tftp servers");
 
 #if 0
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 762f4d93936b..c1a61462507f 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -49,7 +49,7 @@ static unsigned int ip_nat_htable_size;
 static struct list_head *bysource;
 
 #define MAX_IP_NAT_PROTO 256
-struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
 
 static inline struct ip_nat_protocol *
 __ip_nat_proto_find(u_int8_t protonum)
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index 2215317c76b7..43c3bd7c118e 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -42,7 +42,10 @@ static unsigned int help(struct sk_buff **pskb,
 			 enum ip_conntrack_info ctinfo,
 			 struct ip_conntrack_expect *exp)
 {
-	exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+	struct ip_conntrack *ct = exp->master;
+
+	exp->saved_proto.udp.port
+		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = ip_nat_follow_master;
 	if (ip_conntrack_expect_related(exp) != 0)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 75c27e92f6ab..45886c8475e8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1892,7 +1892,7 @@ static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
 	return pos;
 }
 
-static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
+static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
 { { "ip_tables_names", ipt_get_tables },
   { "ip_tables_targets", ipt_get_targets },
   { "ip_tables_matches", ipt_get_matches },
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 92ed050fac69..30be0f1dae37 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -197,7 +197,7 @@ static void dump_packet(const struct nf_loginfo *info,
 	}
 	case IPPROTO_ICMP: {
 		struct icmphdr _icmph, *ich;
-		static size_t required_len[NR_ICMP_TYPES+1]
+		static const size_t required_len[NR_ICMP_TYPES+1]
 			= { [ICMP_ECHOREPLY] = 4,
 			    [ICMP_DEST_UNREACH]
 			    = 8 + sizeof(struct iphdr),
@@ -351,7 +351,7 @@ static void dump_packet(const struct nf_loginfo *info,
 	/* maxlen = 230+   91  + 230 + 252 = 803 */
 }
 
-struct nf_loginfo default_loginfo = {
+static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 2d44b07688af..261cbb4d4c49 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -532,6 +532,7 @@ match(const struct sk_buff *skb,
 			}
 			if(info->seconds && info->hit_count) {
 				for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+					if(r_list[location].last_pkts[pkt_count] == 0) break;
 					if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
 				}
 				if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a65e508fbd40..0d7dc668db46 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -98,7 +98,7 @@ fold_field(void *mib[], int offt)
 }
 
 /* snmp items */
-static struct snmp_mib snmp4_ipstats_list[] = {
+static const struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
 	SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS),
 	SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS),
@@ -119,7 +119,7 @@ static struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
-static struct snmp_mib snmp4_icmp_list[] = {
+static const struct snmp_mib snmp4_icmp_list[] = {
 	SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS),
 	SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS),
 	SNMP_MIB_ITEM("InDestUnreachs", ICMP_MIB_INDESTUNREACHS),
@@ -149,7 +149,7 @@ static struct snmp_mib snmp4_icmp_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
-static struct snmp_mib snmp4_tcp_list[] = {
+static const struct snmp_mib snmp4_tcp_list[] = {
 	SNMP_MIB_ITEM("RtoAlgorithm", TCP_MIB_RTOALGORITHM),
 	SNMP_MIB_ITEM("RtoMin", TCP_MIB_RTOMIN),
 	SNMP_MIB_ITEM("RtoMax", TCP_MIB_RTOMAX),
@@ -167,7 +167,7 @@ static struct snmp_mib snmp4_tcp_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
-static struct snmp_mib snmp4_udp_list[] = {
+static const struct snmp_mib snmp4_udp_list[] = {
 	SNMP_MIB_ITEM("InDatagrams", UDP_MIB_INDATAGRAMS),
 	SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
 	SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
@@ -175,7 +175,7 @@ static struct snmp_mib snmp4_udp_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
-static struct snmp_mib snmp4_net_list[] = {
+static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("SyncookiesSent", LINUX_MIB_SYNCOOKIESSENT),
 	SNMP_MIB_ITEM("SyncookiesRecv", LINUX_MIB_SYNCOOKIESRECV),
 	SNMP_MIB_ITEM("SyncookiesFailed", LINUX_MIB_SYNCOOKIESFAILED),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 381dd6a6aebb..f701a136a6ae 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1371,7 +1371,7 @@ out:	kfree_skb(skb);
  *	are needed for AMPRnet AX.25 paths.
  */
 
-static unsigned short mtu_plateau[] =
+static const unsigned short mtu_plateau[] =
 {32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
 
 static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
@@ -3149,8 +3149,7 @@ int __init ip_rt_init(void)
 					sizeof(struct rt_hash_bucket),
 					rhash_entries,
 					(num_physpages >= 128 * 1024) ?
-						(27 - PAGE_SHIFT) :
-						(29 - PAGE_SHIFT),
+					15 : 17,
 					HASH_HIGHMEM,
 					&rt_hash_log,
 					&rt_hash_mask,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 652685623519..01444a02b48b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_tcp_congestion_control,
 		.strategy	= &sysctl_tcp_congestion_control,
 	},
+	{
+		.ctl_name	= NET_TCP_ABC,
+		.procname	= "tcp_abc",
+		.data		= &sysctl_tcp_abc,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 
 	{ .ctl_name = 0 }
 };
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72b7c22e1ea5..ef98b14ac56d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1413,7 +1413,7 @@ recv_urg:
  *	closed.
  */
 
-static unsigned char new_state[16] = {
+static const unsigned char new_state[16] = {
   /* current state:        new state:      action:	*/
   /* (Invalid)		*/ TCP_CLOSE,
   /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	} else if (tcp_need_reset(old_state) ||
 		   (tp->snd_nxt != tp->write_seq &&
 		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
-		/* The last check adjusts for discrepance of Linux wrt. RFC
+		/* The last check adjusts for discrepancy of Linux wrt. RFC
 		 * states
 		 */
 		tcp_send_active_reset(sk, gfp_any());
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = 0x7fffffff;
 	tp->snd_cwnd_cnt = 0;
+	tp->bytes_acked = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
@@ -2064,8 +2065,7 @@ void __init tcp_init(void)
 					sizeof(struct inet_ehash_bucket),
 					thash_entries,
 					(num_physpages >= 128 * 1024) ?
-						(25 - PAGE_SHIFT) :
-						(27 - PAGE_SHIFT),
+					13 : 15,
 					HASH_HIGHMEM,
 					&tcp_hashinfo.ehash_size,
 					NULL,
@@ -2081,8 +2081,7 @@ void __init tcp_init(void)
 					sizeof(struct inet_bind_hashbucket),
 					tcp_hashinfo.ehash_size,
 					(num_physpages >= 128 * 1024) ?
-						(25 - PAGE_SHIFT) :
-						(27 - PAGE_SHIFT),
+					13 : 15,
 					HASH_HIGHMEM,
 					&tcp_hashinfo.bhash_size,
 					NULL,
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index ae35e0609047..1d0cd86621b1 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
 
 	bictcp_low_utilization(sk, data_acked);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		/* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		bictcp_update(ca, tp->snd_cwnd);
 
-                /* In dangerous area, increase slowly.
+		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
 		 */
 		if (tp->snd_cwnd_cnt >= ca->cnt) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bbf2d6624e89..c7cc62c8dc12 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
-                /* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
-                /* In dangerous area, increase slowly.
-		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
-		 */
-		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-			tp->snd_cwnd_cnt = 0;
-		} else
-			tp->snd_cwnd_cnt++;
-	}
+	/* In "safe" area, increase. */
+        if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+ 	/* In dangerous area, increase slowly. */
+	else if (sysctl_tcp_abc) {
+ 		/* RFC3465: Apppriate Byte Count
+ 		 * increase once for each full cwnd acked
+ 		 */
+ 		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+ 			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ 				tp->snd_cwnd++;
+ 		}
+ 	} else {
+ 		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+ 		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ 			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ 				tp->snd_cwnd++;
+ 			tp->snd_cwnd_cnt = 0;
+ 		} else
+ 			tp->snd_cwnd_cnt++;
+ 	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6acc04bde080..63cf7e540847 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
 }
 
 static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
-			     u32 in_flight, int good)
+			     u32 in_flight, int data_acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		/* Update AIMD parameters */
 		if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
 			while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index e47b37984e95..3284cfb993e6 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
-	if (in_flight < tp->snd_cwnd)
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-        if (tp->snd_cwnd <= tp->snd_ssthresh) {
-                /* In "safe" area, increase. */
-		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-			tp->snd_cwnd++;
-	} else {
+        if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
+
 		measure_rtt(sk);
 
 		/* keep track of number of round-trip times since last backoff event */
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 			htcp_alpha_update(ca);
 		}
 
-                /* In dangerous area, increase slowly.
+		/* In dangerous area, increase slowly.
 		 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
 		 */
 		if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 77add63623df..40dbb3877510 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 		ca->minrtt = tp->srtt;
 	}
 
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
 	if (!ca->hybla_en)
 		return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
 
-	if (in_flight < tp->snd_cwnd)
-		return;
-
 	if (ca->rho == 0)
 		hybla_recalc_param(sk);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e98b57578dc..bf2e23086bce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
  *		Andi Kleen	:	Moved open_request checking here
  *					and process RSTs for open_requests.
  *		Andi Kleen	:	Better prune_queue, and other fixes.
- *		Andrey Savochkin:	Fix RTT measurements in the presnce of
+ *		Andrey Savochkin:	Fix RTT measurements in the presence of
  *					timestamps.
  *		Andrey Savochkin:	Check sequence numbers correctly when
  *					removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;
 
 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
  *   of receiver window. Check #2.
  *
  * The scheme does not work when sender sends good segments opening
- * window and then starts to feed us spagetti. But it should work
+ * window and then starts to feed us spaghetti. But it should work
  * in common situations. Otherwise, we have to rely on queue collapsing.
  */
 
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 {
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(skb->truesize)/2;
-	int window = tcp_full_space(sk)/2;
+	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
 
 	while (tp->rcv_ssthresh <= window) {
 		if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
 
 	/* Try to select rcvbuf so that 4 mss-sized segments
-	 * will fit to window and correspoding skbs will fit to our rcvbuf.
+	 * will fit to window and corresponding skbs will fit to our rcvbuf.
 	 * (was 3; 4 is minimum to allow fast retransmit to work.)
 	 */
 	while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
 }
 
-/* 4. Try to fixup all. It is made iimediately after connection enters
+/* 4. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
 static void tcp_init_buffer_space(struct sock *sk)
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
 static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-	unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
-	int ofo_win = 0;
 
 	icsk->icsk_ack.quick = 0;
 
-	skb_queue_walk(&tp->out_of_order_queue, skb) {
-		ofo_win += skb->len;
-	}
-
-	/* If overcommit is due to out of order segments,
-	 * do not clamp window. Try to expand rcvbuf instead.
-	 */
-	if (ofo_win) {
-		if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
-		    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-		    !tcp_memory_pressure &&
-		    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
-			sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-					    sysctl_tcp_rmem[2]);
+	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+	    !tcp_memory_pressure &&
+	    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+				    sysctl_tcp_rmem[2]);
 	}
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-		app_win += ofo_win;
-		if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
-			app_win >>= 1;
-		if (app_win > icsk->icsk_ack.rcv_mss)
-			app_win -= icsk->icsk_ack.rcv_mss;
-		app_win = max(app_win, 2U*tp->advmss);
-
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
-	}
 }
 
 /* Receiver "autotuning" code.
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		 * are stalled on filesystem I/O.
 		 *
 		 * Also, since we are only going for a minimum in the
-		 * non-timestamp case, we do not smoothe things out
-		 * else with timestamps disabled convergance takes too
+		 * non-timestamp case, we do not smooth things out
+		 * else with timestamps disabled convergence takes too
 		 * long.
 		 */
 		if (!win_dep) {
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
 		} else if (m < new_sample)
 			new_sample = m << 3;
 	} else {
-		/* No previous mesaure. */
+		/* No previous measure. */
 		new_sample = m << 3;
 	}
 
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 			if (icsk->icsk_ack.ato > icsk->icsk_rto)
 				icsk->icsk_ack.ato = icsk->icsk_rto;
 		} else if (m > icsk->icsk_rto) {
-			/* Too long gap. Apparently sender falled to
+			/* Too long gap. Apparently sender failed to
 			 * restart window, so that we send ACKs quickly.
 			 */
 			tcp_incr_quickack(sk);
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	long m = mrtt; /* RTT */
 
 	/*	The following amusing code comes from Jacobson's
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 	 *
 	 * Funny. This algorithm seems to be very broken.
 	 * These formulae increase RTO, when it should be decreased, increase
-	 * too slowly, when it should be incresed fastly, decrease too fastly
+	 * too slowly, when it should be increased quickly, decrease too quickly
 	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
 		tp->rtt_seq = tp->snd_nxt;
 	}
-
-	if (icsk->icsk_ca_ops->rtt_sample)
-		icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
 }
 
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
 	 *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
 	 *    to do with delayed acks, because at cwnd>2 true delack timeout
 	 *    is invisible. Actually, Linux-2.4 also generates erratic
-	 *    ACKs in some curcumstances.
+	 *    ACKs in some circumstances.
 	 */
 	inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
 
 	/* 2. Fixups made earlier cannot be right.
 	 *    If we do not estimate RTO correctly without them,
 	 *    all the algo is pure shit and should be replaced
-	 *    with correct one. It is exaclty, which we pretend to do.
+	 *    with correct one. It is exactly, which we pretend to do.
 	 */
 }
 
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
 	 * to make it more realistic.
 	 *
 	 * A bit of theory. RTT is time passed after "normal" sized packet
-	 * is sent until it is ACKed. In normal curcumstances sending small
+	 * is sent until it is ACKed. In normal circumstances sending small
 	 * packets force peer to delay ACKs and calculation is correct too.
 	 * The algorithm is adaptive and, provided we follow specs, it
 	 * NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	int prior_fackets;
 	u32 lost_retrans = 0;
 	int flag = 0;
+	int dup_sack = 0;
 	int i;
 
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
 
-	for (i=0; i<num_sacks; i++, sp++) {
-		struct sk_buff *skb;
-		__u32 start_seq = ntohl(sp->start_seq);
-		__u32 end_seq = ntohl(sp->end_seq);
-		int fack_count = 0;
-		int dup_sack = 0;
+	/* SACK fastpath:
+	 * if the only SACK change is the increase of the end_seq of
+	 * the first block then only apply that SACK block
+	 * and use retrans queue hinting otherwise slowpath */
+	flag = 1;
+	for (i = 0; i< num_sacks; i++) {
+		__u32 start_seq = ntohl(sp[i].start_seq);
+		__u32 end_seq =	 ntohl(sp[i].end_seq);
+
+		if (i == 0){
+			if (tp->recv_sack_cache[i].start_seq != start_seq)
+				flag = 0;
+		} else {
+			if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
+			    (tp->recv_sack_cache[i].end_seq != end_seq))
+				flag = 0;
+		}
+		tp->recv_sack_cache[i].start_seq = start_seq;
+		tp->recv_sack_cache[i].end_seq = end_seq;
 
 		/* Check for D-SACK. */
 		if (i == 0) {
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (before(ack, prior_snd_una - tp->max_window))
 				return 0;
 		}
+	}
+
+	if (flag)
+		num_sacks = 1;
+	else {
+		int j;
+		tp->fastpath_skb_hint = NULL;
+
+		/* order SACK blocks to allow in order walk of the retrans queue */
+		for (i = num_sacks-1; i > 0; i--) {
+			for (j = 0; j < i; j++){
+				if (after(ntohl(sp[j].start_seq),
+					  ntohl(sp[j+1].start_seq))){
+					sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
+					sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
+					sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
+					sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
+				}
+
+			}
+		}
+	}
+
+	/* clear flag as used for different purpose in following code */
+	flag = 0;
+
+	for (i=0; i<num_sacks; i++, sp++) {
+		struct sk_buff *skb;
+		__u32 start_seq = ntohl(sp->start_seq);
+		__u32 end_seq = ntohl(sp->end_seq);
+		int fack_count;
+
+		/* Use SACK fastpath hint if valid */
+		if (tp->fastpath_skb_hint) {
+			skb = tp->fastpath_skb_hint;
+			fack_count = tp->fastpath_cnt_hint;
+		} else {
+			skb = sk->sk_write_queue.next;
+			fack_count = 0;
+		}
 
 		/* Event "B" in the comment above. */
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		sk_stream_for_retrans_queue_from(skb, sk) {
 			int in_sack, pcount;
 			u8 sacked;
 
+			tp->fastpath_skb_hint = skb;
+			tp->fastpath_cnt_hint = fack_count;
+
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
 						tp->lost_out -= tcp_skb_pcount(skb);
 						tp->retrans_out -= tcp_skb_pcount(skb);
+
+						/* clear lost hint */
+						tp->retransmit_skb_hint = NULL;
 					}
 				} else {
 					/* New sack for not retransmitted frame,
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 					if (sacked & TCPCB_LOST) {
 						TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 						tp->lost_out -= tcp_skb_pcount(skb);
+
+						/* clear lost hint */
+						tp->retransmit_skb_hint = NULL;
 					}
 				}
 
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			    (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
+				tp->retransmit_skb_hint = NULL;
 			}
 		}
 	}
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 
+				/* clear lost hint */
+				tp->retransmit_skb_hint = NULL;
+
 				if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 					tp->lost_out += tcp_skb_pcount(skb);
 					TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->frto_highmark;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
+	tp->bytes_acked = 0;
 	tcp_clear_retrans(tp);
 
 	/* Push undo marker, if it was plain RTO and nothing
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tcp_set_ca_state(sk, TCP_CA_Loss);
 	tp->high_seq = tp->snd_nxt;
 	TCP_ECN_queue_cwr(tp);
+
+	clear_all_retrans_hints(tp);
 }
 
 static int tcp_check_sack_reneging(struct sock *sk)
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 			       int packets, u32 high_seq)
 {
 	struct sk_buff *skb;
-	int cnt = packets;
+	int cnt;
 
-	BUG_TRAP(cnt <= tp->packets_out);
+	BUG_TRAP(packets <= tp->packets_out);
+	if (tp->lost_skb_hint) {
+		skb = tp->lost_skb_hint;
+		cnt = tp->lost_cnt_hint;
+	} else {
+		skb = sk->sk_write_queue.next;
+		cnt = 0;
+	}
 
-	sk_stream_for_retrans_queue(skb, sk) {
-		cnt -= tcp_skb_pcount(skb);
-		if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		/* TODO: do this better */
+		/* this is not the most efficient way to do this... */
+		tp->lost_skb_hint = skb;
+		tp->lost_cnt_hint = cnt;
+		cnt += tcp_skb_pcount(skb);
+		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
+
+			/* clear xmit_retransmit_queue hints
+			 *  if this is beyond hint */
+			if(tp->retransmit_skb_hint != NULL &&
+			   before(TCP_SKB_CB(skb)->seq,
+				  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
+
+				tp->retransmit_skb_hint = NULL;
+			}
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 	if (tcp_head_timedout(sk, tp)) {
 		struct sk_buff *skb;
 
-		sk_stream_for_retrans_queue(skb, sk) {
-			if (tcp_skb_timedout(sk, skb) &&
-			    !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
+			: sk->sk_write_queue.next;
+
+		sk_stream_for_retrans_queue_from(skb, sk) {
+			if (!tcp_skb_timedout(sk, skb))
+				break;
+
+			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				tp->lost_out += tcp_skb_pcount(skb);
+
+				/* clear xmit_retrans hint */
+				if (tp->retransmit_skb_hint &&
+				    before(TCP_SKB_CB(skb)->seq,
+					   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+
+					tp->retransmit_skb_hint = NULL;
 			}
 		}
+
+		tp->scoreboard_skb_hint = skb;
+
 		tcp_sync_left_out(tp);
 	}
 }
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 	}
 	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
+
+	/* There is something screwy going on with the retrans hints after
+	   an undo */
+	clear_all_retrans_hints(tp);
 }
 
 static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 		sk_stream_for_retrans_queue(skb, sk) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
+
+		clear_all_retrans_hints(tp);
+
 		DBGUNDO(sk, tp, "partial loss");
 		tp->lost_out = 0;
 		tp->left_out = tp->sacked_out;
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			TCP_ECN_queue_cwr(tp);
 		}
 
+		tp->bytes_acked = 0;
 		tp->snd_cwnd_cnt = 0;
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 	}
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 }
 
 /* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Superceeds RFC1323)
+ * with this code. (Supersedes RFC1323)
  */
-static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
+static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
 {
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
 	 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
 	 *
 	 * Changed: reset backoff as soon as we see the first valid sample.
-	 * If we do not, we get strongly overstimated rto. With timestamps
+	 * If we do not, we get strongly overestimated rto. With timestamps
 	 * samples are accepted even from very old segments: f.e., when rtt=1
 	 * increases to 8, we retransmit 5 times and after 8 seconds delayed
 	 * answer arrives rto becomes 120 seconds! If at least one of segments
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
 	 */
 	struct tcp_sock *tp = tcp_sk(sk);
 	const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-	tcp_rtt_estimator(sk, seq_rtt, usrtt);
+	tcp_rtt_estimator(sk, seq_rtt);
 	tcp_set_rto(sk);
 	inet_csk(sk)->icsk_backoff = 0;
 	tcp_bound_rto(sk);
 }
 
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
+static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
 {
 	/* We don't have a timestamp. Can only use
 	 * packets that are not retransmitted to determine
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
 	if (flag & FLAG_RETRANS_DATA_ACKED)
 		return;
 
-	tcp_rtt_estimator(sk, seq_rtt, usrtt);
+	tcp_rtt_estimator(sk, seq_rtt);
 	tcp_set_rto(sk);
 	inet_csk(sk)->icsk_backoff = 0;
 	tcp_bound_rto(sk);
 }
 
 static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      const s32 seq_rtt, u32 *usrtt)
+				      const s32 seq_rtt)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-		tcp_ack_saw_tstamp(sk, usrtt, flag);
+		tcp_ack_saw_tstamp(sk, flag);
 	else if (seq_rtt >= 0)
-		tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
+		tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }
 
 static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
+static inline u32 tcp_usrtt(const struct sk_buff *skb)
+{
+	struct timeval tv, now;
+
+	do_gettimeofday(&now);
+	skb_get_timestamp(skb, &tv);
+	return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
+}
 
 /* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb;
 	__u32 now = tcp_time_stamp;
 	int acked = 0;
 	__s32 seq_rtt = -1;
-	struct timeval usnow;
 	u32 pkts_acked = 0;
-
-	if (seq_usrtt)
-		do_gettimeofday(&usnow);
+	void (*rtt_sample)(struct sock *sk, u32 usrtt)
+		= icsk->icsk_ca_ops->rtt_sample;
 
 	while ((skb = skb_peek(&sk->sk_write_queue)) &&
 	       skb != sk->sk_send_head) {
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 					tp->retrans_out -= tcp_skb_pcount(skb);
 				acked |= FLAG_RETRANS_DATA_ACKED;
 				seq_rtt = -1;
-			} else if (seq_rtt < 0)
+			} else if (seq_rtt < 0) {
 				seq_rtt = now - scb->when;
-			if (seq_usrtt) {
-				struct timeval tv;
-			
-				skb_get_timestamp(skb, &tv);
-				*seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
-					+ (usnow.tv_usec - tv.tv_usec);
+				if (rtt_sample)
+					(*rtt_sample)(sk, tcp_usrtt(skb));
 			}
-
 			if (sacked & TCPCB_SACKED_ACKED)
 				tp->sacked_out -= tcp_skb_pcount(skb);
 			if (sacked & TCPCB_LOST)
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 				    !before(scb->end_seq, tp->snd_up))
 					tp->urg_mode = 0;
 			}
-		} else if (seq_rtt < 0)
+		} else if (seq_rtt < 0) {
 			seq_rtt = now - scb->when;
+			if (rtt_sample)
+				(*rtt_sample)(sk, tcp_usrtt(skb));
+		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
 		__skb_unlink(skb, &sk->sk_write_queue);
 		sk_stream_free_skb(sk, skb);
+		clear_all_retrans_hints(tp);
 	}
 
 	if (acked&FLAG_ACKED) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
+		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk, tp);
 
 		if (icsk->icsk_ca_ops->pkts_acked)
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 	}
 
 	/* F-RTO affects on two new ACKs following RTO.
-	 * At latest on third ACK the TCP behavor is back to normal.
+	 * At latest on third ACK the TCP behavior is back to normal.
 	 */
 	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	u32 prior_in_flight;
 	s32 seq_rtt;
-	s32 seq_usrtt = 0;
 	int prior_packets;
 
 	/* If the ack is newer than sent or older than previous acks
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	if (before(ack, prior_snd_una))
 		goto old_ack;
 
+	if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+		tp->bytes_acked += ack - prior_snd_una;
+
 	if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 		/* Window is constant, pure forward advance.
 		 * No more checks are required.
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* See if we can take anything off of the retransmit queue. */
-	flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
-				    icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
+	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
 		tcp_process_frto(sk, prior_snd_una);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
-		/* Advanve CWND, if state allows this. */
+		/* Advance CWND, if state allows this. */
 		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 {
 	struct sk_buff *skb;
 
-	/* First, check that queue is collapsable and find
+	/* First, check that queue is collapsible and find
 	 * the point where collapsing can be useful. */
 	for (skb = head; skb != tail; ) {
 		/* No new bits? It is possible on ofo queue. */
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
 
 /*
  *	This routine is only called when we have urgent data
- *	signalled. Its the 'slow' part of tcp_urg. It could be
+ *	signaled. Its the 'slow' part of tcp_urg. It could be
  *	moved inline now as tcp_urg is only called from one
  *	place. We handle URGent data wrong. We have to - as
  *	BSD still doesn't use the correction from RFC961.
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 	 * urgent. To do this requires some care. We cannot just ignore
 	 * tp->copied_seq since we would read the last urgent byte again
 	 * as data, nor can we alter copied_seq until this data arrives
-	 * or we break the sematics of SIOCATMARK (and thus sockatmark())
+	 * or we break the semantics of SIOCATMARK (and thus sockatmark())
 	 *
 	 * NOTE. Double Dutch. Rendering to plain English: author of comment
 	 * above did something sort of 	send("A", MSG_OOB); send("B", MSG_OOB);
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	tp->rx_opt.saw_tstamp = 0;
 
 	/*	pred_flags is 0xS?10 << 16 + snd_wnd
-	 *	if header_predition is to be made
+	 *	if header_prediction is to be made
 	 *	'S' will always be tp->tcp_header_len >> 2
 	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
 	 *  turn it off	(when there are holes in the receive 
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 */
 				if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 				    !tp->srtt)
-					tcp_ack_saw_tstamp(sk, NULL, 0);
+					tcp_ack_saw_tstamp(sk, 0);
 
 				if (tp->rx_opt.tstamp_ok)
 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4372,6 +4471,7 @@ discard:
 
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 634dabb558fd..4d5021e1929b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -39,7 +39,7 @@
  *					request_sock handling and moved
  *					most of it into the af independent code.
  *					Added tail drop and some other bugfixes.
- *					Added new listen sematics.
+ *					Added new listen semantics.
  *		Mike McLagan	:	Routing by source
  *	Juan Jose Ciarlante:		ip_dynaddr bits
  *		Andi Kleen:		various fixes.
@@ -1110,24 +1110,18 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 static int tcp_v4_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
-				  skb->nh.iph->daddr, skb->csum))
+				  skb->nh.iph->daddr, skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
-
-		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
-		skb->ip_summed = CHECKSUM_NONE;
+		}
 	}
+
+	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+				       skb->len, IPPROTO_TCP, 0);
+
 	if (skb->len <= 76) {
-		if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
-				 skb->nh.iph->daddr,
-				 skb_checksum(skb, 0, skb->len, 0)))
-			return -1;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else {
-		skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
-					  skb->nh.iph->saddr,
-					  skb->nh.iph->daddr, 0);
+		return __skb_checksum_complete(skb);
 	}
 	return 0;
 }
@@ -1216,10 +1210,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 
 	/* An explanation is required here, I think.
 	 * Packet length and doff are validated by header prediction,
-	 * provided case of th->doff==0 is elimineted.
+	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v4_checksum_init(skb) < 0))
+	     tcp_v4_checksum_init(skb)))
 		goto bad_packet;
 
 	th = skb->h.th;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2c6b4a..1b66a2ac4321 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -158,7 +158,7 @@ kill_with_rst:
 		/* I am shamed, but failed to make it more elegant.
 		 * Yes, it is direct reference to IP, which is impossible
 		 * to generalize to IPv6. Taking into account that IPv6
-		 * do not undertsnad recycling in any case, it not
+		 * do not understand recycling in any case, it not
 		 * a big problem in practice. --ANK */
 		if (tw->tw_family == AF_INET &&
 		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
@@ -194,7 +194,7 @@ kill_with_rst:
 		/* In window segment, it may be only reset or bare ack. */
 
 		if (th->rst) {
-			/* This is TIME_WAIT assasination, in two flavors.
+			/* This is TIME_WAIT assassination, in two flavors.
 			 * Oh well... nobody has a sufficient solution to this
 			 * protocol bug yet.
 			 */
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		 */
 		newtp->snd_cwnd = 2;
 		newtp->snd_cwnd_cnt = 0;
+		newtp->bytes_acked = 0;
 
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 
 	/* RFC793 page 36: "If the connection is in any non-synchronized state ...
 	 *                  and the incoming segment acknowledges something not yet
-	 *                  sent (the segment carries an unaccaptable ACK) ...
+	 *                  sent (the segment carries an unacceptable ACK) ...
 	 *                  a reset is sent."
 	 *
 	 * Invalid ACK: reset will be sent by listening socket
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b907456a79f4..b7325e0b406a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -262,122 +262,139 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
 {
-	if (skb != NULL) {
-		const struct inet_connection_sock *icsk = inet_csk(sk);
-		struct inet_sock *inet = inet_sk(sk);
-		struct tcp_sock *tp = tcp_sk(sk);
-		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-		int tcp_header_size = tp->tcp_header_len;
-		struct tcphdr *th;
-		int sysctl_flags;
-		int err;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet;
+	struct tcp_sock *tp;
+	struct tcp_skb_cb *tcb;
+	int tcp_header_size;
+	struct tcphdr *th;
+	int sysctl_flags;
+	int err;
+
+	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
-		BUG_ON(!tcp_skb_pcount(skb));
+	/* If congestion control is doing timestamping, we must
+	 * take such a timestamp before we potentially clone/copy.
+	 */
+	if (icsk->icsk_ca_ops->rtt_sample)
+		__net_timestamp(skb);
+
+	if (likely(clone_it)) {
+		if (unlikely(skb_cloned(skb)))
+			skb = pskb_copy(skb, gfp_mask);
+		else
+			skb = skb_clone(skb, gfp_mask);
+		if (unlikely(!skb))
+			return -ENOBUFS;
+	}
+
+	inet = inet_sk(sk);
+	tp = tcp_sk(sk);
+	tcb = TCP_SKB_CB(skb);
+	tcp_header_size = tp->tcp_header_len;
 
 #define SYSCTL_FLAG_TSTAMPS	0x1
 #define SYSCTL_FLAG_WSCALE	0x2
 #define SYSCTL_FLAG_SACK	0x4
 
-		/* If congestion control is doing timestamping */
-		if (icsk->icsk_ca_ops->rtt_sample)
-			__net_timestamp(skb);
-
-		sysctl_flags = 0;
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-			if(sysctl_tcp_timestamps) {
-				tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
-			}
-			if(sysctl_tcp_window_scaling) {
-				tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
-				sysctl_flags |= SYSCTL_FLAG_WSCALE;
-			}
-			if(sysctl_tcp_sack) {
-				sysctl_flags |= SYSCTL_FLAG_SACK;
-				if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
-					tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
-			}
-		} else if (tp->rx_opt.eff_sacks) {
-			/* A SACK is 2 pad bytes, a 2 byte header, plus
-			 * 2 32-bit sequence numbers for each SACK block.
-			 */
-			tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-					    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
+	sysctl_flags = 0;
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+		if(sysctl_tcp_timestamps) {
+			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
-		
-		if (tcp_packets_in_flight(tp) == 0)
-			tcp_ca_event(sk, CA_EVENT_TX_START);
-
-		th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-		skb->h.th = th;
-		skb_set_owner_w(skb, sk);
-
-		/* Build TCP header and checksum it. */
-		th->source		= inet->sport;
-		th->dest		= inet->dport;
-		th->seq			= htonl(tcb->seq);
-		th->ack_seq		= htonl(tp->rcv_nxt);
-		*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | tcb->flags);
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			/* RFC1323: The window in SYN & SYN/ACK segments
-			 * is never scaled.
-			 */
-			th->window	= htons(tp->rcv_wnd);
-		} else {
-			th->window	= htons(tcp_select_window(sk));
+		if (sysctl_tcp_window_scaling) {
+			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+			sysctl_flags |= SYSCTL_FLAG_WSCALE;
 		}
-		th->check		= 0;
-		th->urg_ptr		= 0;
-
-		if (tp->urg_mode &&
-		    between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF)) {
-			th->urg_ptr		= htons(tp->snd_up-tcb->seq);
-			th->urg			= 1;
+		if (sysctl_tcp_sack) {
+			sysctl_flags |= SYSCTL_FLAG_SACK;
+			if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
+				tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
 		}
+	} else if (unlikely(tp->rx_opt.eff_sacks)) {
+		/* A SACK is 2 pad bytes, a 2 byte header, plus
+		 * 2 32-bit sequence numbers for each SACK block.
+		 */
+		tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
+				    (tp->rx_opt.eff_sacks *
+				     TCPOLEN_SACK_PERBLOCK));
+	}
+		
+	if (tcp_packets_in_flight(tp) == 0)
+		tcp_ca_event(sk, CA_EVENT_TX_START);
+
+	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb->h.th = th;
+	skb_set_owner_w(skb, sk);
+
+	/* Build TCP header and checksum it. */
+	th->source		= inet->sport;
+	th->dest		= inet->dport;
+	th->seq			= htonl(tcb->seq);
+	th->ack_seq		= htonl(tp->rcv_nxt);
+	*(((__u16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
+					tcb->flags);
+
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		/* RFC1323: The window in SYN & SYN/ACK segments
+		 * is never scaled.
+		 */
+		th->window	= htons(tp->rcv_wnd);
+	} else {
+		th->window	= htons(tcp_select_window(sk));
+	}
+	th->check		= 0;
+	th->urg_ptr		= 0;
 
-		if (tcb->flags & TCPCB_FLAG_SYN) {
-			tcp_syn_build_options((__u32 *)(th + 1),
-					      tcp_advertise_mss(sk),
-					      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
-					      (sysctl_flags & SYSCTL_FLAG_SACK),
-					      (sysctl_flags & SYSCTL_FLAG_WSCALE),
-					      tp->rx_opt.rcv_wscale,
-					      tcb->when,
-		      			      tp->rx_opt.ts_recent);
-		} else {
-			tcp_build_and_update_options((__u32 *)(th + 1),
-						     tp, tcb->when);
+	if (unlikely(tp->urg_mode &&
+		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+		th->urg			= 1;
+	}
 
-			TCP_ECN_send(sk, tp, skb, tcp_header_size);
-		}
-		tp->af_specific->send_check(sk, th, skb->len, skb);
+	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+		tcp_syn_build_options((__u32 *)(th + 1),
+				      tcp_advertise_mss(sk),
+				      (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
+				      (sysctl_flags & SYSCTL_FLAG_SACK),
+				      (sysctl_flags & SYSCTL_FLAG_WSCALE),
+				      tp->rx_opt.rcv_wscale,
+				      tcb->when,
+				      tp->rx_opt.ts_recent);
+	} else {
+		tcp_build_and_update_options((__u32 *)(th + 1),
+					     tp, tcb->when);
+		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+	}
 
-		if (tcb->flags & TCPCB_FLAG_ACK)
-			tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+	tp->af_specific->send_check(sk, th, skb->len, skb);
 
-		if (skb->len != tcp_header_size)
-			tcp_event_data_sent(tp, skb, sk);
+	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
-		TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (skb->len != tcp_header_size)
+		tcp_event_data_sent(tp, skb, sk);
 
-		err = tp->af_specific->queue_xmit(skb, 0);
-		if (err <= 0)
-			return err;
+	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-		tcp_enter_cwr(sk);
+	err = tp->af_specific->queue_xmit(skb, 0);
+	if (unlikely(err <= 0))
+		return err;
+
+	tcp_enter_cwr(sk);
+
+	/* NET_XMIT_CN is special. It does not guarantee,
+	 * that this packet is lost. It tells that device
+	 * is about to start to drop packets or already
+	 * drops some packets of the same priority and
+	 * invokes us to send less aggressively.
+	 */
+	return err == NET_XMIT_CN ? 0 : err;
 
-		/* NET_XMIT_CN is special. It does not guarantee,
-		 * that this packet is lost. It tells that device
-		 * is about to start to drop packets or already
-		 * drops some packets of the same priority and
-		 * invokes us to send less aggressively.
-		 */
-		return err == NET_XMIT_CN ? 0 : err;
-	}
-	return -ENOBUFS;
 #undef SYSCTL_FLAG_TSTAMPS
 #undef SYSCTL_FLAG_WSCALE
 #undef SYSCTL_FLAG_SACK
@@ -436,6 +453,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	u16 flags;
 
 	BUG_ON(len > skb->len);
+
+ 	clear_all_retrans_hints(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -599,7 +618,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    for TCP options, but includes only bare TCP header.
 
    tp->rx_opt.mss_clamp is mss negotiated at connection setup.
-   It is minumum of user_mss and mss received with SYN.
+   It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
    tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1034,7 +1053,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
 			break;
 
 		/* Advance the send_head.  This one is sent out.
@@ -1107,7 +1126,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		/* Send it out now. */
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
 			update_send_head(sk, tp, skb);
 			tcp_cwnd_validate(sk, tp);
 			return;
@@ -1171,7 +1190,7 @@ u32 __tcp_select_window(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	/* MSS for the peer's data.  Previous verions used mss_clamp
+	/* MSS for the peer's data.  Previous versions used mss_clamp
 	 * here.  I don't know if the value based on our guesses
 	 * of peer's MSS is better for the performance.  It's more correct
 	 * but may be worse for the performance because of rcv_mss
@@ -1260,7 +1279,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
-		/* Ok.  We will be able to collapse the packet. */
+		/* changing transmit queue under us so clear hints */
+		clear_all_retrans_hints(tp);
+
+		/* Ok.	We will be able to collapse the packet. */
 		__skb_unlink(next_skb, &sk->sk_write_queue);
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1352,8 @@ void tcp_simple_retransmit(struct sock *sk)
 		}
 	}
 
+	clear_all_retrans_hints(tp);
+
 	if (!lost)
 		return;
 
@@ -1361,7 +1385,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	int err;
 
 	/* Do not sent more than we queued. 1/4 is reserved for possible
-	 * copying overhead: frgagmentation, tunneling, mangling etc.
+	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 */
 	if (atomic_read(&sk->sk_wmem_alloc) >
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1422,9 +1446,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
-				    pskb_copy(skb, GFP_ATOMIC):
-				    skb_clone(skb, GFP_ATOMIC)));
+	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
@@ -1468,13 +1490,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int packet_cnt = tp->lost_out;
+	int packet_cnt;
+
+	if (tp->retransmit_skb_hint) {
+		skb = tp->retransmit_skb_hint;
+		packet_cnt = tp->retransmit_cnt_hint;
+	}else{
+		skb = sk->sk_write_queue.next;
+		packet_cnt = 0;
+	}
 
 	/* First pass: retransmit lost packets. */
-	if (packet_cnt) {
-		sk_stream_for_retrans_queue(skb, sk) {
+	if (tp->lost_out) {
+		sk_stream_for_retrans_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			/* we could do better than to assign each time */
+			tp->retransmit_skb_hint = skb;
+			tp->retransmit_cnt_hint = packet_cnt;
+
 			/* Assume this retransmit will generate
 			 * only one packet for congestion window
 			 * calculation purposes.  This works because
@@ -1485,10 +1519,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
 				return;
 
-			if (sacked&TCPCB_LOST) {
+			if (sacked & TCPCB_LOST) {
 				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
-					if (tcp_retransmit_skb(sk, skb))
+					if (tcp_retransmit_skb(sk, skb)) {
+						tp->retransmit_skb_hint = NULL;
 						return;
+					}
 					if (icsk->icsk_ca_state != TCP_CA_Loss)
 						NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
 					else
@@ -1501,8 +1537,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 									  TCP_RTO_MAX);
 				}
 
-				packet_cnt -= tcp_skb_pcount(skb);
-				if (packet_cnt <= 0)
+				packet_cnt += tcp_skb_pcount(skb);
+				if (packet_cnt >= tp->lost_out)
 					break;
 			}
 		}
@@ -1528,9 +1564,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	if (tcp_may_send_now(sk, tp))
 		return;
 
-	packet_cnt = 0;
+	if (tp->forward_skb_hint) {
+		skb = tp->forward_skb_hint;
+		packet_cnt = tp->forward_cnt_hint;
+	} else{
+		skb = sk->sk_write_queue.next;
+		packet_cnt = 0;
+	}
+
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		tp->forward_cnt_hint = packet_cnt;
+		tp->forward_skb_hint = skb;
 
-	sk_stream_for_retrans_queue(skb, sk) {
 		/* Similar to the retransmit loop above we
 		 * can pretend that the retransmitted SKB
 		 * we send out here will be composed of one
@@ -1547,8 +1592,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			continue;
 
 		/* Ok, retransmit it. */
-		if (tcp_retransmit_skb(sk, skb))
+		if (tcp_retransmit_skb(sk, skb)) {
+			tp->forward_skb_hint = NULL;
 			break;
+		}
 
 		if (skb == skb_peek(&sk->sk_write_queue))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -1633,7 +1680,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	if (tcp_transmit_skb(sk, skb))
+	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
 
@@ -1668,7 +1715,7 @@ int tcp_send_synack(struct sock *sk)
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 }
 
 /*
@@ -1829,7 +1876,7 @@ int tcp_connect(struct sock *sk)
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
+	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
@@ -1925,7 +1972,7 @@ void tcp_send_ack(struct sock *sk)
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
-		tcp_transmit_skb(sk, buff);
+		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
 }
 
@@ -1965,7 +2012,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	return tcp_transmit_skb(sk, skb);
+	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
 int tcp_write_wakeup(struct sock *sk)
@@ -1998,7 +2045,7 @@ int tcp_write_wakeup(struct sock *sk)
 
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
+			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
 				update_send_head(sk, tp, skb);
 			}
@@ -2058,3 +2105,4 @@ EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
 EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 327770bf5522..26d7486ee501 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 				    u32 in_flight, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	if (in_flight < tp->snd_cwnd)
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
-		tp->snd_cwnd++;
-	} else {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+	else {
 		tp->snd_cwnd_cnt++;
 		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
-			tp->snd_cwnd++;
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
 			tp->snd_cwnd_cnt = 0;
 		}
 	}
-	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
 static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 415ee47ac1c5..e1880959614a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
  * to prevent DoS attacks. It is called when a retransmission timeout
  * or zero probe timeout occurs on orphaned socket.
  *
- * Criterium is still not confirmed experimentally and may change.
+ * Criteria is still not confirmed experimentally and may change.
  * We kill the socket, if:
  * 1. If number of orphaned sockets exceeds an administratively configured
  *    limit.
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
 			   hole detection. :-(
 
 			   It is place to make it. It is not made. I do not want
-			   to make it. It is disguisting. It does not work in any
+			   to make it. It is disgusting. It does not work in any
 			   case. Let me to cite the same draft, which requires for
 			   us to implement this:
 
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 93c5f92070f9..13e7e6e8df16 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -215,14 +215,6 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 		vegas->beg_snd_nxt  = tp->snd_nxt;
 		vegas->beg_snd_cwnd = tp->snd_cwnd;
 
-		/* Take into account the current RTT sample too, to
-		 * decrease the impact of delayed acks. This double counts
-		 * this sample since we count it for the next window as well,
-		 * but that's not too awful, since we're taking the min,
-		 * rather than averaging.
-		 */
-		tcp_vegas_rtt_calc(sk, seq_rtt * 1000);
-
 		/* We do the Vegas calculations only if we got enough RTT
 		 * samples that we can be reasonably sure that we got
 		 * at least one RTT sample that wasn't from a delayed ACK.
@@ -236,8 +228,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			/* We don't have enough RTT samples to do the Vegas
 			 * calculation, so we'll behave like Reno.
 			 */
-			if (tp->snd_cwnd > tp->snd_ssthresh)
-				tp->snd_cwnd++;
+			tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
 		} else {
 			u32 rtt, target_cwnd, diff;
 
@@ -275,7 +266,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 			 */
 			diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
 
-			if (tp->snd_cwnd < tp->snd_ssthresh) {
+			if (tp->snd_cwnd <= tp->snd_ssthresh) {
 				/* Slow start.  */
 				if (diff > gamma) {
 					/* Going too fast. Time to slow down
@@ -295,6 +286,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 							    V_PARAM_SHIFT)+1);
 
 				}
+				tcp_slow_start(tp);
 			} else {
 				/* Congestion avoidance. */
 				u32 next_snd_cwnd;
@@ -327,37 +319,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 				else if (next_snd_cwnd < tp->snd_cwnd)
 					tp->snd_cwnd--;
 			}
+
+			if (tp->snd_cwnd < 2)
+				tp->snd_cwnd = 2;
+			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
+				tp->snd_cwnd = tp->snd_cwnd_clamp;
 		}
 
 		/* Wipe the slate clean for the next RTT. */
 		vegas->cntRTT = 0;
 		vegas->minRTT = 0x7fffffff;
 	}
-
-	/* The following code is executed for every ack we receive,
-	 * except for conditions checked in should_advance_cwnd()
-	 * before the call to tcp_cong_avoid(). Mainly this means that
-	 * we only execute this code if the ack actually acked some
-	 * data.
-	 */
-
-	/* If we are in slow start, increase our cwnd in response to this ACK.
-	 * (If we are not in slow start then we are in congestion avoidance,
-	 * and adjust our congestion window only once per RTT. See the code
-	 * above.)
-	 */
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
-		tp->snd_cwnd++;
-
-	/* to keep cwnd from growing without bound */
-	tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
-
-	/* Make sure that we are never so timid as to reduce our cwnd below
-	 * 2 MSS.
-	 *
-	 * Going below 2 MSS would risk huge delayed ACKs from our receiver.
-	 */
-	tp->snd_cwnd = max(tp->snd_cwnd, 2U);
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e0bd1013cb0d..2422a5f7195d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -761,7 +761,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
 {
-	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	return __skb_checksum_complete(skb);
 }
 
 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
@@ -1100,11 +1100,8 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else if (skb->ip_summed == CHECKSUM_HW) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
-			return 0;
-		LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n");
-		skb->ip_summed = CHECKSUM_NONE;
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b2b60f3e9cdd..42196ba3b0b9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -182,6 +182,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_UDP:
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
+		case IPPROTO_DCCP:
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 				u16 *ports = (u16 *)xprth;