48 files changed, 3037 insertions, 1335 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 145f5cde96cf..b74864889670 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	unsigned short vid;
 	struct net_device_stats *stats;
 	unsigned short vlan_TCI;
-	unsigned short proto;
+	__be16 proto;
 
 	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
 	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
diff --git a/net/Kconfig b/net/Kconfig
index 2bdd5623fdd5..60f6f321bd76 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER
 
 	  If unsure, say N.
 
+source "net/netfilter/Kconfig"
 source "net/ipv4/netfilter/Kconfig"
 source "net/ipv6/netfilter/Kconfig"
 source "net/decnet/netfilter/Kconfig"
@@ -206,8 +207,6 @@ config NET_PKTGEN
 	  To compile this code as a module, choose M here: the
 	  module will be called pktgen.
 
-source "net/netfilter/Kconfig"
-
 endmenu
 
 endmenu
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2d52fee63a8c..d8e36b775125 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -214,9 +214,11 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 				     .tos = RT_TOS(iph->tos)} }, .proto = 0};
 
 			if (!ip_route_output_key(&rt, &fl)) {
-				/* Bridged-and-DNAT'ed traffic doesn't
-				 * require ip_forwarding. */
-				if (((struct dst_entry *)rt)->dev == dev) {
+				/* - Bridged-and-DNAT'ed traffic doesn't
+				 *   require ip_forwarding.
+				 * - Deal with redirected traffic. */
+				if (((struct dst_entry *)rt)->dev == dev ||
+				    rt->rt_type == RTN_LOCAL) {
 					skb->dst = (struct dst_entry *)rt;
 					goto bridged_dnat;
 				}
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index fb97bb042455..344a8da153fc 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o
 dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
 	  timer.o
 
+dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
+
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
 
 dccp_diag-y := diag.o
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
new file mode 100644
index 000000000000..6530283eafca
--- /dev/null
+++ b/net/dccp/ackvec.c
@@ -0,0 +1,419 @@
+/*
+ *  net/dccp/ackvec.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ *
+ *      This program is free software; you can redistribute it and/or modify it
+ *      under the terms of the GNU General Public License as published by the
+ *      Free Software Foundation; version 2 of the License;
+ */
+
+#include "ackvec.h"
+#include "dccp.h"
+
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+/*
+ * dccp_insert_option_ackvec - add an Ack Vector (type 0) option to a packet
+ * @sk: socket whose receiver-side ack vector (dccps_hc_rx_ackvec) is sent
+ * @skb: packet under construction; the option is pushed in front of its data
+ *
+ * Copies the dccpav_vec_len live bytes of the circular buffer, starting at
+ * dccpav_buf_head, into the option and remembers what was sent in the single
+ * ack record (dccpav_ack_*, dccpav_sent_len).  An Elapsed Time option is
+ * inserted first when the computed elapsed time is non-zero.
+ *
+ * Returns 0 when the option was written and -1 when it was not: either it
+ * does not fit in DCCP_MAX_OPT_LEN or the previous ack record has not been
+ * cleared yet.
+ */
+int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	int len = av->dccpav_vec_len + 2;
+	struct timeval now;
+	u32 elapsed_time;
+	unsigned char *to, *from;
+#ifdef CONFIG_IP_DCCP_DEBUG
+	/* Only referenced by the dccp_pr_debug() call at the end */
+	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+				"CLIENT tx: " : "server tx: ";
+#endif
+
+	dccp_timestamp(sk, &now);
+	elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
+
+	if (elapsed_time != 0)
+		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+		return -1;
+
+	/*
+	 * XXX: now we have just one ack vector sent record, so
+	 * we have to wait for it to be cleared.
+	 *
+	 * Of course this is not acceptable, but this is just for
+	 * basic testing now.
+	 */
+	if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1)
+		return -1;
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+	/* Option header: type byte, then total length (payload + 2) */
+	to    = skb_push(skb, len);
+	*to++ = DCCPO_ACK_VECTOR_0;
+	*to++ = len;
+
+	len  = av->dccpav_vec_len;
+	from = av->dccpav_buf + av->dccpav_buf_head;
+
+	/*
+	 * The live bytes start at buf_head and run towards the end of the
+	 * circular buffer.  If they run past dccpav_buf_len the remainder
+	 * continues at the start of dccpav_buf, so copy in two pieces.  The
+	 * limit is the buffer size, not the number of live bytes.
+	 */
+	if (av->dccpav_buf_head + len > av->dccpav_buf_len) {
+		const u32 tailsize = av->dccpav_buf_len - av->dccpav_buf_head;
+
+		memcpy(to, from, tailsize);
+		to   += tailsize;
+		len  -= tailsize;
+		from = av->dccpav_buf;
+	}
+
+	memcpy(to, from, len);
+	/*
+	 *	From draft-ietf-dccp-spec-11.txt:
+	 *
+	 *	For each acknowledgement it sends, the HC-Receiver will add an
+	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
+	 *	sequence number it used for the ack packet; ack_ptr will equal
+	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
+	 *	equal buf_nonce.
+	 *
+	 * This implementation uses just one ack record for now.
+	 */
+	av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	av->dccpav_ack_ptr   = av->dccpav_buf_head;
+	av->dccpav_ack_ackno = av->dccpav_buf_ackno;
+	av->dccpav_ack_nonce = av->dccpav_buf_nonce;
+	av->dccpav_sent_len  = av->dccpav_vec_len;
+
+	dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
+		      "ack_ackno=%llu\n",
+		      debug_prefix, av->dccpav_sent_len,
+		      (unsigned long long)av->dccpav_ack_seqno,
+		      (unsigned long long)av->dccpav_ack_ackno);
+	/* The option is in the packet: report success, not failure */
+	return 0;
+}
+
+/*
+ * dccp_ackvec_alloc - allocate an ack vector with a @len byte circular buffer
+ * @len: size in bytes of the circular buffer appended to the structure
+ * @priority: allocation flags handed to kmalloc()
+ *
+ * The vector starts empty: head and tail point at the last buffer slot, the
+ * sequence numbers are set to ~0LLU and both nonces are cleared.
+ *
+ * Returns the new ack vector, or NULL if the allocation failed.
+ */
+struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
+				      const unsigned int __nocast priority)
+{
+	struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority);
+
+	if (av != NULL) {
+		av->dccpav_buf_len	= len;
+		av->dccpav_buf_head	=
+			av->dccpav_buf_tail = av->dccpav_buf_len - 1;
+		/*
+		 * NOTE(review): the rest of this file tests dccpav_ack_seqno
+		 * against DCCP_MAX_SEQNO + 1 to mean "no ack record
+		 * outstanding" - confirm ~0LLU is meant to compare equal to
+		 * that sentinel.
+		 */
+		av->dccpav_buf_ackno	=
+			av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU;
+		/* kmalloc() does not zero memory: clear both nonces */
+		av->dccpav_buf_nonce = av->dccpav_ack_nonce = 0;
+		av->dccpav_ack_ptr	= 0;
+		av->dccpav_time.tv_sec	= 0;
+		av->dccpav_time.tv_usec	= 0;
+		av->dccpav_sent_len	= av->dccpav_vec_len = 0;
+	}
+
+	return av;
+}
+
+/* Release an ack vector; @av is handed straight to kfree(). */
+void dccp_ackvec_free(struct dccp_ackvec *av)
+{
+	kfree(av);
+}
+
+/* State bits (still in their top-two-bit position) of buffer cell @index. */
+static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
+				   const unsigned int index)
+{
+	const u8 cell = av->dccpav_buf[index];
+
+	return cell & DCCP_ACKVEC_STATE_MASK;
+}
+
+/* Run length bits (low six bits) of buffer cell @index. */
+static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
+				 const unsigned int index)
+{
+	const u8 cell = av->dccpav_buf[index];
+
+	return cell & DCCP_ACKVEC_LEN_MASK;
+}
+
+/*
+ * If several packets are missing, the HC-Receiver may prefer to enter multiple
+ * bytes with run length 0, rather than a single byte with a larger run length;
+ * this simplifies table updates if one of the missing packets arrives.
+ *
+ * dccp_ackvec_set_buf_head_state - move the head back by @packets cells
+ * @av: ack vector to update
+ * @packets: number of cells to add; the new head cell plus (@packets - 1)
+ *	     cells for the sequence numbers skipped in between
+ * @state: value stored in the new head cell
+ *
+ * The skipped cells are filled with DCCP_ACKVEC_STATE_NOT_RECEIVED (run
+ * length 0).  The head moves towards index 0 and wraps to the end of the
+ * circular buffer.
+ *
+ * Returns 0 on success, -ENOBUFS if the cells do not fit in the buffer.
+ */
+static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
+						 const unsigned int packets,
+						  const unsigned char state)
+{
+	unsigned int gap;
+	signed long new_head;
+
+	if (av->dccpav_vec_len + packets > av->dccpav_buf_len)
+		return -ENOBUFS;
+
+	gap	 = packets - 1;
+	/*
+	 * Subtract as signed values: an unsigned int difference would wrap
+	 * and, where long is wider than int, never appear negative.
+	 */
+	new_head = (signed long)av->dccpav_buf_head - (signed long)packets;
+
+	if (new_head < 0) {
+		if (gap > 0) {
+			/*
+			 * gap + new_head + 1 equals the old head index: the
+			 * gap cells that sit at the start of the buffer.
+			 */
+			memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			       gap + new_head + 1);
+			/*
+			 * What is left goes after the wrapped head, which
+			 * itself takes one of the -new_head slots at the
+			 * end of the buffer.
+			 */
+			gap = -new_head - 1;
+		}
+		new_head += av->dccpav_buf_len;
+	}
+
+	av->dccpav_buf_head = new_head;
+
+	if (gap > 0)
+		memset(av->dccpav_buf + av->dccpav_buf_head + 1,
+		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+
+	av->dccpav_buf[av->dccpav_buf_head] = state;
+	av->dccpav_vec_len += packets;
+	return 0;
+}
+
+/*
+ * Implements the draft-ietf-dccp-spec-11.txt Appendix A
+ *
+ * dccp_ackvec_add - record the reception state of packet @ackno
+ * @av: ack vector to update
+ * @sk: socket, used only to timestamp the update
+ * @ackno: sequence number of the packet being recorded
+ * @state: one of the DCCP_ACKVEC_STATE_* values for that packet
+ *
+ * Returns 0 when the packet was recorded, -ENOBUFS when the circular buffer
+ * has no room for it and -EILSEQ when @ackno is already covered by an
+ * existing entry.
+ */
+int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+		    const u64 ackno, const u8 state)
+{
+	/*
+	 * Check at the right places if the buffer is full, if it is, tell the
+	 * caller to start dropping packets till the HC-Sender acks our ACK
+	 * vectors, when we will free up space in dccpav_buf.
+	 *
+	 * We may well decide to do buffer compression, etc, but for now lets
+	 * just drop.
+	 *
+	 * From Appendix A:
+	 *
+	 *	Of course, the circular buffer may overflow, either when the
+	 *	HC-Sender is sending data at a very high rate, when the
+	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
+	 *	or when the HC-Sender is forgetting to acknowledge those acks
+	 *	(so the HC-Receiver is unable to clean up old state). In this
+	 *	case, the HC-Receiver should either compress the buffer (by
+	 *	increasing run lengths when possible), transfer its state to
+	 *	a larger buffer, or, as a last resort, drop all received
+	 *	packets, without processing them whatsoever, until its buffer
+	 *	shrinks again.
+	 */
+
+	/* See if this is the first ackno being inserted */
+	if (av->dccpav_vec_len == 0) {
+		av->dccpav_buf[av->dccpav_buf_head] = state;
+		av->dccpav_vec_len = 1;
+	} else if (after48(ackno, av->dccpav_buf_ackno)) {
+		const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno,
+						   ackno);
+
+		/*
+		 * Look if the state of this packet is the same as the
+		 * previous ackno and if so if we can bump the head len.
+		 */
+		if (delta == 1 &&
+		    dccp_ackvec_state(av, av->dccpav_buf_head) == state &&
+		    (dccp_ackvec_len(av, av->dccpav_buf_head) <
+		     DCCP_ACKVEC_LEN_MASK))
+			av->dccpav_buf[av->dccpav_buf_head]++;
+		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
+			return -ENOBUFS;
+	} else {
+		/*
+		 * A.1.2.  Old Packets
+		 *
+		 *	When a packet with Sequence Number S arrives, and
+		 *	S <= buf_ackno, the HC-Receiver will scan the table
+		 *	for the byte corresponding to S. (Indexing structures
+		 *	could reduce the complexity of this scan.)
+		 */
+		u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
+		unsigned int index = av->dccpav_buf_head;
+
+		/*
+		 * NOTE(review): the scan is not bounded by dccpav_vec_len, so
+		 * for an ackno older than everything recorded it walks cells
+		 * outside the live vector - confirm callers never pass one.
+		 */
+		while (1) {
+			/*
+			 * Named so they do not hide the @state parameter,
+			 * which is what has to be stored in the cell.
+			 */
+			const u8 cell_len = dccp_ackvec_len(av, index);
+			const u8 cell_state = dccp_ackvec_state(av, index);
+			/*
+			 * valid packets not yet in dccpav_buf have a reserved
+			 * entry, with a len equal to 0.
+			 */
+			if (cell_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+			    cell_len == 0 && delta == 0) { /* Found our
+							      reserved seat! */
+				dccp_pr_debug("Found %llu reserved seat!\n",
+					      (unsigned long long)ackno);
+				av->dccpav_buf[index] = state;
+				goto out;
+			}
+			/* len == 0 means one packet */
+			if (delta < cell_len + 1)
+				goto out_duplicate;
+
+			delta -= cell_len + 1;
+			if (++index == av->dccpav_buf_len)
+				index = 0;
+		}
+	}
+
+	/* Only a new head (first or newer ackno) moves buf_ackno and time */
+	av->dccpav_buf_ackno = ackno;
+	dccp_timestamp(sk, &av->dccpav_time);
+out:
+	dccp_pr_debug("");
+	return 0;
+
+out_duplicate:
+	/* Duplicate packet */
+	dccp_pr_debug("Received a dup or already considered lost "
+		      "packet: %llu\n", (unsigned long long)ackno);
+	return -EILSEQ;
+}
+
+#ifdef CONFIG_IP_DCCP_DEBUG
+/*
+ * Dump @len ack vector bytes starting at @vector as "state,runlength|"
+ * pairs, prefixed by the length and @ackno.  Does nothing unless dccp_debug
+ * is set.
+ */
+void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
+{
+	const unsigned char *cell = vector;
+
+	if (!dccp_debug)
+		return;
+
+	printk("ACK vector len=%d, ackno=%llu |", len,
+	       (unsigned long long)ackno);
+
+	for (; len != 0; --len, ++cell) {
+		const u8 rl = *cell & DCCP_ACKVEC_LEN_MASK;
+		const u8 state = (*cell & DCCP_ACKVEC_STATE_MASK) >> 6;
+
+		printk("%d,%d|", state, rl);
+	}
+
+	printk("\n");
+}
+
+/*
+ * Print the live part of @av: dccpav_vec_len bytes starting at buf_head.
+ *
+ * NOTE(review): the bytes are read linearly from buf_head, so a vector that
+ * wraps around the end of the circular buffer looks like it would be read
+ * past dccpav_buf - confirm before relying on this output.
+ */
+void dccp_ackvec_print(const struct dccp_ackvec *av)
+{
+	dccp_ackvector_print(av->dccpav_buf_ackno,
+			     av->dccpav_buf + av->dccpav_buf_head,
+			     av->dccpav_vec_len);
+}
+#endif
+
+/*
+ * Forget the cells covered by the ack record: the vector shrinks by the
+ * number of bytes that were sent (dccpav_sent_len).
+ */
+static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av)
+{
+	/*
+	 * As we're keeping track of the ack vector size (dccpav_vec_len) and
+	 * the sent ack vector size (dccpav_sent_len) we don't need
+	 * dccpav_buf_tail at all, but keep this code here as in the future
+	 * we'll implement a vector of ack records, as suggested in
+	 * draft-ietf-dccp-spec-11.txt Appendix A. -acme
+	 */
+#if 0
+	av->dccpav_buf_tail = av->dccpav_ack_ptr + 1;
+	if (av->dccpav_buf_tail >= av->dccpav_vec_len)
+		av->dccpav_buf_tail -= av->dccpav_vec_len;
+#endif
+	av->dccpav_vec_len -= av->dccpav_sent_len;
+}
+
+/*
+ * dccp_ackvec_check_rcv_ackno - clear the ack record if @ackno acks it
+ * @av: ack vector holding the single ack record
+ * @sk: socket, only used to pick the debug message prefix
+ * @ackno: acknowledgement number carried by the received packet
+ *
+ * When @ackno equals the sequence number the last Ack Vector was sent with,
+ * the acknowledged cells are dropped and the record is marked free again.
+ */
+void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
+				 const u64 ackno)
+{
+	/* No Ack Vector was sent, so there is nothing to acknowledge */
+	if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
+		return;
+
+	/* Only an exact match with the recorded seqno clears the record */
+	if (ackno != av->dccpav_ack_seqno)
+		return;
+
+	{
+#ifdef CONFIG_IP_DCCP_DEBUG
+		struct dccp_sock *dp = dccp_sk(sk);
+		const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+					"CLIENT rx ack: " : "server rx ack: ";
+#endif
+		dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+			      "ack_ackno=%llu, ACKED!\n",
+			      debug_prefix, 1,
+			      (unsigned long long)av->dccpav_ack_seqno,
+			      (unsigned long long)av->dccpav_ack_ackno);
+	}
+
+	dccp_ackvec_trow_away_ack_record(av);
+	av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
+}
+
+/*
+ * dccp_ackvec_check_rcv_ackvector - look for our ack record in a received
+ *				     Ack Vector
+ * @av: ack vector holding the single ack record
+ * @sk: socket, only used to pick the debug message prefix
+ * @ackno: acknowledgement number of the packet that carried the vector
+ * @len: number of bytes in @vector
+ * @vector: the received Ack Vector bytes
+ *
+ * Walks the vector from @ackno downwards, one run at a time, until the run
+ * containing dccpav_ack_seqno is found.  If that run reports the packet as
+ * received, the acknowledged cells are dropped.  In either case the record
+ * is marked free so another Ack Vector can be sent.
+ */
+static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
+					    struct sock *sk, u64 ackno,
+					    const unsigned char len,
+					    const unsigned char *vector)
+{
+	unsigned char i;
+
+	/* Check if we actually sent an ACK vector */
+	if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
+		return;
+	/*
+	 * We're in the receiver half connection, so if the received ACK
+	 * vector's ackno (e.g. 50) is before dccpav_ack_seqno (e.g. 52),
+	 * we're not interested.
+	 *
+	 * Extra explanation with example:
+	 *
+	 * if we received an ACK vector with ackno 50, it can only be acking
+	 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
+	 */
+	/* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */
+	if (before48(ackno, av->dccpav_ack_seqno)) {
+		/* dccp_pr_debug_cat("yes\n"); */
+		return;
+	}
+	/* dccp_pr_debug_cat("no\n"); */
+
+	i = len;
+	while (i--) {
+		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+		u64 ackno_end_rl;
+
+		/* This run covers seqnos ackno_end_rl..ackno inclusive */
+		dccp_set_seqno(&ackno_end_rl, ackno - rl);
+
+		/*
+		 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
+		 * av->dccpav_ack_seqno, ackno);
+		 */
+		if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) {
+			/*
+			 * Keep the state bits in place: the
+			 * DCCP_ACKVEC_STATE_* constants are defined already
+			 * shifted into the top two bits.
+			 */
+			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
+			/* dccp_pr_debug_cat("yes\n"); */
+
+			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
+#ifdef CONFIG_IP_DCCP_DEBUG
+				struct dccp_sock *dp = dccp_sk(sk);
+				const char *debug_prefix =
+					dp->dccps_role == DCCP_ROLE_CLIENT ?
+					"CLIENT rx ack: " : "server rx ack: ";
+#endif
+				dccp_pr_debug("%sACK vector 0, len=%d, "
+					      "ack_seqno=%llu, ack_ackno=%llu, "
+					      "ACKED!\n",
+					      debug_prefix, len,
+					      (unsigned long long)
+					      av->dccpav_ack_seqno,
+					      (unsigned long long)
+					      av->dccpav_ack_ackno);
+				dccp_ackvec_trow_away_ack_record(av);
+			}
+			/*
+			 * If dccpav_ack_seqno was not received, no problem
+			 * we'll send another ACK vector.
+			 */
+			av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
+			break;
+		}
+		/* dccp_pr_debug_cat("no\n"); */
+
+		/* Next vector byte describes the seqnos just below this run */
+		dccp_set_seqno(&ackno, ackno_end_rl - 1);
+		++vector;
+	}
+}
+
+/*
+ * dccp_ackvec_parse - handle an Ack Vector option found in @skb
+ * @sk: socket the packet arrived on
+ * @skb: received packet; supplies the acknowledgement number
+ * @opt: option type (not used here)
+ * @value: the option's vector bytes
+ * @len: number of bytes in @value
+ *
+ * Returns -1 if @len exceeds DCCP_MAX_ACKVEC_LEN, 0 otherwise.
+ */
+int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+		      const u8 opt, const u8 *value, const u8 len)
+{
+	struct dccp_ackvec *av;
+
+	if (len > DCCP_MAX_ACKVEC_LEN)
+		return -1;
+
+	av = dccp_sk(sk)->dccps_hc_rx_ackvec;
+
+	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
+	dccp_ackvec_check_rcv_ackvector(av, sk,
+					DCCP_SKB_CB(skb)->dccpd_ack_seq,
+					len, value);
+	return 0;
+}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
new file mode 100644
index 000000000000..8ca51c9191f7
--- /dev/null
+++ b/net/dccp/ackvec.h
@@ -0,0 +1,133 @@
+#ifndef _ACKVEC_H
+#define _ACKVEC_H
+/*
+ *  net/dccp/ackvec.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License version 2 as
+ *	published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/compiler.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+/* Read about the ECN nonce to see why it is 253 */
+#define DCCP_MAX_ACKVEC_LEN 253
+
+#define DCCP_ACKVEC_STATE_RECEIVED	0
+#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
+#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+
+#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
+#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+
+/** struct dccp_ackvec - ack vector
+ *
+ * This data structure is the one defined in the DCCP draft
+ * Appendix A.
+ *
+ * @dccpav_buf_head - circular buffer head
+ * @dccpav_buf_tail - circular buffer tail
+ * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
+ * 		       buffer (i.e. %dccpav_buf_head)
+ * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ * 		       by the buffer with State 0
+ *
+ * Additionally, the HC-Receiver must keep some information about the
+ * Ack Vectors it has recently sent. For each packet sent carrying an
+ * Ack Vector, it remembers four variables:
+ *
+ * @dccpav_ack_seqno - the Sequence Number used for the packet
+ * 		       (HC-Receiver seqno)
+ * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
+ * @dccpav_ack_ackno - the Acknowledgement Number used for the packet
+ * 		       (HC-Sender seqno)
+ * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ *
+ * @dccpav_sent_len	- value of %dccpav_vec_len when the last Ack Vector
+ *			  option was built
+ * @dccpav_vec_len	- number of bytes of %dccpav_buf currently in use
+ * @dccpav_buf_len	- circular buffer length
+ * @dccpav_time		- timestamp taken when %dccpav_buf_ackno was last
+ *			  updated
+ * @dccpav_buf - circular buffer of acknowledgeable packets
+ */
+struct dccp_ackvec {
+	unsigned int	dccpav_buf_head;
+	unsigned int	dccpav_buf_tail;
+	u64		dccpav_buf_ackno;
+	u64		dccpav_ack_seqno;
+	u64		dccpav_ack_ackno;
+	unsigned int	dccpav_ack_ptr;
+	unsigned int	dccpav_sent_len;
+	unsigned int	dccpav_vec_len;
+	unsigned int	dccpav_buf_len;
+	struct timeval	dccpav_time;
+	u8		dccpav_buf_nonce;
+	u8		dccpav_ack_nonce;
+	u8		dccpav_buf[0];
+};
+
+struct sock;
+struct sk_buff;
+
+#ifdef CONFIG_IP_DCCP_ACKVEC
+extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
+					  const unsigned int __nocast priority);
+extern void dccp_ackvec_free(struct dccp_ackvec *av);
+
+extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+			   const u64 ackno, const u8 state);
+
+extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
+					struct sock *sk, const u64 ackno);
+extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+			     const u8 opt, const u8 *value, const u8 len);
+
+extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
+
+/*
+ * Non-zero when the current vector length differs from the length that was
+ * last sent, i.e. the vector changed since the last Ack Vector option.
+ */
+static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+{
+	return av->dccpav_sent_len != av->dccpav_vec_len;
+}
+#else /* CONFIG_IP_DCCP_ACKVEC */
+/*
+ * Stubs used when CONFIG_IP_DCCP_ACKVEC is not set: nothing is allocated,
+ * nothing is recorded and the int-returning operations report failure (-1).
+ */
+
+/* No ack vector support: never allocates, always returns NULL. */
+static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
+					   const unsigned int __nocast priority)
+{
+	return NULL;
+}
+
+/* Nothing was allocated, so there is nothing to free. */
+static inline void dccp_ackvec_free(struct dccp_ackvec *av)
+{
+}
+
+/* Recording a packet always fails with -1. */
+static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
+				  const u64 ackno, const u8 state)
+{
+	return -1;
+}
+
+/* No ack record exists, so a received ackno needs no handling. */
+static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
+					       struct sock *sk, const u64 ackno)
+{
+}
+
+/* A received Ack Vector option is always rejected with -1. */
+static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
+				    const u8 opt, const u8 *value, const u8 len)
+{
+	return -1;
+}
+
+/* No Ack Vector option is ever inserted; always returns -1. */
+static inline int dccp_insert_option_ackvec(const struct sock *sk,
+					    const struct sk_buff *skb)
+{
+	return -1;
+}
+
+/* There is never an unsent ack vector. */
+static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+{
+	return 0;
+}
+#endif /* CONFIG_IP_DCCP_ACKVEC */
+#endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 962f1e9e2f7e..21e55142dcd3 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -14,6 +14,7 @@
  */
 
 #include <net/sock.h>
+#include <linux/compiler.h>
 #include <linux/dccp.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -54,6 +55,14 @@ struct ccid {
 					       struct tcp_info *info);
 	void		(*ccid_hc_tx_get_info)(struct sock *sk,
 					       struct tcp_info *info);
+	int		(*ccid_hc_rx_getsockopt)(struct sock *sk,
+						 const int optname, int len,
+						 u32 __user *optval,
+						 int __user *optlen);
+	int		(*ccid_hc_tx_getsockopt)(struct sock *sk,
+						 const int optname, int len,
+						 u32 __user *optval,
+						 int __user *optlen);
 };
 
 extern int	   ccid_register(struct ccid *ccid);
@@ -177,4 +186,26 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
 	if (ccid->ccid_hc_tx_get_info != NULL)
 		ccid->ccid_hc_tx_get_info(sk, info);
 }
+
+/*
+ * Forward a receiver-side getsockopt request to @ccid.  Returns whatever the
+ * CCID's handler returns, or -ENOPROTOOPT when it provides no handler.
+ */
+static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
+					const int optname, int len,
+					u32 __user *optval, int __user *optlen)
+{
+	if (ccid->ccid_hc_rx_getsockopt == NULL)
+		return -ENOPROTOOPT;
+
+	return ccid->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen);
+}
+
+/*
+ * Forward a sender-side getsockopt request to @ccid.  Returns whatever the
+ * CCID's handler returns, or -ENOPROTOOPT when it provides no handler.
+ */
+static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
+					const int optname, int len,
+					u32 __user *optval, int __user *optlen)
+{
+	if (ccid->ccid_hc_tx_getsockopt == NULL)
+		return -ENOPROTOOPT;
+
+	return ccid->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen);
+}
 #endif /* _CCID_H */
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 38aa84986118..aa68e0ab274d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -1120,6 +1120,60 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rtt = hctx->ccid3hctx_rtt;
 }
 
+/*
+ * ccid3_hc_rx_getsockopt - CCID3 receiver-side getsockopt handler
+ * @sk: socket being queried
+ * @optname: option requested; only DCCP_SOCKOPT_CCID_RX_INFO is handled
+ * @len: size of the user buffer, as passed in by the caller
+ * @optval: user buffer that receives the value
+ * @optlen: user location that receives the number of bytes written
+ *
+ * Returns 0 on success, -EINVAL for a listening socket or a buffer that is
+ * too small, -ENOPROTOOPT for any other @optname and -EFAULT if copying to
+ * user space fails.
+ */
+static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
+				  u32 __user *optval, int __user *optlen)
+{
+	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	const void *val;
+
+	/* Listen socks don't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	switch (optname) {
+	case DCCP_SOCKOPT_CCID_RX_INFO:
+		/*
+		 * Reject negative lengths explicitly: compared directly
+		 * against sizeof() they would be converted to a huge
+		 * unsigned value and pass the size check.
+		 */
+		if (len < 0 || (size_t)len < sizeof(hcrx->ccid3hcrx_tfrc))
+			return -EINVAL;
+		len = sizeof(hcrx->ccid3hcrx_tfrc);
+		val = &hcrx->ccid3hcrx_tfrc;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * ccid3_hc_tx_getsockopt - CCID3 sender-side getsockopt handler
+ * @sk: socket being queried
+ * @optname: option requested; only DCCP_SOCKOPT_CCID_TX_INFO is handled
+ * @len: size of the user buffer, as passed in by the caller
+ * @optval: user buffer that receives the value
+ * @optlen: user location that receives the number of bytes written
+ *
+ * Returns 0 on success, -EINVAL for a listening socket or a buffer that is
+ * too small, -ENOPROTOOPT for any other @optname and -EFAULT if copying to
+ * user space fails.
+ */
+static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
+				  u32 __user *optval, int __user *optlen)
+{
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	const void *val;
+
+	/* Listen socks don't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	switch (optname) {
+	case DCCP_SOCKOPT_CCID_TX_INFO:
+		/*
+		 * Reject negative lengths explicitly: compared directly
+		 * against sizeof() they would be converted to a huge
+		 * unsigned value and pass the size check.
+		 */
+		if (len < 0 || (size_t)len < sizeof(hctx->ccid3hctx_tfrc))
+			return -EINVAL;
+		len = sizeof(hctx->ccid3hctx_tfrc);
+		val = &hctx->ccid3hctx_tfrc;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static struct ccid ccid3 = {
 	.ccid_id		   = 3,
 	.ccid_name		   = "ccid3",
@@ -1139,6 +1193,8 @@ static struct ccid ccid3 = {
 	.ccid_hc_rx_packet_recv	   = ccid3_hc_rx_packet_recv,
 	.ccid_hc_rx_get_info	   = ccid3_hc_rx_get_info,
 	.ccid_hc_tx_get_info	   = ccid3_hc_tx_get_info,
+	.ccid_hc_rx_getsockopt	   = ccid3_hc_rx_getsockopt,
+	.ccid_hc_tx_getsockopt	   = ccid3_hc_tx_getsockopt,
 };
  
 module_param(ccid3_debug, int, 0444);
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index eb248778eea3..0bde4583d091 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 #include <linux/time.h>
 #include <linux/types.h>
+#include <linux/tfrc.h>
 
 #define TFRC_MIN_PACKET_SIZE	   16
 #define TFRC_STD_PACKET_SIZE	  256
@@ -93,12 +94,15 @@ struct ccid3_options_received {
   * @ccid3hctx_hist - Packet history
   */
 struct ccid3_hc_tx_sock {
-	u32				ccid3hctx_x;
-	u32				ccid3hctx_x_recv;
-	u32				ccid3hctx_x_calc;
+	struct tfrc_tx_info		ccid3hctx_tfrc;
+#define ccid3hctx_x			ccid3hctx_tfrc.tfrctx_x
+#define ccid3hctx_x_recv		ccid3hctx_tfrc.tfrctx_x_recv
+#define ccid3hctx_x_calc		ccid3hctx_tfrc.tfrctx_x_calc
+#define ccid3hctx_rtt			ccid3hctx_tfrc.tfrctx_rtt
+#define ccid3hctx_p			ccid3hctx_tfrc.tfrctx_p
+#define ccid3hctx_t_rto			ccid3hctx_tfrc.tfrctx_rto
+#define ccid3hctx_t_ipi			ccid3hctx_tfrc.tfrctx_ipi
 	u16				ccid3hctx_s;
-	u32				ccid3hctx_rtt;
-	u32				ccid3hctx_p;
   	u8				ccid3hctx_state;
 	u8				ccid3hctx_last_win_count;
 	u8				ccid3hctx_idle;
@@ -106,19 +110,19 @@ struct ccid3_hc_tx_sock {
 	struct timer_list		ccid3hctx_no_feedback_timer;
 	struct timeval			ccid3hctx_t_ld;
 	struct timeval			ccid3hctx_t_nom;
-	u32				ccid3hctx_t_rto;
-	u32				ccid3hctx_t_ipi;
 	u32				ccid3hctx_delta;
 	struct list_head		ccid3hctx_hist;
 	struct ccid3_options_received	ccid3hctx_options_received;
 };
 
 struct ccid3_hc_rx_sock {
+	struct tfrc_rx_info	ccid3hcrx_tfrc;
+#define ccid3hcrx_x_recv	ccid3hcrx_tfrc.tfrcrx_x_recv
+#define ccid3hcrx_rtt		ccid3hcrx_tfrc.tfrcrx_rtt
+#define ccid3hcrx_p		ccid3hcrx_tfrc.tfrcrx_p
   	u64			ccid3hcrx_seqno_last_counter:48,
 				ccid3hcrx_state:8,
 				ccid3hcrx_last_counter:4;
-	u32			ccid3hcrx_rtt;
-  	u32			ccid3hcrx_p;
   	u32			ccid3hcrx_bytes_recv;
   	struct timeval		ccid3hcrx_tstamp_last_feedback;
   	struct timeval		ccid3hcrx_tstamp_last_ack;
@@ -127,7 +131,6 @@ struct ccid3_hc_rx_sock {
   	u16			ccid3hcrx_s;
   	u32			ccid3hcrx_pinv;
   	u32			ccid3hcrx_elapsed_time;
-  	u32			ccid3hcrx_x_recv;
 };
 
 static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 95c4630b3b18..5871c027f9dc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -17,6 +17,7 @@
 #include <net/snmp.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include "ackvec.h"
 
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern int dccp_debug;
@@ -258,13 +259,12 @@ extern int	   dccp_v4_send_reset(struct sock *sk,
 extern void	   dccp_send_close(struct sock *sk, const int active);
 
 struct dccp_skb_cb {
-	__u8 dccpd_type;
-	__u8 dccpd_reset_code;
-	__u8 dccpd_service;
-	__u8 dccpd_ccval;
+	__u8  dccpd_type:4;
+	__u8  dccpd_ccval:4;
+	__u8  dccpd_reset_code;
+	__u16 dccpd_opt_len;
 	__u64 dccpd_seq;
 	__u64 dccpd_ack_seq;
-	int  dccpd_opt_len;
 };
 
 #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
@@ -359,6 +359,17 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 		       (dp->dccps_gss -
 			dp->dccps_options.dccpo_sequence_window + 1));
 }
+				
+/*
+ * Non-zero when @sk has something to acknowledge: a timestamp echo is
+ * waiting, an ack is scheduled on the connection socket, or (only when
+ * CONFIG_IP_DCCP_ACKVEC is set) ack vectors are enabled and the vector has
+ * changed since it was last sent.
+ */
+static inline int dccp_ack_pending(const struct sock *sk)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	return dp->dccps_timestamp_echo != 0 ||
+#ifdef CONFIG_IP_DCCP_ACKVEC
+	       (dp->dccps_options.dccpo_send_ack_vector &&
+		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
+#endif
+	       inet_csk_ack_scheduled(sk);
+}
 
 extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
 extern void dccp_insert_option_elapsed_time(struct sock *sk,
@@ -372,65 +383,6 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
 
 extern struct socket *dccp_ctl_socket;
 
-#define DCCP_ACKPKTS_STATE_RECEIVED	0
-#define DCCP_ACKPKTS_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKPKTS_STATE_NOT_RECEIVED	(3 << 6)
-
-#define DCCP_ACKPKTS_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKPKTS_LEN_MASK		0x3F /* 00111111 */
-
-/** struct dccp_ackpkts - acknowledgeable packets
- *
- * This data structure is the one defined in the DCCP draft
- * Appendix A.
- *
- * @dccpap_buf_head - circular buffer head
- * @dccpap_buf_tail - circular buffer tail
- * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the
- * 		       buffer (i.e. %dccpap_buf_head)
- * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- * 		       by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
- *
- * @dccpap_ack_seqno - the Sequence Number used for the packet
- * 		       (HC-Receiver seqno)
- * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
- * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
- * 		       (HC-Sender seqno)
- * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
- *
- * @dccpap_buf_len - circular buffer length
- * @dccpap_time		- the time in usecs
- * @dccpap_buf - circular buffer of acknowledgeable packets
- */
-struct dccp_ackpkts {
-	unsigned int		dccpap_buf_head;
-	unsigned int		dccpap_buf_tail;
-	u64			dccpap_buf_ackno;
-	u64			dccpap_ack_seqno;
-	u64			dccpap_ack_ackno;
-	unsigned int		dccpap_ack_ptr;
-	unsigned int		dccpap_buf_vector_len;
-	unsigned int		dccpap_ack_vector_len;
-	unsigned int		dccpap_buf_len;
-	struct timeval		dccpap_time;
-	u8			dccpap_buf_nonce;
-	u8			dccpap_ack_nonce;
-	u8			dccpap_buf[0];
-};
-
-extern struct dccp_ackpkts *
-		dccp_ackpkts_alloc(unsigned int len,
-				  const unsigned int __nocast priority);
-extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
-extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
-			    u64 ackno, u8 state);
-extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
-					 struct sock *sk, u64 ackno);
-
 extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
 
 static inline suseconds_t timeval_usecs(const struct timeval *tv)
@@ -471,15 +423,4 @@ static inline void timeval_sub_usecs(struct timeval *tv,
 	}
 }
 
-#ifdef CONFIG_IP_DCCP_DEBUG
-extern void dccp_ackvector_print(const u64 ackno,
-				 const unsigned char *vector, int len);
-extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
-#else
-static inline void dccp_ackvector_print(const u64 ackno,
-					const unsigned char *vector,
-					int len) { }
-static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
-#endif
-
 #endif /* _DCCP_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index c74034cf7ede..1b6b2cb12376 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -16,6 +16,7 @@
 
 #include <net/sock.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -60,8 +61,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	if (dp->dccps_options.dccpo_send_ack_vector)
-		dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
-					     DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
+					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
 
 static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
@@ -164,37 +165,11 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 		dccp_event_ack_recv(sk, skb);
 
-	/*
-	 * FIXME: check ECN to see if we should use
-	 * DCCP_ACKPKTS_STATE_ECN_MARKED
-	 */
-	if (dp->dccps_options.dccpo_send_ack_vector) {
-		struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
-
-		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
-				     DCCP_SKB_CB(skb)->dccpd_seq,
-				     DCCP_ACKPKTS_STATE_RECEIVED)) {
-			LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
-						    "packets buffer full!\n");
-			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
-			inet_csk_schedule_ack(sk);
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-						  TCP_DELACK_MIN,
-						  DCCP_RTO_MAX);
-			goto discard;
-		}
-
-		/*
-		 * FIXME: this activation is probably wrong, have to study more
-		 * TCP delack machinery and how it fits into DCCP draft, but
-		 * for now it kinda "works" 8)
-		 */
-		if (!inet_csk_ack_scheduled(sk)) {
-			inet_csk_schedule_ack(sk);
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ,
-						  DCCP_RTO_MAX);
-		}
-	}
+	if (dp->dccps_options.dccpo_send_ack_vector &&
+	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+			    DCCP_SKB_CB(skb)->dccpd_seq,
+			    DCCP_ACKVEC_STATE_RECEIVED))
+		goto discard;
 
 	ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
 	ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
@@ -384,9 +359,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 	}
 
 out_invalid_packet:
-	return 1; /* dccp_v4_do_rcv will send a reset, but...
-		     FIXME: the reset code should be
-			    DCCP_RESET_CODE_PACKET_ERROR */
+	/* dccp_v4_do_rcv will send a reset */
+	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
+	return 1; 
 }
 
 static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
@@ -433,6 +408,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			   struct dccp_hdr *dh, unsigned len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	const int old_state = sk->sk_state;
 	int queued = 0;
 
@@ -473,7 +449,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dh->dccph_type == DCCP_PKT_RESET)
 			goto discard;
 
-		/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
+		/* Caller (dccp_v4_do_rcv) will send Reset */
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
 	}
 
@@ -487,36 +464,17 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dccp_parse_options(sk, skb))
 			goto discard;
 
-		if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
-		    DCCP_PKT_WITHOUT_ACK_SEQ)
+		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
 			dccp_event_ack_recv(sk, skb);
 
 		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
 		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
 
-		/*
-		 * FIXME: check ECN to see if we should use
-		 * DCCP_ACKPKTS_STATE_ECN_MARKED
-		 */
-		if (dp->dccps_options.dccpo_send_ack_vector) {
-			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
-					     DCCP_SKB_CB(skb)->dccpd_seq,
-					     DCCP_ACKPKTS_STATE_RECEIVED))
-				goto discard;
-			/*
-			 * FIXME: this activation is probably wrong, have to
-			 * study more TCP delack machinery and how it fits into
-			 * DCCP draft, but for now it kinda "works" 8)
-			 */
-			if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno ==
-			     DCCP_MAX_SEQNO + 1) &&
-			    !inet_csk_ack_scheduled(sk)) {
-				inet_csk_schedule_ack(sk);
-				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-							  TCP_DELACK_MIN,
-							  DCCP_RTO_MAX);
-			}
-		}
+ 		if (dp->dccps_options.dccpo_send_ack_vector &&
+		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
+ 				    DCCP_SKB_CB(skb)->dccpd_seq,
+ 				    DCCP_ACKVEC_STATE_RECEIVED))
+ 			goto discard;
 	}
 
 	/*
@@ -551,8 +509,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		     dh->dccph_type == DCCP_PKT_REQUEST) ||
 		    (sk->sk_state == DCCP_RESPOND &&
 		     dh->dccph_type == DCCP_PKT_DATA)) {
-		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
-			       DCCP_PKT_SYNC);
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
 		goto discard;
 	} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
 		dccp_rcv_closereq(sk, skb);
@@ -563,13 +520,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
-		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
-			       DCCP_PKT_SYNCACK);
+		dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
 		goto discard;
 	}
 
 	switch (sk->sk_state) {
 	case DCCP_CLOSED:
+		dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		return 1;
 
 	case DCCP_REQUESTING:
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2afaa464e7f0..40fe6afacde6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -23,6 +23,7 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -246,6 +247,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	dp->dccps_role = DCCP_ROLE_CLIENT;
 
+	if (dccp_service_not_initialized(sk))
+		return -EPROTO;
+
 	if (addr_len < sizeof(struct sockaddr_in))
 		return -EINVAL;
 
@@ -661,6 +665,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
 					   dccp_hdr(skb)->dccph_sport);
 }
 
+/* Zero when @service is sk's own code or dccp_list_has_service() has it. */
+static inline int dccp_bad_service_code(const struct sock *sk,
+					const __u32 service)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+
+	return dp->dccps_service != service &&
+	       !dccp_list_has_service(dp->dccps_service_list, service);
+}
+
 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -669,13 +683,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct dccp_request_sock *dreq;
 	const __u32 saddr = skb->nh.iph->saddr;
 	const __u32 daddr = skb->nh.iph->daddr;
+ 	const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+	__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
 	struct dst_entry *dst = NULL;
 
 	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
 	if (((struct rtable *)skb->dst)->rt_flags &
-	    (RTCF_BROADCAST | RTCF_MULTICAST))
+	    (RTCF_BROADCAST | RTCF_MULTICAST)) {
+		reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 		goto drop;
+	}
 
+	if (dccp_bad_service_code(sk, service)) {
+		reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+		goto drop;
+ 	}
 	/*
 	 * TW buckets are converted to open requests without
 	 * limitations, they conserve resources and peer is
@@ -718,9 +741,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * dccp_create_openreq_child.
 	 */
 	dreq = dccp_rsk(req);
-	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
-	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
-	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+	dreq->dreq_isr	   = dcb->dccpd_seq;
+	dreq->dreq_iss	   = dccp_v4_init_sequence(sk, skb);
+	dreq->dreq_service = service;
 
 	if (dccp_v4_send_response(sk, req, dst))
 		goto drop_and_free;
@@ -735,6 +758,7 @@ drop_and_free:
 	__reqsk_free(req);
 drop:
 	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+	dcb->dccpd_reset_code = reset_code;
 	return -1;
 }
 
@@ -1005,7 +1029,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 
 reset:
-	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
 	dccp_v4_ctl_send_reset(skb);
 discard:
 	kfree_skb(skb);
@@ -1090,45 +1113,7 @@ int dccp_v4_rcv(struct sk_buff *skb)
 		goto discard_it;
 
 	dh = dccp_hdr(skb);
-#if 0
-	/*
-	 * Use something like this to simulate some DATA/DATAACK loss to test
-	 * dccp_ackpkts_add, you'll get something like this on a session that
-	 * sends 10 DATA/DATAACK packets:
-	 *
-	 * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
-	 *
-	 * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
-	 * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets
-	 * 						   with the same state
-	 * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
-	 *
-	 * So...
-	 *
-	 * 281473596467422 was received
-	 * 281473596467421 was not received
-	 * 281473596467420 was received
-	 * 281473596467419 was not received
-	 * 281473596467418 was received
-	 * 281473596467417 was not received
-	 * 281473596467416 was received
-	 * 281473596467415 was not received
-	 * 281473596467414 was received
-	 * 281473596467413 was received (this one was the 3way handshake
-	 * 				 RESPONSE)
-	 *
-	 */
-	if (dh->dccph_type == DCCP_PKT_DATA ||
-	    dh->dccph_type == DCCP_PKT_DATAACK) {
-		static int discard = 0;
 
-		if (discard) {
-			discard = 0;
-			goto discard_it;
-		}
-		discard = 1;
-	}
-#endif
 	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
 	DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
 
@@ -1242,11 +1227,9 @@ static int dccp_v4_init_sock(struct sock *sk)
 	do_gettimeofday(&dp->dccps_epoch);
 
 	if (dp->dccps_options.dccpo_send_ack_vector) {
-		dp->dccps_hc_rx_ackpkts =
-			dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
-					   GFP_KERNEL);
-
-		if (dp->dccps_hc_rx_ackpkts == NULL)
+		dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
+							   GFP_KERNEL);
+		if (dp->dccps_hc_rx_ackvec == NULL)
 			return -ENOMEM;
 	}
 
@@ -1258,16 +1241,18 @@ static int dccp_v4_init_sock(struct sock *sk)
 	 * setsockopt(CCIDs-I-want/accept). -acme
 	 */
 	if (likely(!dccp_ctl_socket_init)) {
-		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+		dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid,
 						 sk);
-		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid,
+		dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid,
 						 sk);
 	    	if (dp->dccps_hc_rx_ccid == NULL ||
 		    dp->dccps_hc_tx_ccid == NULL) {
 			ccid_exit(dp->dccps_hc_rx_ccid, sk);
 			ccid_exit(dp->dccps_hc_tx_ccid, sk);
-			dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
-			dp->dccps_hc_rx_ackpkts = NULL;
+			if (dp->dccps_options.dccpo_send_ack_vector) {
+				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+				dp->dccps_hc_rx_ackvec = NULL;
+			}
 			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 			return -ENOMEM;
 		}
@@ -1280,6 +1265,7 @@ static int dccp_v4_init_sock(struct sock *sk)
 	sk->sk_write_space = dccp_write_space;
 	dp->dccps_mss_cache = 536;
 	dp->dccps_role = DCCP_ROLE_UNDEFINED;
+	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
 
 	return 0;
 }
@@ -1301,10 +1287,17 @@ static int dccp_v4_destroy_sock(struct sock *sk)
 	if (inet_csk(sk)->icsk_bind_hash != NULL)
 		inet_put_port(&dccp_hashinfo, sk);
 
+	if (dp->dccps_service_list != NULL) {
+		kfree(dp->dccps_service_list);
+		dp->dccps_service_list = NULL;
+	}
+
 	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
 	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
-	dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
-	dp->dccps_hc_rx_ackpkts = NULL;
+	if (dp->dccps_options.dccpo_send_ack_vector) {
+		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+		dp->dccps_hc_rx_ackvec = NULL;
+	}
 	ccid_exit(dp->dccps_hc_rx_ccid, sk);
 	ccid_exit(dp->dccps_hc_tx_ccid, sk);
 	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 18461bc04cbe..1393461898bb 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -19,6 +19,7 @@
 #include <net/xfrm.h>
 #include <net/inet_timewait_sock.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -93,22 +94,24 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 		struct inet_connection_sock *newicsk = inet_csk(sk);
 		struct dccp_sock *newdp = dccp_sk(newsk);
 
-		newdp->dccps_hc_rx_ackpkts = NULL;
-		newdp->dccps_role = DCCP_ROLE_SERVER;
-		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
+		newdp->dccps_role	   = DCCP_ROLE_SERVER;
+		newdp->dccps_hc_rx_ackvec  = NULL;
+		newdp->dccps_service_list  = NULL;
+		newdp->dccps_service	   = dreq->dreq_service;
+		newicsk->icsk_rto	   = DCCP_TIMEOUT_INIT;
 		do_gettimeofday(&newdp->dccps_epoch);
 
 		if (newdp->dccps_options.dccpo_send_ack_vector) {
-			newdp->dccps_hc_rx_ackpkts =
-				dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
-						   GFP_ATOMIC);
+			newdp->dccps_hc_rx_ackvec =
+				dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
+						  GFP_ATOMIC);
 			/*
 			 * XXX: We're using the same CCIDs set on the parent,
 			 * i.e. sk_clone copied the master sock and left the
 			 * CCID pointers for this child, that is why we do the
 			 * __ccid_get calls.
 			 */
-			if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
+			if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
 				goto out_free;
 		}
 
@@ -116,7 +119,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
 					     newsk) != 0 ||
 			     ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
 				     	     newsk) != 0)) {
-			dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
+			dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
 			ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
 			ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
 out_free:
diff --git a/net/dccp/options.c b/net/dccp/options.c
index d4c4242d8dd7..0a76426c9aea 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -18,19 +18,15 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
-static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
-					     struct sock *sk,
-					     const u64 ackno,
-					     const unsigned char len,
-					     const unsigned char *vector);
-
 /* stores the default values for new connection. may be changed with sysctl */
 static const struct dccp_options dccpo_default_values = {
 	.dccpo_sequence_window	  = DCCPF_INITIAL_SEQUENCE_WINDOW,
-	.dccpo_ccid		  = DCCPF_INITIAL_CCID,
+	.dccpo_rx_ccid		  = DCCPF_INITIAL_CCID,
+	.dccpo_tx_ccid		  = DCCPF_INITIAL_CCID,
 	.dccpo_send_ack_vector	  = DCCPF_INITIAL_SEND_ACK_VECTOR,
 	.dccpo_send_ndp_count	  = DCCPF_INITIAL_SEND_NDP_COUNT,
 };
@@ -113,25 +109,13 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 				      opt_recv->dccpor_ndp);
 			break;
 		case DCCPO_ACK_VECTOR_0:
-			if (len > DCCP_MAX_ACK_VECTOR_LEN)
-				goto out_invalid_option;
-
+		case DCCPO_ACK_VECTOR_1:
 			if (pkt_type == DCCP_PKT_DATA)
 				continue;
 
-			opt_recv->dccpor_ack_vector_len = len;
-			opt_recv->dccpor_ack_vector_idx = value - options;
-
-			dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
-				      debug_prefix, len,
-				      (unsigned long long)
-				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
-			dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
-					     value, len);
-			dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts,
-							 sk,
-						 DCCP_SKB_CB(skb)->dccpd_ack_seq,
-							 len, value);
+			if (dp->dccps_options.dccpo_send_ack_vector &&
+			    dccp_ackvec_parse(sk, skb, opt, value, len))
+				goto out_invalid_option;
 			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
@@ -352,86 +336,6 @@ void dccp_insert_option_elapsed_time(struct sock *sk,
 
 EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
 
-static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
-{
-	struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
-	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
-					"CLIENT TX opt: " : "server TX opt: ";
-#endif
-	struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
-	int len = ap->dccpap_buf_vector_len + 2;
-	struct timeval now;
-	u32 elapsed_time;
-	unsigned char *to, *from;
-
-	dccp_timestamp(sk, &now);
-	elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10;
-
-	if (elapsed_time != 0)
-		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
-
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
-		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
-					 "insert ACK Vector!\n");
-		return;
-	}
-
-	/*
-	 * XXX: now we have just one ack vector sent record, so
-	 * we have to wait for it to be cleared.
-	 *
-	 * Of course this is not acceptable, but this is just for
-	 * basic testing now.
-	 */
-	if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
-		return;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
-	to    = skb_push(skb, len);
-	*to++ = DCCPO_ACK_VECTOR_0;
-	*to++ = len;
-
-	len  = ap->dccpap_buf_vector_len;
-	from = ap->dccpap_buf + ap->dccpap_buf_head;
-
-	/* Check if buf_head wraps */
-	if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) {
-		const unsigned int tailsize = (ap->dccpap_buf_len -
-					       ap->dccpap_buf_head);
-
-		memcpy(to, from, tailsize);
-		to   += tailsize;
-		len  -= tailsize;
-		from = ap->dccpap_buf;
-	}
-
-	memcpy(to, from, len);
-	/*
-	 *	From draft-ietf-dccp-spec-11.txt:
-	 *
-	 *	For each acknowledgement it sends, the HC-Receiver will add an
-	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
-	 *	sequence number it used for the ack packet; ack_ptr will equal
-	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 *	equal buf_nonce.
-	 *
-	 * This implemention uses just one ack record for now.
-	 */
-	ap->dccpap_ack_seqno	  = DCCP_SKB_CB(skb)->dccpd_seq;
-	ap->dccpap_ack_ptr	  = ap->dccpap_buf_head;
-	ap->dccpap_ack_ackno	  = ap->dccpap_buf_ackno;
-	ap->dccpap_ack_nonce	  = ap->dccpap_buf_nonce;
-	ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
-
-	dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
-		      "ack_ackno=%llu\n",
-		      debug_prefix, ap->dccpap_ack_vector_len,
-		      (unsigned long long) ap->dccpap_ack_seqno,
-		      (unsigned long long) ap->dccpap_ack_ackno);
-}
-
 void dccp_timestamp(const struct sock *sk, struct timeval *tv)
 {
 	const struct dccp_sock *dp = dccp_sk(sk);
@@ -528,9 +432,8 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 
 	if (!dccp_packet_without_ack(skb)) {
 		if (dp->dccps_options.dccpo_send_ack_vector &&
-		    (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
-		     DCCP_MAX_SEQNO + 1))
-			dccp_insert_option_ack_vector(sk, skb);
+		    dccp_ackvec_pending(dp->dccps_hc_rx_ackvec))
+			dccp_insert_option_ackvec(sk, skb);
 		if (dp->dccps_timestamp_echo != 0)
 			dccp_insert_option_timestamp_echo(sk, skb);
 	}
@@ -557,331 +460,3 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 }
-
-struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len,
-				        const unsigned int __nocast priority)
-{
-	struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
-
-	if (ap != NULL) {
-#ifdef CONFIG_IP_DCCP_DEBUG
-		memset(ap->dccpap_buf, 0xFF, len);
-#endif
-		ap->dccpap_buf_len   = len;
-		ap->dccpap_buf_head  =
-			ap->dccpap_buf_tail =
-				ap->dccpap_buf_len - 1;
-		ap->dccpap_buf_ackno =
-			ap->dccpap_ack_ackno =
-				ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
-		ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0;
-		ap->dccpap_ack_ptr   = 0;
-		ap->dccpap_time.tv_sec = 0;
-		ap->dccpap_time.tv_usec = 0;
-		ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
-	}
-
-	return ap;
-}
-
-void dccp_ackpkts_free(struct dccp_ackpkts *ap)
-{
-	if (ap != NULL) {
-#ifdef CONFIG_IP_DCCP_DEBUG
-		memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
-#endif
-		kfree(ap);
-	}
-}
-
-static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
-				    const unsigned int index)
-{
-	return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK;
-}
-
-static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
-				  const unsigned int index)
-{
-	return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK;
-}
-
-/*
- * If several packets are missing, the HC-Receiver may prefer to enter multiple
- * bytes with run length 0, rather than a single byte with a larger run length;
- * this simplifies table updates if one of the missing packets arrives.
- */
-static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
-						  const unsigned int packets,
-						  const unsigned char state)
-{
-	unsigned int gap;
-	signed long new_head;
-
-	if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
-		return -ENOBUFS;
-
-	gap	 = packets - 1;
-	new_head = ap->dccpap_buf_head - packets;
-
-	if (new_head < 0) {
-		if (gap > 0) {
-			memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
-			       gap + new_head + 1);
-			gap = -new_head;
-		}
-		new_head += ap->dccpap_buf_len;
-	} 
-
-	ap->dccpap_buf_head = new_head;
-
-	if (gap > 0)
-		memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
-		       DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
-
-	ap->dccpap_buf[ap->dccpap_buf_head] = state;
-	ap->dccpap_buf_vector_len += packets;
-	return 0;
-}
-
-/*
- * Implements the draft-ietf-dccp-spec-11.txt Appendix A
- */
-int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
-		     u64 ackno, u8 state)
-{
-	/*
-	 * Check at the right places if the buffer is full, if it is, tell the
-	 * caller to start dropping packets till the HC-Sender acks our ACK
-	 * vectors, when we will free up space in dccpap_buf.
-	 *
-	 * We may well decide to do buffer compression, etc, but for now lets
-	 * just drop.
-	 *
-	 * From Appendix A:
-	 *
-	 *	Of course, the circular buffer may overflow, either when the
-	 *	HC-Sender is sending data at a very high rate, when the
-	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
-	 *	or when the HC-Sender is forgetting to acknowledge those acks
-	 *	(so the HC-Receiver is unable to clean up old state). In this
-	 *	case, the HC-Receiver should either compress the buffer (by
-	 *	increasing run lengths when possible), transfer its state to
-	 *	a larger buffer, or, as a last resort, drop all received
-	 *	packets, without processing them whatsoever, until its buffer
-	 *	shrinks again.
-	 */
-
-	/* See if this is the first ackno being inserted */
-	if (ap->dccpap_buf_vector_len == 0) {
-		ap->dccpap_buf[ap->dccpap_buf_head] = state;
-		ap->dccpap_buf_vector_len = 1;
-	} else if (after48(ackno, ap->dccpap_buf_ackno)) {
-		const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno,
-						   ackno);
-
-		/*
-		 * Look if the state of this packet is the same as the
-		 * previous ackno and if so if we can bump the head len.
-		 */
-		if (delta == 1 &&
-		    dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
-		    (dccp_ackpkts_len(ap, ap->dccpap_buf_head) <
-		     DCCP_ACKPKTS_LEN_MASK))
-			ap->dccpap_buf[ap->dccpap_buf_head]++;
-		else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
-			return -ENOBUFS;
-	} else {
-		/*
-		 * A.1.2.  Old Packets
-		 *
-		 *	When a packet with Sequence Number S arrives, and
-		 *	S <= buf_ackno, the HC-Receiver will scan the table
-		 *	for the byte corresponding to S. (Indexing structures
-		 *	could reduce the complexity of this scan.)
-		 */
-		u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
-		unsigned int index = ap->dccpap_buf_head;
-
-		while (1) {
-			const u8 len = dccp_ackpkts_len(ap, index);
-			const u8 state = dccp_ackpkts_state(ap, index);
-			/*
-			 * valid packets not yet in dccpap_buf have a reserved
-			 * entry, with a len equal to 0.
-			 */
-			if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
-			    len == 0 && delta == 0) { /* Found our
-							 reserved seat! */
-				dccp_pr_debug("Found %llu reserved seat!\n",
-					      (unsigned long long) ackno);
-				ap->dccpap_buf[index] = state;
-				goto out;
-			}
-			/* len == 0 means one packet */
-			if (delta < len + 1)
-				goto out_duplicate;
-
-			delta -= len + 1;
-			if (++index == ap->dccpap_buf_len)
-				index = 0;
-		}
-	}
-
-	ap->dccpap_buf_ackno = ackno;
-	dccp_timestamp(sk, &ap->dccpap_time);
-out:
-	dccp_pr_debug("");
-	dccp_ackpkts_print(ap);
-	return 0;
-
-out_duplicate:
-	/* Duplicate packet */
-	dccp_pr_debug("Received a dup or already considered lost "
-		      "packet: %llu\n", (unsigned long long) ackno);
-	return -EILSEQ;
-}
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-void dccp_ackvector_print(const u64 ackno, const unsigned char *vector,
-			  int len)
-{
-	if (!dccp_debug)
-		return;
-
-	printk("ACK vector len=%d, ackno=%llu |", len,
-	       (unsigned long long) ackno);
-
-	while (len--) {
-		const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
-		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
-
-		printk("%d,%d|", state, rl);
-		++vector;
-	}
-
-	printk("\n");
-}
-
-void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
-{
-	dccp_ackvector_print(ap->dccpap_buf_ackno,
-			     ap->dccpap_buf + ap->dccpap_buf_head,
-			     ap->dccpap_buf_vector_len);
-}
-#endif
-
-static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
-{
-	/*
-	 * As we're keeping track of the ack vector size
-	 * (dccpap_buf_vector_len) and the sent ack vector size
-	 * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
-	 * keep this code here as in the future we'll implement a vector of
-	 * ack records, as suggested in draft-ietf-dccp-spec-11.txt
-	 * Appendix A. -acme
-	 */
-#if 0
-	ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
-	if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
-		ap->dccpap_buf_tail -= ap->dccpap_buf_len;
-#endif
-	ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
-}
-
-void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
-				 u64 ackno)
-{
-	/* Check if we actually sent an ACK vector */
-	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
-		return;
-
-	if (ackno == ap->dccpap_ack_seqno) {
-#ifdef CONFIG_IP_DCCP_DEBUG
-		struct dccp_sock *dp = dccp_sk(sk);
-		const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
-					"CLIENT rx ack: " : "server rx ack: ";
-#endif
-		dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
-			      "ack_ackno=%llu, ACKED!\n",
-			      debug_prefix, 1,
-			      (unsigned long long) ap->dccpap_ack_seqno,
-			      (unsigned long long) ap->dccpap_ack_ackno);
-		dccp_ackpkts_trow_away_ack_record(ap);
-		ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
-	}
-}
-
-static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
-					     struct sock *sk, u64 ackno,
-					     const unsigned char len,
-					     const unsigned char *vector)
-{
-	unsigned char i;
-
-	/* Check if we actually sent an ACK vector */
-	if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
-		return;
-	/*
-	 * We're in the receiver half connection, so if the received an ACK
-	 * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're
-	 * not interested.
-	 *
-	 * Extra explanation with example:
-	 * 
-	 * if we received an ACK vector with ackno 50, it can only be acking
-	 * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
-	 */
-	/* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */
-	if (before48(ackno, ap->dccpap_ack_seqno)) {
-		/* dccp_pr_debug_cat("yes\n"); */
-		return;
-	}
-	/* dccp_pr_debug_cat("no\n"); */
-
-	i = len;
-	while (i--) {
-		const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
-		u64 ackno_end_rl;
-
-		dccp_set_seqno(&ackno_end_rl, ackno - rl);
-
-		/*
-		 * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
-		 * ap->dccpap_ack_seqno, ackno);
-		 */
-		if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
-			const u8 state = (*vector &
-					  DCCP_ACKPKTS_STATE_MASK) >> 6;
-			/* dccp_pr_debug_cat("yes\n"); */
-
-			if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
-#ifdef CONFIG_IP_DCCP_DEBUG
-				struct dccp_sock *dp = dccp_sk(sk);
-				const char *debug_prefix =
-					dp->dccps_role == DCCP_ROLE_CLIENT ?
-					"CLIENT rx ack: " : "server rx ack: ";
-#endif
-				dccp_pr_debug("%sACK vector 0, len=%d, "
-					      "ack_seqno=%llu, ack_ackno=%llu, "
-					      "ACKED!\n",
-					      debug_prefix, len,
-					      (unsigned long long)
-					      ap->dccpap_ack_seqno,
-					      (unsigned long long)
-					      ap->dccpap_ack_ackno);
-				dccp_ackpkts_trow_away_ack_record(ap);
-			}
-			/*
-			 * If dccpap_ack_seqno was not received, no problem
-			 * we'll send another ACK vector.
-			 */
-			ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
-			break;
-		}
-		/* dccp_pr_debug_cat("no\n"); */
-
-		dccp_set_seqno(&ackno, ackno_end_rl - 1);
-		++vector;
-	}
-}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index ea6d0e91e511..4786bdcddcc9 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -16,6 +16,7 @@
 
 #include <net/sock.h>
 
+#include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 
@@ -85,7 +86,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 		switch (dcb->dccpd_type) {
 		case DCCP_PKT_REQUEST:
 			dccp_hdr_request(skb)->dccph_req_service =
-							dcb->dccpd_service;
+							dp->dccps_service;
 			break;
 		case DCCP_PKT_RESET:
 			dccp_hdr_reset(skb)->dccph_reset_code =
@@ -225,7 +226,6 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
 		err = dccp_wait_for_ccid(sk, skb, timeo);
 
 	if (err == 0) {
-		const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
 		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 		const int len = skb->len;
 
@@ -236,15 +236,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
 						  inet_csk(sk)->icsk_rto,
 						  DCCP_RTO_MAX);
 			dcb->dccpd_type = DCCP_PKT_DATAACK;
-			/*
-			 * FIXME: we really should have a
-			 * dccps_ack_pending or use icsk.
-			 */
-		} else if (inet_csk_ack_scheduled(sk) ||
-			   dp->dccps_timestamp_echo != 0 ||
-			   (dp->dccps_options.dccpo_send_ack_vector &&
-			    ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
-			    ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
+		} else if (dccp_ack_pending(sk))
 			dcb->dccpd_type = DCCP_PKT_DATAACK;
 		else
 			dcb->dccpd_type = DCCP_PKT_DATA;
@@ -270,6 +262,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 				   struct request_sock *req)
 {
 	struct dccp_hdr *dh;
+	struct dccp_request_sock *dreq;
 	const int dccp_header_size = sizeof(struct dccp_hdr) +
 				     sizeof(struct dccp_hdr_ext) +
 				     sizeof(struct dccp_hdr_response);
@@ -285,8 +278,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 	skb->dst = dst_clone(dst);
 	skb->csum = 0;
 
+	dreq = dccp_rsk(req);
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
-	DCCP_SKB_CB(skb)->dccpd_seq  = dccp_rsk(req)->dreq_iss;
+	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_iss;
 	dccp_insert_options(sk, skb);
 
 	skb->h.raw = skb_push(skb, dccp_header_size);
@@ -300,8 +294,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
 			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
 	dh->dccph_type	= DCCP_PKT_RESPONSE;
 	dh->dccph_x	= 1;
-	dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
-	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
+	dccp_hdr_set_seq(dh, dreq->dreq_iss);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
+	dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
 
 	dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
 					      inet_rsk(req)->rmt_addr);
@@ -397,9 +392,6 @@ int dccp_connect(struct sock *sk)
 	skb_reserve(skb, MAX_DCCP_HEADER);
 
 	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
-	/* FIXME: set service to something meaningful, coming
-	 * from userspace*/
-	DCCP_SKB_CB(skb)->dccpd_service = 0;
 	skb->csum = 0;
 	skb_set_owner_w(skb, sk);
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 18a0e69c9dc7..a1cfd0e9e3bc 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
 
 static inline int dccp_listen_start(struct sock *sk)
 {
-	dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
+	struct dccp_sock *dp = dccp_sk(sk);
+
+	dp->dccps_role = DCCP_ROLE_LISTEN;
+	/*
+	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
+	 * before calling listen()
+	 */
+	if (dccp_service_not_initialized(sk))
+		return -EPROTO;
 	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
 }
 
@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
+/*
+ * Set sk's service code to @service and replace its service list with the
+ * u32 values that follow the first sizeof(service) bytes of @optval, if any.
+ */
+static int dccp_setsockopt_service(struct sock *sk, const u32 service,
+				   char __user *optval, int optlen)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_service_list *sl = NULL;
+
+	if (service == DCCP_SERVICE_INVALID_VALUE ||
+	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
+		return -EINVAL;
+
+	if (optlen > sizeof(service)) {
+		sl = kmalloc(optlen, GFP_KERNEL);
+		if (sl == NULL)
+			return -ENOMEM;
+		/* The first u32 is the primary code, not a list entry */
+		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
+		if (copy_from_user(sl->dccpsl_list, optval + sizeof(service),
+				   optlen - sizeof(service)) ||
+		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
+			kfree(sl);
+			return -EFAULT;
+		}
+	}
+
+	lock_sock(sk);
+	dp->dccps_service = service;
+	kfree(dp->dccps_service_list);	/* kfree(NULL) is a no-op */
+	dp->dccps_service_list = sl;
+	release_sock(sk);
+	return 0;
+}
+
 int dccp_setsockopt(struct sock *sk, int level, int optname,
 		    char __user *optval, int optlen)
 {
@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
-	lock_sock(sk);
+	if (optname == DCCP_SOCKOPT_SERVICE)
+		return dccp_setsockopt_service(sk, val, optval, optlen);
 
+	lock_sock(sk);
 	dp = dccp_sk(sk);
 	err = 0;
 
@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
 	return err;
 }
 
+/* Copy the service code, followed by any service list, out to @optval. */
+static int dccp_getsockopt_service(struct sock *sk, int len,
+				   u32 __user *optval, int __user *optlen)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	const struct dccp_service_list *svc;
+	int ret = -ENOENT, extra = 0, needed = sizeof(u32);
+
+	lock_sock(sk);
+	if (dccp_service_not_initialized(sk))
+		goto unlock;
+
+	svc = dp->dccps_service_list;
+	if (svc != NULL)
+		extra = svc->dccpsl_nr * sizeof(u32);
+	needed += extra;
+
+	if (needed > len) {
+		ret = -EINVAL;	/* @len cannot hold the code plus the list */
+		goto unlock;
+	}
+
+	ret = -EFAULT;
+	if (!put_user(needed, optlen) && !put_user(dp->dccps_service, optval) &&
+	    (svc == NULL || !copy_to_user(optval + 1, svc->dccpsl_list, extra)))
+		ret = 0;
+unlock:
+	release_sock(sk);
+	return ret;
+}
+
 int dccp_getsockopt(struct sock *sk, int level, int optname,
 		    char __user *optval, int __user *optlen)
 {
@@ -248,8 +325,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
-	len = min_t(unsigned int, len, sizeof(int));
-	if (len < 0)
+	if (len < sizeof(int))
 		return -EINVAL;
 
 	dp = dccp_sk(sk);
@@ -257,7 +333,17 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
 	switch (optname) {
 	case DCCP_SOCKOPT_PACKET_SIZE:
 		val = dp->dccps_packet_size;
+		len = sizeof(dp->dccps_packet_size);
 		break;
+	case DCCP_SOCKOPT_SERVICE:
+		return dccp_getsockopt_service(sk, len,
+					       (u32 __user *)optval, optlen);
+	case 128 ... 191:
+		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
+					     len, (u32 __user *)optval, optlen);
+	case 192 ... 255:
+		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
+					     len, (u32 __user *)optval, optlen);
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1b63b4824164..50c0519cd70d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#define VERSION "0.403"
+#define VERSION "0.404"
 
 #include <linux/config.h>
 #include <asm/uaccess.h>
@@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
   Consider a node 'n' and its parent 'tp'.
 
   If n is a leaf, every bit in its key is significant. Its presence is 
-  necessitaded by path compression, since during a tree traversal (when 
+  necessitated by path compression, since during a tree traversal (when 
   searching for a leaf - unless we are doing an insertion) we will completely 
   ignore all skipped bits we encounter. Thus we need to verify, at the end of 
   a potentially successful search, that we have indeed been walking the 
@@ -836,11 +836,12 @@ static void trie_init(struct trie *t)
 #endif
 }
 
-/* readside most use rcu_read_lock currently dump routines
+/* readside must use rcu_read_lock currently dump routines
  via get_fa_head and dump */
 
-static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
+static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
 {
+	struct hlist_head *head = &l->list;
 	struct hlist_node *node;
 	struct leaf_info *li;
 
@@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
 
 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
 {
-	struct leaf_info *li = find_leaf_info(&l->list, plen);
+	struct leaf_info *li = find_leaf_info(l, plen);
 
 	if (!li)
 		return NULL;
@@ -1085,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 	}
 
 	if (tp && tp->pos + tp->bits > 32)
-		printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
+		printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
 		       tp, tp->pos, tp->bits, key, plen);
 
 	/* Rebalance the trie */
@@ -1248,7 +1249,7 @@ err:
 }
 
 
-/* should be clalled with rcu_read_lock */
+/* should be called with rcu_read_lock */
 static inline int check_leaf(struct trie *t, struct leaf *l,
 			     t_key key, int *plen, const struct flowi *flp,
 			     struct fib_result *res)
@@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
 
 	l = fib_find_node(t, key);
-	li = find_leaf_info(&l->list, plen);
+	li = find_leaf_info(l, plen);
 
 	list_del_rcu(&fa->fa_list);
 
@@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb)
 
 	t->revision++;
 
-	rcu_read_lock();
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		found += trie_flush_leaf(t, l);
 
@@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb)
 			trie_leaf_remove(t, ll->key);
 		ll = l;
 	}
-	rcu_read_unlock();  
 
 	if (ll && hlist_empty(&ll->list))
 		trie_leaf_remove(t, ll->key);
@@ -1833,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 			i++;
 			continue;
 		}
-		if (fa->fa_info->fib_nh == NULL) {
-			printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
-			i++;
-			continue;
-		}
-		if (fa->fa_info == NULL) {
-			printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
-			i++;
-			continue;
-		}
+		BUG_ON(!fa->fa_info);
 
 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
 				  cb->nlh->nlmsg_seq,
@@ -1965,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id)
 		trie_main = t;
 
 	if (id == RT_TABLE_LOCAL)
-		printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
+		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
 
 	return tb;
 }
@@ -2029,7 +2019,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
 		iter->tnode = (struct tnode *) n;
 		iter->trie = t;
 		iter->index = 0;
-		iter->depth = 0;
+		iter->depth = 1;
 		return n;
 	}
 	return NULL;
@@ -2274,11 +2264,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 				seq_puts(seq, "<local>:\n");
 			else
 				seq_puts(seq, "<main>:\n");
-		} else {
-			seq_indent(seq, iter->depth-1);
-			seq_printf(seq, "  +-- %d.%d.%d.%d/%d\n",
-				   NIPQUAD(prf), tn->pos);
-		}
+		} 
+		seq_indent(seq, iter->depth-1);
+		seq_printf(seq, "  +-- %d.%d.%d.%d/%d %d %d %d\n",
+			   NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, 
+			   tn->empty_children);
+		
 	} else {
 		struct leaf *l = (struct leaf *) n;
 		int i;
@@ -2287,7 +2278,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
 		for (i = 32; i >= 0; i--) {
-			struct leaf_info *li = find_leaf_info(&l->list, i);
+			struct leaf_info *li = find_leaf_info(l, i);
 			if (li) {
 				struct fib_alias *fa;
 				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -2383,7 +2374,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 
 	for (i=32; i>=0; i--) {
-		struct leaf_info *li = find_leaf_info(&l->list, i);
+		struct leaf_info *li = find_leaf_info(l, i);
 		struct fib_alias *fa;
 		u32 mask, prefix;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 44607f4767b8..70c44e4c3ceb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1603,7 +1603,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 	}
 	pmc->sources = NULL;
 	pmc->sfmode = MCAST_EXCLUDE;
-	pmc->sfcount[MCAST_EXCLUDE] = 0;
+	pmc->sfcount[MCAST_INCLUDE] = 0;
 	pmc->sfcount[MCAST_EXCLUDE] = 1;
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index e11952ea17af..f828fa2eb7de 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -196,6 +196,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
 	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (s_addr==cp->caddr && s_port==cp->cport &&
 		    d_port==cp->vport && d_addr==cp->vaddr &&
+		    ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
 		    protocol==cp->protocol) {
 			/* HIT */
 			atomic_inc(&cp->refcnt);
@@ -227,6 +228,40 @@ struct ip_vs_conn *ip_vs_conn_in_get
 	return cp;
 }
 
+/* Get a referenced connection template matching the arguments, or NULL */
+struct ip_vs_conn *ip_vs_ct_in_get
+(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+{
+	unsigned hash;
+	struct ip_vs_conn *cp;
+
+	hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+	/* the bucket is chosen from protocol, client address and client port */
+	ct_read_lock(hash);
+
+	list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (s_addr==cp->caddr && s_port==cp->cport &&
+		    d_port==cp->vport && d_addr==cp->vaddr &&
+		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
+		    protocol==cp->protocol) {
+			/* HIT: take a reference before the read lock is dropped */
+			atomic_inc(&cp->refcnt);
+			goto out;
+		}
+	}
+	cp = NULL;	/* loop ran to completion: no template matched */
+
+  out:
+	ct_read_unlock(hash);
+
+	IP_VS_DBG(7, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
+		  ip_vs_proto_name(protocol),
+		  NIPQUAD(s_addr), ntohs(s_port),
+		  NIPQUAD(d_addr), ntohs(d_port),
+		  cp?"hit":"not hit");
+
+	return cp;
+}
 
 /*
  *  Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
@@ -367,7 +402,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		  atomic_read(&dest->refcnt));
 
 	/* Update the connection counters */
-	if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
 		/* It is a normal connection, so increase the inactive
 		   connection counter because it is in TCP SYNRECV
 		   state (inactive) or other protocol inacive state */
@@ -406,7 +441,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 		  atomic_read(&dest->refcnt));
 
 	/* Update the connection counters */
-	if (cp->cport || (cp->flags & IP_VS_CONN_F_NO_CPORT)) {
+	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
 		/* It is a normal connection, so decrease the inactconns
 		   or activeconns counter */
 		if (cp->flags & IP_VS_CONN_F_INACTIVE) {
@@ -467,7 +502,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 		/*
 		 * Invalidate the connection template
 		 */
-		if (ct->cport) {
+		if (ct->vport != 65535) {
 			if (ip_vs_conn_unhash(ct)) {
 				ct->dport = 65535;
 				ct->vport = 65535;
@@ -776,7 +811,7 @@ void ip_vs_random_dropentry(void)
 		ct_write_lock_bh(hash);
 
 		list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-			if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
+			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 3ac7eeca04ac..981cc3244ef2 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -243,10 +243,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	if (ports[1] == svc->port) {
 		/* Check if a template already exists */
 		if (svc->port != FTPPORT)
-			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+			ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
 					       iph->daddr, ports[1]);
 		else
-			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+			ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
 					       iph->daddr, 0);
 
 		if (!ct || !ip_vs_check_template(ct)) {
@@ -272,14 +272,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 						    iph->daddr,
 						    ports[1],
 						    dest->addr, dest->port,
-						    0,
+						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
 				ct = ip_vs_conn_new(iph->protocol,
 						    snet, 0,
 						    iph->daddr, 0,
 						    dest->addr, 0,
-						    0,
+						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			if (ct == NULL)
 				return NULL;
@@ -298,10 +298,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
 		 */
 		if (svc->fwmark)
-			ct = ip_vs_conn_in_get(IPPROTO_IP, snet, 0,
+			ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0,
 					       htonl(svc->fwmark), 0);
 		else
-			ct = ip_vs_conn_in_get(iph->protocol, snet, 0,
+			ct = ip_vs_ct_in_get(iph->protocol, snet, 0,
 					       iph->daddr, 0);
 
 		if (!ct || !ip_vs_check_template(ct)) {
@@ -326,14 +326,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 						    snet, 0,
 						    htonl(svc->fwmark), 0,
 						    dest->addr, 0,
-						    0,
+						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			else
 				ct = ip_vs_conn_new(iph->protocol,
 						    snet, 0,
 						    iph->daddr, 0,
 						    dest->addr, 0,
-						    0,
+						    IP_VS_CONN_F_TEMPLATE,
 						    dest);
 			if (ct == NULL)
 				return NULL;
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 574d1f509b46..2e5ced3d8062 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -297,16 +297,24 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 
 	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
 	for (i=0; i<m->nr_conns; i++) {
+		unsigned flags;
+
 		s = (struct ip_vs_sync_conn *)p;
-		cp = ip_vs_conn_in_get(s->protocol,
-				       s->caddr, s->cport,
-				       s->vaddr, s->vport);
+		flags = ntohs(s->flags);
+		if (!(flags & IP_VS_CONN_F_TEMPLATE))
+			cp = ip_vs_conn_in_get(s->protocol,
+					       s->caddr, s->cport,
+					       s->vaddr, s->vport);
+		else
+			cp = ip_vs_ct_in_get(s->protocol,
+					       s->caddr, s->cport,
+					       s->vaddr, s->vport);
 		if (!cp) {
 			cp = ip_vs_conn_new(s->protocol,
 					    s->caddr, s->cport,
 					    s->vaddr, s->vport,
 					    s->daddr, s->dport,
-					    ntohs(s->flags), NULL);
+					    flags, NULL);
 			if (!cp) {
 				IP_VS_ERR("ip_vs_conn_new failed\n");
 				return;
@@ -315,11 +323,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		} else if (!cp->dest) {
 			/* it is an entry created by the synchronization */
 			cp->state = ntohs(s->state);
-			cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED;
+			cp->flags = flags | IP_VS_CONN_F_HASHED;
 		}	/* Note that we don't touch its state and flags
 			   if it is a normal entry. */
 
-		if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) {
+		if (flags & IP_VS_CONN_F_SEQ_MASK) {
 			opt = (struct ip_vs_sync_conn_options *)&s[1];
 			memcpy(&cp->in_seq, opt, sizeof(*opt));
 			p += FULL_CONN_SIZE;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 30aa8e2ee214..3cf9b451675c 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -51,6 +51,14 @@ config IP_NF_CONNTRACK_EVENTS
 	  
 	  IF unsure, say `N'.
 
+config IP_NF_CONNTRACK_NETLINK
+	tristate 'Connection tracking netlink interface'
+	depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
+	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+	help
+	  This option enables support for a netlink-based userspace interface
+
+
 config IP_NF_CT_PROTO_SCTP
 	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
 	depends on IP_NF_CONNTRACK && EXPERIMENTAL
@@ -129,6 +137,22 @@ config IP_NF_AMANDA
 
 	  To compile it as a module, choose M here.  If unsure, say Y.
 
+config IP_NF_PPTP
+	tristate  'PPTP protocol support'
+	help
+	  This module adds support for PPTP (Point to Point Tunnelling
+	  Protocol, RFC2637) connection tracking and NAT. 
+	
+	  If you are running PPTP sessions over a stateful firewall or NAT
+	  box, you may want to enable this feature.  
+	
+	  Please note that not all PPTP modes of operation are supported yet.
+	  For more info, read top of the file
+	  net/ipv4/netfilter/ip_conntrack_pptp.c
+	
+	  If you want to compile it as a module, say M here and read
+	  Documentation/modules.txt.  If unsure, say `N'.
+
 config IP_NF_QUEUE
 	tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
 	help
@@ -613,6 +637,12 @@ config IP_NF_NAT_AMANDA
 	default IP_NF_NAT if IP_NF_AMANDA=y
 	default m if IP_NF_AMANDA=m
 
+config IP_NF_NAT_PPTP
+	tristate
+	depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
+	default IP_NF_NAT if IP_NF_PPTP=y
+	default m if IP_NF_PPTP=m
+
 # mangle + specific targets
 config IP_NF_MANGLE
 	tristate "Packet mangling"
@@ -774,11 +804,5 @@ config IP_NF_ARP_MANGLE
 	  Allows altering the ARP packet payload: source and destination
 	  hardware and network addresses.
 
-config IP_NF_CONNTRACK_NETLINK
-        tristate 'Connection tracking netlink interface'
-        depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
-        help
-          This option enables support for a netlink-based userspace interface
-
 endmenu
 
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 1ba0db746817..3d45d3c0283c 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -6,6 +6,9 @@
 ip_conntrack-objs	:= ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
 iptable_nat-objs	:= ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
 
+ip_conntrack_pptp-objs	:= ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
+ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
+
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
 
@@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
 obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
 
 # connection tracking helpers
+obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
 obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
 obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
 obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
@@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
 obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
 
 # NAT helpers 
+obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
 obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
 obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
 obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 19cba16e6e1e..c1f82e0c81cf 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
 
 /* Just find a expectation corresponding to a tuple. */
 struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
+ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
 {
 	struct ip_conntrack_expect *i;
 	
@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
 		if (del_timer(&ct->timeout)) {
 			ct->timeout.expires = jiffies + extra_jiffies;
 			add_timer(&ct->timeout);
-			ip_conntrack_event_cache(IPCT_REFRESH, skb);
+			/* FIXME: We lose some REFRESH events if this function
+			 * is called without an skb.  I'll fix this later -HW */
+			if (skb)
+				ip_conntrack_event_cache(IPCT_REFRESH, skb);
 		}
 		ct_add_counters(ct, ctinfo, skb);
 		write_unlock_bh(&ip_conntrack_lock);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
new file mode 100644
index 000000000000..79db5b70d5f6
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -0,0 +1,805 @@
+/*
+ * ip_conntrack_pptp.c	- Version 3.0
+ *
+ * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft.  PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * Limitations:
+ * 	 - We blindly assume that control connections are always
+ * 	   established in PNS->PAC direction.  This is a violation
+ * 	   of RFC 2637
+ * 	 - We can only support one single call within each session
+ *
+ * TODO:
+ *	 - testing of incoming PPTP calls 
+ *
+ * Changes: 
+ * 	2002-02-05 - Version 1.3
+ * 	  - Call ip_conntrack_unexpect_related() from 
+ * 	    pptp_destroy_siblings() to destroy expectations in case
+ * 	    CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
+ * 	    (Philip Craig <philipc@snapgear.com>)
+ * 	  - Add Version information at module loadtime
+ * 	2002-02-10 - Version 1.6
+ * 	  - move to C99 style initializers
+ * 	  - remove second expectation if first arrives
+ * 	2004-10-22 - Version 2.0
+ * 	  - merge Mandrake's 2.6.x port with recent 2.6.x API changes
+ * 	  - fix lots of linear skb assumptions from Mandrake's port
+ * 	2005-06-10 - Version 2.1
+ * 	  - use ip_conntrack_expect_free() instead of kfree() on the
+ * 	    expect's (which are from the slab for quite some time)
+ * 	2005-06-10 - Version 3.0
+ * 	  - port helper to post-2.6.11 API changes,
+ * 	    funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
+ * 	2005-07-30 - Version 3.1
+ * 	  - port helper to 2.6.13 API changes
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+#define IP_CT_PPTP_VERSION "3.1"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
+
+static DEFINE_SPINLOCK(ip_pptp_lock);
+
+int
+(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+			  struct ip_conntrack *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct PptpControlHeader *ctlh,
+			  union pptp_ctrl_union *pptpReq);
+
+int
+(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+			  struct ip_conntrack *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct PptpControlHeader *ctlh,
+			  union pptp_ctrl_union *pptpReq);
+
+int
+(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
+			    struct ip_conntrack_expect *expect_reply);
+
+void
+(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
+			     struct ip_conntrack_expect *exp);
+
+#if 0
+/* PptpControlMessageType names */
+const char *pptp_msg_name[] = {
+	"UNKNOWN_MESSAGE",
+	"START_SESSION_REQUEST",
+	"START_SESSION_REPLY",
+	"STOP_SESSION_REQUEST",
+	"STOP_SESSION_REPLY",
+	"ECHO_REQUEST",
+	"ECHO_REPLY",
+	"OUT_CALL_REQUEST",
+	"OUT_CALL_REPLY",
+	"IN_CALL_REQUEST",
+	"IN_CALL_REPLY",
+	"IN_CALL_CONNECT",
+	"CALL_CLEAR_REQUEST",
+	"CALL_DISCONNECT_NOTIFY",
+	"WAN_ERROR_NOTIFY",
+	"SET_LINK_INFO"
+};
+EXPORT_SYMBOL(pptp_msg_name);
+#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define SECS *HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+
+#define PPTP_GRE_TIMEOUT 		(10 MINS)
+#define PPTP_GRE_STREAM_TIMEOUT 	(5 HOURS)
+
+static void pptp_expectfn(struct ip_conntrack *ct,
+			 struct ip_conntrack_expect *exp)
+{
+	DEBUGP("increasing timeouts\n");
+	/* expectfn callback that exp_gre() installs on both GRE expectations */
+	/* increase timeout of GRE data channel conntrack entry */
+	ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
+	ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
+
+	/* Can you see how rusty this code is, compared with the pre-2.6.11
+	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
+
+	if (!ip_nat_pptp_hook_expectfn) {
+		struct ip_conntrack_tuple inv_t;
+		struct ip_conntrack_expect *exp_other;
+
+		/* obviously this tuple inversion only works until you do NAT */
+		invert_tuplepr(&inv_t, &exp->tuple);
+		DEBUGP("trying to unexpect other dir: ");
+		DUMP_TUPLE(&inv_t);
+	
+		exp_other = ip_conntrack_expect_find(&inv_t);
+		if (exp_other) {
+			/* delete other expectation; put pairs with the find */
+			DEBUGP("found\n");
+			ip_conntrack_unexpect_related(exp_other);
+			ip_conntrack_expect_put(exp_other);
+		} else {
+			DEBUGP("not found\n");
+		}
+	} else {
+		/* we need more than simple inversion */
+		ip_nat_pptp_hook_expectfn(ct, exp);
+	}
+}
+
+static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
+{
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack_expect *exp;
+	/* returns 1 if a conntrack or expectation for 't' was found, else 0 */
+	DEBUGP("trying to timeout ct or exp for tuple ");
+	DUMP_TUPLE(t);
+	/* look for a conntrack first; only without one try the expectations */
+	h = ip_conntrack_find_get(t, NULL);
+	if (h)  {
+		struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
+		DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
+		sibling->proto.gre.timeout = 0;
+		sibling->proto.gre.stream_timeout = 0;
+		/* if the timer was still pending, run its handler right now */
+		if (del_timer(&sibling->timeout))
+			sibling->timeout.function((unsigned long)sibling);
+		ip_conntrack_put(sibling);
+		return 1;
+	} else {
+		exp = ip_conntrack_expect_find(t);
+		if (exp) {
+			DEBUGP("unexpect_related of expect %p\n", exp);
+			ip_conntrack_unexpect_related(exp);
+			ip_conntrack_expect_put(exp);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+
+/* time out the GRE data conntracks/expectations belonging to 'ct' */
+static void pptp_destroy_siblings(struct ip_conntrack *ct)
+{
+	struct ip_conntrack_tuple t;
+
+	/* Since ct->sibling_list has literally rusted away in 2.6.11,
+	 * we now need another way to find out about our sibling
+	 * conntrack and expects... -HW
+	 * So: rebuild the GRE tuples from ct's tuples and stored call IDs. */
+	/* try original (pns->pac) tuple */
+	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
+	t.dst.protonum = IPPROTO_GRE;
+	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+
+	if (!destroy_sibling_or_exp(&t))
+		DEBUGP("failed to timeout original pns->pac ct/exp\n");
+
+	/* try reply (pac->pns) tuple */
+	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
+	t.dst.protonum = IPPROTO_GRE;
+	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+
+	if (!destroy_sibling_or_exp(&t))
+		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
+}
+
+/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
+static inline int
+exp_gre(struct ip_conntrack *master,
+	u_int32_t seq,		/* not referenced in this function */
+	u_int16_t callid,
+	u_int16_t peer_callid)
+{
+	struct ip_conntrack_tuple inv_tuple;
+	struct ip_conntrack_tuple exp_tuples[] = {
+		/* tuple in original direction, PNS->PAC */
+		{ .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
+			   .u = { .gre = { .key = peer_callid } }
+			 },
+		  .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
+			   .u = { .gre = { .key = callid } },
+			   .protonum = IPPROTO_GRE
+			 },
+		 },
+		/* tuple in reply direction, PAC->PNS */
+		{ .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
+			   .u = { .gre = { .key = callid } }
+			 },
+		  .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
+			   .u = { .gre = { .key = peer_callid } },
+			   .protonum = IPPROTO_GRE
+			 },
+		 }
+	};
+	struct ip_conntrack_expect *exp_orig, *exp_reply;
+	int ret = 1;
+	/* returns 0 on success, 1 on failure, or the NAT hook's result if set */
+	exp_orig = ip_conntrack_expect_alloc(master);
+	if (exp_orig == NULL)
+		goto out;
+
+	exp_reply = ip_conntrack_expect_alloc(master);
+	if (exp_reply == NULL)
+		goto out_put_orig;
+
+	memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+	/* mask: exact src/dst IP, dst GRE key and protocol; src key ignored */
+	exp_orig->mask.src.ip = 0xffffffff;
+	exp_orig->mask.src.u.all = 0;
+	exp_orig->mask.dst.u.all = 0;
+	exp_orig->mask.dst.u.gre.key = 0xffff;
+	exp_orig->mask.dst.ip = 0xffffffff;
+	exp_orig->mask.dst.protonum = 0xff;
+		
+	exp_orig->master = master;
+	exp_orig->expectfn = pptp_expectfn;
+	exp_orig->flags = 0;
+
+	exp_orig->dir = IP_CT_DIR_ORIGINAL;
+
+	/* both expectations are identical apart from tuple */
+	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
+	memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
+
+	exp_reply->dir = !exp_orig->dir;
+
+	if (ip_nat_pptp_hook_exp_gre)
+		ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+	else {
+		/* no NAT hook: register both expectations and keymaps here */
+		DEBUGP("calling expect_related PNS->PAC");
+		DUMP_TUPLE(&exp_orig->tuple);
+
+		if (ip_conntrack_expect_related(exp_orig) != 0) {
+			DEBUGP("cannot expect_related()\n");
+			goto out_put_both;
+		}
+
+		DEBUGP("calling expect_related PAC->PNS");
+		DUMP_TUPLE(&exp_reply->tuple);
+
+		if (ip_conntrack_expect_related(exp_reply) != 0) {
+			DEBUGP("cannot expect_related()\n");
+			goto out_unexpect_orig;
+		}
+
+		/* Add GRE keymap entries */
+		if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
+			DEBUGP("cannot keymap_add() exp\n");
+			goto out_unexpect_both;
+		}
+
+		invert_tuplepr(&inv_tuple, &exp_reply->tuple);
+		if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
+			/* presumably undoes the keymap entry added just above */
+			ip_ct_gre_keymap_destroy(master);
+			DEBUGP("cannot keymap_add() exp_inv\n");
+			goto out_unexpect_both;
+		}
+		ret = 0;
+	}
+	/* success and failure alike put both expectations allocated above */
+out_put_both:
+	ip_conntrack_expect_put(exp_reply);
+out_put_orig:
+	ip_conntrack_expect_put(exp_orig);
+out:
+	return ret;
+	/* error unwinding: undo expect_related(), then rejoin the put path */
+out_unexpect_both:
+	ip_conntrack_unexpect_related(exp_reply);
+out_unexpect_orig:
+	ip_conntrack_unexpect_related(exp_orig);
+	goto out_put_both;
+}
+
+static inline int
+pptp_inbound_pkt(struct sk_buff **pskb,
+		 struct tcphdr *tcph,
+		 unsigned int nexthdr_off,
+		 unsigned int datalen,
+		 struct ip_conntrack *ct,
+		 enum ip_conntrack_info ctinfo)
+{
+	struct PptpControlHeader _ctlh, *ctlh;
+	unsigned int reqlen;
+	union pptp_ctrl_union _pptpReq, *pptpReq;
+	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+	u_int16_t msg, *cid, *pcid;
+	u_int32_t seq;
+	/* Track session/call state from a server-sent control message. */
+	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	if (!ctlh) {
+		DEBUGP("error during skb_header_pointer\n");
+		return NF_ACCEPT;
+	}
+	nexthdr_off += sizeof(_ctlh);
+	datalen -= sizeof(_ctlh);
+
+	reqlen = datalen;
+	if (reqlen > sizeof(*pptpReq))
+		reqlen = sizeof(*pptpReq);
+	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	if (!pptpReq) {
+		DEBUGP("error during skb_header_pointer\n");
+		return NF_ACCEPT;
+	}
+
+	msg = ntohs(ctlh->messageType);
+	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
+
+	switch (msg) {
+	case PPTP_START_SESSION_REPLY:
+		if (reqlen < sizeof(_pptpReq.srep)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server confirms new control session */
+		if (info->sstate < PPTP_SESSION_REQUESTED) {
+			DEBUGP("%s without START_SESS_REQUEST\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->srep.resultCode == PPTP_START_OK)
+			info->sstate = PPTP_SESSION_CONFIRMED;
+		else
+			info->sstate = PPTP_SESSION_ERROR;
+		break;
+
+	case PPTP_STOP_SESSION_REPLY:
+		if (reqlen < sizeof(_pptpReq.strep)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server confirms end of control session */
+		if (info->sstate > PPTP_SESSION_STOPREQ) {
+			DEBUGP("%s without STOP_SESS_REQUEST\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
+			info->sstate = PPTP_SESSION_NONE;
+		else
+			info->sstate = PPTP_SESSION_ERROR;
+		break;
+
+	case PPTP_OUT_CALL_REPLY:
+		if (reqlen < sizeof(_pptpReq.ocack)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server accepted call, we now expect GRE frames */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+			break;
+		}
+		if (info->cstate != PPTP_CALL_OUT_REQ &&
+		    info->cstate != PPTP_CALL_OUT_CONF) {
+			DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
+			info->cstate = PPTP_CALL_NONE;
+			break;
+		}
+
+		cid = &pptpReq->ocack.callID;
+		pcid = &pptpReq->ocack.peersCallID;
+
+		info->pac_call_id = ntohs(*cid);
+
+		if (htons(info->pns_call_id) != *pcid) {
+			DEBUGP("%s for unknown callid %u\n",
+				pptp_msg_name[msg], ntohs(*pcid));
+			break;
+		}
+
+		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+			ntohs(*cid), ntohs(*pcid));
+
+		info->cstate = PPTP_CALL_OUT_CONF;
+
+		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
+				       + sizeof(struct PptpControlHeader)
+				       + ((void *)pcid - (void *)pptpReq);
+
+		if (exp_gre(ct, seq, *cid, *pcid) != 0)
+			printk("ip_conntrack_pptp: error during exp_gre\n");
+		break;
+
+	case PPTP_IN_CALL_REQUEST:
+		if (reqlen < sizeof(_pptpReq.icack)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server tells us about incoming call request */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+			break;
+		}
+		pcid = &pptpReq->icack.peersCallID;
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		info->cstate = PPTP_CALL_IN_REQ;
+		info->pac_call_id = ntohs(*pcid);
+		break;
+
+	case PPTP_IN_CALL_CONNECT:
+		if (reqlen < sizeof(_pptpReq.iccon)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* incoming call established: check sstate, then cstate */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+			break;
+		}
+		if (info->cstate != PPTP_CALL_IN_REP
+		    && info->cstate != PPTP_CALL_IN_CONF) {
+			DEBUGP("%s but never sent IN_CALL_REPLY\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+
+		pcid = &pptpReq->iccon.peersCallID;
+		cid = &info->pac_call_id;
+
+		if (info->pns_call_id != ntohs(*pcid)) {
+			DEBUGP("%s for unknown CallID %u\n",
+				pptp_msg_name[msg], ntohs(*pcid));
+			break;
+		}
+
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		info->cstate = PPTP_CALL_IN_CONF;
+
+		/* we expect a GRE connection from PAC to PNS */
+		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
+				       + sizeof(struct PptpControlHeader)
+				       + ((void *)pcid - (void *)pptpReq);
+		/* NOTE(review): *cid is host order here, wire order above */
+		if (exp_gre(ct, seq, *cid, *pcid) != 0)
+			printk("ip_conntrack_pptp: error during exp_gre\n");
+
+		break;
+
+	case PPTP_CALL_DISCONNECT_NOTIFY:
+		if (reqlen < sizeof(_pptpReq.disc)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server confirms disconnect */
+		cid = &pptpReq->disc.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+		info->cstate = PPTP_CALL_NONE;
+
+		/* untrack this call id, unexpect GRE packets */
+		pptp_destroy_siblings(ct);
+		break;
+
+	case PPTP_WAN_ERROR_NOTIFY:
+		break;
+
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* I don't have to explain these ;) */
+		break;
+	default:
+		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
+			? pptp_msg_name[msg]:pptp_msg_name[0], msg);
+		break;
+	}
+
+	/* the verdict is NF_ACCEPT unless a NAT hook is registered */
+	if (ip_nat_pptp_hook_inbound)
+		return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
+						pptpReq);
+
+	return NF_ACCEPT;
+
+}
+
+/* PNS->PAC control message: update the session/call state kept in ct->help.ct_pptp_info, then pass the packet to the NAT hook if one is registered. */
+static inline int
+pptp_outbound_pkt(struct sk_buff **pskb,
+		  struct tcphdr *tcph,
+		  unsigned int nexthdr_off,
+		  unsigned int datalen,
+		  struct ip_conntrack *ct,
+		  enum ip_conntrack_info ctinfo)
+{
+	struct PptpControlHeader _ctlh, *ctlh;
+	unsigned int reqlen;
+	union pptp_ctrl_union _pptpReq, *pptpReq;
+	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+	u_int16_t msg, *cid, *pcid;
+
+	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	if (!ctlh)
+		return NF_ACCEPT;
+	nexthdr_off += sizeof(_ctlh);
+	datalen -= sizeof(_ctlh);
+	reqlen = datalen;	/* never fetch more than the request union can hold */
+	if (reqlen > sizeof(*pptpReq))
+		reqlen = sizeof(*pptpReq);
+	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	if (!pptpReq)
+		return NF_ACCEPT;
+
+	msg = ntohs(ctlh->messageType);
+	DEBUGP("outbound control message %s\n", (msg <= PPTP_MSG_MAX) ? pptp_msg_name[msg] : pptp_msg_name[0]);	/* msg comes off the wire: bound it before indexing */
+
+	switch (msg) {
+	case PPTP_START_SESSION_REQUEST:
+		/* client requests for new control session */
+		if (info->sstate != PPTP_SESSION_NONE) {
+			DEBUGP("%s but we already have one\n",
+				pptp_msg_name[msg]);
+		}
+		info->sstate = PPTP_SESSION_REQUESTED;
+		break;
+	case PPTP_STOP_SESSION_REQUEST:
+		/* client requests end of control session */
+		info->sstate = PPTP_SESSION_STOPREQ;
+		break;
+
+	case PPTP_OUT_CALL_REQUEST:
+		if (reqlen < sizeof(_pptpReq.ocreq)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			/* FIXME: break; (until then callID below is read even from a short request) */
+		}
+
+		/* client initiating connection to server */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+		info->cstate = PPTP_CALL_OUT_REQ;
+		/* track PNS call id */
+		cid = &pptpReq->ocreq.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+		info->pns_call_id = ntohs(*cid);
+		break;
+	case PPTP_IN_CALL_REPLY:
+		if (reqlen < sizeof(_pptpReq.icack)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* client answers incoming call */
+		if (info->cstate != PPTP_CALL_IN_REQ
+		    && info->cstate != PPTP_CALL_IN_REP) {
+			DEBUGP("%s without incall_req\n", 
+				pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
+			info->cstate = PPTP_CALL_NONE;
+			break;
+		}
+		pcid = &pptpReq->icack.peersCallID;
+		if (info->pac_call_id != ntohs(*pcid)) {
+			DEBUGP("%s for unknown call %u\n", 
+				pptp_msg_name[msg], ntohs(*pcid));
+			break;
+		}
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		/* part two of the three-way handshake */
+		info->cstate = PPTP_CALL_IN_REP;
+		info->pns_call_id = ntohs(pptpReq->icack.callID);
+		break;
+
+	case PPTP_CALL_CLEAR_REQUEST:
+		/* client requests hangup of call */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("CLEAR_CALL but no session\n");
+			break;
+		}
+		/* FUTURE: iterate over all calls and check if
+		 * call ID is valid.  We don't do this without newnat,
+		 * because we only know about last call */
+		info->cstate = PPTP_CALL_CLEAR_REQ;
+		break;
+	case PPTP_SET_LINK_INFO:
+		break;
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* I don't have to explain these ;) */
+		break;
+	default:
+		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 
+			pptp_msg_name[msg]:pptp_msg_name[0], msg);
+		/* unknown: no need to create GRE masq table entry */
+		break;
+	}
+	
+	if (ip_nat_pptp_hook_outbound)
+		return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
+						 pptpReq);
+
+	return NF_ACCEPT;
+}
+
+
+/* conntrack helper entry point: track caller id inside control connection, call expect_related; anything that cannot be parsed as a PPTP control message is accepted untouched */
+static int 
+conntrack_pptp_help(struct sk_buff **pskb,
+		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+
+{
+	struct pptp_pkt_hdr _pptph, *pptph;
+	struct tcphdr _tcph, *tcph;
+	u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+	u_int32_t datalen;
+	int dir = CTINFO2DIR(ctinfo);
+	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+	unsigned int nexthdr_off;
+
+	int oldsstate, oldcstate;
+	int ret;
+
+	/* don't do any tracking before tcp handshake complete */
+	if (ctinfo != IP_CT_ESTABLISHED 
+	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
+		return NF_ACCEPT;
+	}
+	
+	nexthdr_off = (*pskb)->nh.iph->ihl*4;
+	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+	BUG_ON(!tcph);
+	nexthdr_off += tcph->doff * 4;
+ 	datalen = tcplen - tcph->doff * 4;
+
+	if (tcph->fin || tcph->rst) {
+		DEBUGP("RST/FIN received, timeouting GRE\n");
+		/* can't do this after real newnat */
+		info->cstate = PPTP_CALL_NONE;
+
+		/* untrack this call id, unexpect GRE packets */
+		pptp_destroy_siblings(ct);	/* NOTE(review): runs before ip_pptp_lock is taken, unlike the state updates below */
+	}
+
+	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+	if (!pptph) {
+		DEBUGP("no full PPTP header, can't track\n");
+		return NF_ACCEPT;
+	}
+	nexthdr_off += sizeof(_pptph);
+	datalen -= sizeof(_pptph);
+
+	/* if it's not a control message we can't do anything with it */
+	if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
+	    ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
+		DEBUGP("not a control packet\n");
+		return NF_ACCEPT;
+	}
+
+	oldsstate = info->sstate;	/* snapshots are only used by the DEBUGP below */
+	oldcstate = info->cstate;
+
+	spin_lock_bh(&ip_pptp_lock);
+
+	/* FIXME: We just blindly assume that the control connection is always
+	 * established from PNS->PAC.  However, RFC makes no guarantee */
+	if (dir == IP_CT_DIR_ORIGINAL)
+		/* client -> server (PNS -> PAC) */
+		ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+					ctinfo);
+	else
+		/* server -> client (PAC -> PNS) */
+		ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+				       ctinfo);
+	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
+		oldsstate, info->sstate, oldcstate, info->cstate);
+	spin_unlock_bh(&ip_pptp_lock);
+
+	return ret;
+}
+
+/* control protocol helper: matches TCP tuples whose source port is PPTP_CONTROL_PORT and dispatches them to conntrack_pptp_help() */
+static struct ip_conntrack_helper pptp = { 
+	.list = { NULL, NULL },
+	.name = "pptp", 
+	.me = THIS_MODULE,
+	.max_expected = 2,
+	.timeout = 5 * 60,
+	.tuple = { .src = { .ip = 0, 
+		 	    .u = { .tcp = { .port =  
+				    __constant_htons(PPTP_CONTROL_PORT) } } 
+			  }, 
+		   .dst = { .ip = 0, 
+			    .u = { .all = 0 },
+			    .protonum = IPPROTO_TCP
+			  } 
+		 },
+	.mask = { .src = { .ip = 0, 
+			   .u = { .tcp = { .port = 0xffff } } 
+			 }, 
+		  .dst = { .ip = 0, 
+			   .u = { .all = 0 },
+			   .protonum = 0xff 
+		 	 } 
+		},
+	.help = conntrack_pptp_help
+};
+
+extern void ip_ct_proto_gre_fini(void);	/* must not be annotated __exit: init() below calls it on its error path */
+extern int __init ip_ct_proto_gre_init(void);
+
+/* ip_conntrack_pptp initialization: bring up the GRE protocol tracker first, then register the pptp helper; the tracker is torn down again if helper registration fails */
+static int __init init(void)
+{
+	int retcode;
+ 
+	retcode = ip_ct_proto_gre_init();
+	if (retcode < 0)
+		return retcode;
+
+	DEBUGP(" registering helper\n");
+	if ((retcode = ip_conntrack_helper_register(&pptp))) {
+		printk(KERN_ERR "Unable to register conntrack application "
+				"helper for pptp: %d\n", retcode);
+		ip_ct_proto_gre_fini();
+		return retcode;
+	}
+
+	printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ip_conntrack_helper_unregister(&pptp);	/* undo init() in reverse order: helper first, then the GRE tracker */
+	ip_ct_proto_gre_fini();
+	printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
+
+EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
+EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
+EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
+EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 15aef3564742..b08a432efcf8 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = ip_conntrack_expect_find_get(&tuple);
+	exp = ip_conntrack_expect_find(&tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = ip_conntrack_expect_find_get(&tuple);
+		exp = ip_conntrack_expect_find(&tuple);
 		if (!exp)
 			return -ENOENT;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
new file mode 100644
index 000000000000..de3cb9db6f85
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -0,0 +1,327 @@
+/*
+ * ip_conntrack_proto_gre.c - Version 3.0 
+ *
+ * Connection tracking protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two 
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/list.h>
+
+static DEFINE_RWLOCK(ip_ct_gre_lock);
+#define ASSERT_READ_LOCK(x)
+#define ASSERT_WRITE_LOCK(x)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
+
+/* shamelessly stolen from ip_conntrack_proto_udp.c */
+#define GRE_TIMEOUT		(30*HZ)
+#define GRE_STREAM_TIMEOUT	(180*HZ)
+
+#if 0
+#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
+			NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
+			NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
+#else
+#define DEBUGP(x, args...)
+#define DUMP_TUPLE_GRE(x)
+#endif
+				
+/* GRE KEYMAP HANDLING FUNCTIONS */
+static LIST_HEAD(gre_keymap_list);
+
+static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
+				const struct ip_conntrack_tuple *t)
+{	/* LIST_FIND predicate: non-zero when both addresses, the protocol number and the destination key match; the source key is not compared */
+	return ((km->tuple.src.ip == t->src.ip) &&
+		(km->tuple.dst.ip == t->dst.ip) &&
+		(km->tuple.dst.protonum == t->dst.protonum) &&
+		(km->tuple.dst.u.all == t->dst.u.all));
+}
+
+/* look up the source key for a given tuple under the keymap read lock; returns 0 when no keymap entry matches */
+static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
+{
+	struct ip_ct_gre_keymap *km;
+	u_int32_t key = 0;
+
+	read_lock_bh(&ip_ct_gre_lock);
+	km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
+			struct ip_ct_gre_keymap *, t);
+	if (km)
+		key = km->tuple.src.u.gre.key;
+	read_unlock_bh(&ip_ct_gre_lock);
+	
+	DEBUGP("lookup src key 0x%x up key for ", key);
+	DUMP_TUPLE_GRE(t);
+
+	return key;
+}
+
+/* add a single keymap entry, associate with specified master ct; returns 0 on success or retransmission, -1 for a non-pptp ct, -EEXIST if the slot is taken, -ENOMEM on allocation failure */
+int
+ip_ct_gre_keymap_add(struct ip_conntrack *ct,
+		     struct ip_conntrack_tuple *t, int reply)
+{
+	struct ip_ct_gre_keymap **exist_km, *km, *old;
+
+	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
+		DEBUGP("refusing to add GRE keymap to non-pptp session\n");
+		return -1;
+	}
+
+	if (!reply) 
+		exist_km = &ct->help.ct_pptp_info.keymap_orig;
+	else
+		exist_km = &ct->help.ct_pptp_info.keymap_reply;
+
+	if (*exist_km) {
+		/* check whether it's a retransmission; the list walk takes the read lock just like gre_keymap_lookup() */
+		read_lock_bh(&ip_ct_gre_lock);
+		old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
+				struct ip_ct_gre_keymap *, t);
+		read_unlock_bh(&ip_ct_gre_lock);
+		if (old == *exist_km) {
+			DEBUGP("retransmission\n");
+			return 0;
+		}
+		DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct);
+		return -EEXIST;
+	}
+
+	km = kmalloc(sizeof(*km), GFP_ATOMIC);
+	if (!km)
+		return -ENOMEM;
+
+	memcpy(&km->tuple, t, sizeof(*t));
+	*exist_km = km;
+
+	DEBUGP("adding new entry %p: ", km);
+	DUMP_TUPLE_GRE(&km->tuple);
+
+	write_lock_bh(&ip_ct_gre_lock);
+	list_append(&gre_keymap_list, km);
+	write_unlock_bh(&ip_ct_gre_lock);
+
+	return 0;
+}
+
+/* destroy the keymap entries associated with specified master ct: unlink and free both directions under the write lock; does nothing unless ct's helper is named "pptp" */
+void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
+{
+	DEBUGP("entering for ct %p\n", ct);
+
+	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
+		DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
+		return;
+	}
+
+	write_lock_bh(&ip_ct_gre_lock);
+	if (ct->help.ct_pptp_info.keymap_orig) {
+		DEBUGP("removing %p from list\n", 
+			ct->help.ct_pptp_info.keymap_orig);
+		list_del(&ct->help.ct_pptp_info.keymap_orig->list);
+		kfree(ct->help.ct_pptp_info.keymap_orig);
+		ct->help.ct_pptp_info.keymap_orig = NULL;	/* cleared so a second call is harmless */
+	}
+	if (ct->help.ct_pptp_info.keymap_reply) {
+		DEBUGP("removing %p from list\n",
+			ct->help.ct_pptp_info.keymap_reply);
+		list_del(&ct->help.ct_pptp_info.keymap_reply->list);
+		kfree(ct->help.ct_pptp_info.keymap_reply);
+		ct->help.ct_pptp_info.keymap_reply = NULL;
+	}
+	write_unlock_bh(&ip_ct_gre_lock);
+}
+
+
+/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
+
+/* invert gre part of tuple: the source and destination keys of orig are swapped into tuple; always returns 1 */
+static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
+			    const struct ip_conntrack_tuple *orig)
+{
+	tuple->dst.u.gre.key = orig->src.u.gre.key;
+	tuple->src.u.gre.key = orig->dst.u.gre.key;
+
+	return 1;
+}
+
+/* gre hdr info to tuple: returns 0 only for a PPTP-version header carrying a non-PPTP protocol, 1 otherwise */
+static int gre_pkt_to_tuple(const struct sk_buff *skb,
+			   unsigned int dataoff,
+			   struct ip_conntrack_tuple *tuple)
+{
+	struct gre_hdr_pptp _pgrehdr, *pgrehdr;
+	u_int32_t srckey;
+	struct gre_hdr _grehdr, *grehdr;
+
+	/* first only delinearize old RFC1701 GRE header */
+	grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
+	if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+		/* try to behave like "ip_conntrack_proto_generic" */
+		tuple->src.u.all = 0;
+		tuple->dst.u.all = 0;
+		return 1;
+	}
+
+	/* PPTP header is variable length, only need up to the call_id field */
+	pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
+	if (!pgrehdr)
+		return 1;	/* NOTE(review): the tuple keys are not written on this path */
+
+	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+		DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
+		return 0;
+	}
+
+	tuple->dst.u.gre.key = pgrehdr->call_id;	/* destination key comes from the packet, source key from the keymap (0 if unknown) */
+	srckey = gre_keymap_lookup(tuple);
+	tuple->src.u.gre.key = srckey;
+
+	return 1;
+}
+
+/* print gre part of tuple: both keys in hex after ntohs(); returns whatever seq_printf() returns */
+static int gre_print_tuple(struct seq_file *s,
+			   const struct ip_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "srckey=0x%x dstkey=0x%x ", 
+			  ntohs(tuple->src.u.gre.key),
+			  ntohs(tuple->dst.u.gre.key));
+}
+
+/* print private data for conntrack: the two per-connection GRE timeouts, each divided by HZ */
+static int gre_print_conntrack(struct seq_file *s,
+			       const struct ip_conntrack *ct)
+{
+	return seq_printf(s, "timeout=%u, stream_timeout=%u ",
+			  (ct->proto.gre.timeout / HZ),
+			  (ct->proto.gre.stream_timeout / HZ));
+}
+
+/* Returns verdict for packet (always NF_ACCEPT), and may modify conntrack by refreshing its timeout */
+static int gre_packet(struct ip_conntrack *ct,
+		      const struct sk_buff *skb,
+		      enum ip_conntrack_info conntrackinfo)
+{
+	/* If we've seen traffic both ways, this is a GRE connection.
+	 * Extend timeout. */
+	if (ct->status & IPS_SEEN_REPLY) {
+		ip_ct_refresh_acct(ct, conntrackinfo, skb,
+				   ct->proto.gre.stream_timeout);
+		/* Also, more likely to be important, and not a probe. */
+		set_bit(IPS_ASSURED_BIT, &ct->status);
+	} else
+		ip_ct_refresh_acct(ct, conntrackinfo, skb,
+				   ct->proto.gre.timeout);	/* no reply seen yet: use the plain timeout */
+	
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found; seeds both timeouts with the module defaults and always returns 1 */
+static int gre_new(struct ip_conntrack *ct,
+		   const struct sk_buff *skb)
+{ 
+	DEBUGP(": ");
+	DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+	/* initialize to sane value.  Ideally a conntrack helper
+	 * (e.g. in case of pptp) is increasing them */
+	ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
+	ct->proto.gre.timeout = GRE_TIMEOUT;
+
+	return 1;
+}
+
+/* Called when a conntrack entry has already been removed from the hashes
+ * and is about to be deleted from memory; drops the master's keymap entries, if there is a master */
+static void gre_destroy(struct ip_conntrack *ct)
+{
+	struct ip_conntrack *master = ct->master;
+	DEBUGP(" entering\n");
+
+	if (!master)
+		DEBUGP("no master !?!\n");
+	else
+		ip_ct_gre_keymap_destroy(master);
+}
+
+/* protocol helper struct: wires the GRE callbacks defined in this file into the conntrack core for IPPROTO_GRE */
+static struct ip_conntrack_protocol gre = { 
+	.proto		 = IPPROTO_GRE,
+	.name		 = "gre", 
+	.pkt_to_tuple	 = gre_pkt_to_tuple,
+	.invert_tuple	 = gre_invert_tuple,
+	.print_tuple	 = gre_print_tuple,
+	.print_conntrack = gre_print_conntrack,
+	.packet		 = gre_packet,
+	.new		 = gre_new,
+	.destroy	 = gre_destroy,
+	.me 		 = THIS_MODULE,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
+#endif
+};
+
+/* ip_conntrack_proto_gre initialization */
+int __init ip_ct_proto_gre_init(void)
+{
+	return ip_conntrack_protocol_register(&gre);
+}
+
+/* Must not be __exit: the pptp helper's __init function calls this when helper registration fails. */
+void ip_ct_proto_gre_fini(void)
+{
+	struct list_head *pos, *n;
+
+	/* delete all keymap entries */
+	write_lock_bh(&ip_ct_gre_lock);
+	list_for_each_safe(pos, n, &gre_keymap_list) {
+		DEBUGP("deleting keymap %p at module unload time\n", pos);
+		list_del(pos);
+		kfree(pos);	/* frees through the list_head pointer; presumably the list head is the keymap's first member -- TODO confirm */
+	}
+	write_unlock_bh(&ip_ct_gre_lock);
+	ip_conntrack_protocol_unregister(&gre); 
+}
+
+EXPORT_SYMBOL(ip_ct_gre_keymap_add);
+EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ae3e3e655db5..d3c7808010ec 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -993,11 +993,11 @@ EXPORT_SYMBOL(ip_ct_refresh_acct);
 
 EXPORT_SYMBOL(ip_conntrack_expect_alloc);
 EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
+EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_unexpect_related);
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
 EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
 
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1adedb743f60..c3ea891d38e7 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
+EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
 #endif
 
 int __init ip_nat_init(void)
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
new file mode 100644
index 000000000000..3cdd0684d30d
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -0,0 +1,401 @@
+/*
+ * ip_nat_pptp.c	- Version 3.0
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft.  PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * 	   (needs netfilter tuple reservation)
+ *
+ * Changes:
+ *     2002-02-10 - Version 1.3
+ *       - Use ip_nat_mangle_tcp_packet() because of cloned skb's
+ *	   in local connections (Philip Craig <philipc@snapgear.com>)
+ *       - add checks for magicCookie and pptp version
+ *       - make argument list of pptp_{out,in}bound_packet() shorter
+ *       - move to C99 style initializers
+ *       - print version number at module loadtime
+ *     2003-09-22 - Version 1.5
+ *       - use SNATed tcp sourceport as callid, since we get called before
+ *	   TCP header is mangled (Philip Craig <philipc@snapgear.com>)
+ *     2004-10-22 - Version 2.0
+ *       - kernel 2.6.x version
+ *     2005-06-10 - Version 3.0
+ *       - kernel >= 2.6.11 version,
+ *	   funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
+ * 
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_pptp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+#define IP_NAT_PPTP_VERSION "3.0"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+
+
+#if 0
+extern const char *pptp_msg_name[];
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+				       __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static void pptp_nat_expected(struct ip_conntrack *ct,
+			      struct ip_conntrack_expect *exp)
+{	/* expectation callback: remove the still-pending expectation for the opposite direction, then let ct follow its master's NAT setup */
+	struct ip_conntrack *master = ct->master;
+	struct ip_conntrack_expect *other_exp;
+	struct ip_conntrack_tuple t;
+	struct ip_ct_pptp_master *ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info;
+
+	ct_pptp_info = &master->help.ct_pptp_info;	/* NOTE(review): both locals are assigned but the code below reads through master-> directly */
+	nat_pptp_info = &master->nat.help.nat_pptp_info;
+
+	/* And here goes the grand finale of corrosion... */
+
+	if (exp->dir == IP_CT_DIR_ORIGINAL) {
+		DEBUGP("we are PNS->PAC\n");
+		/* therefore, build tuple for PAC->PNS */
+		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+		t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+		t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+		t.dst.protonum = IPPROTO_GRE;
+	} else {
+		DEBUGP("we are PAC->PNS\n");
+		/* build tuple for PNS->PAC */
+		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+		t.src.u.gre.key = 
+			htons(master->nat.help.nat_pptp_info.pns_call_id);
+		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+		t.dst.u.gre.key = 
+			htons(master->nat.help.nat_pptp_info.pac_call_id);
+		t.dst.protonum = IPPROTO_GRE;
+	}
+
+	DEBUGP("trying to unexpect other dir: ");
+	DUMP_TUPLE(&t);
+	other_exp = ip_conntrack_expect_find(&t);
+	if (other_exp) {
+		ip_conntrack_unexpect_related(other_exp);
+		ip_conntrack_expect_put(other_exp);	/* presumably drops the reference taken by the find above -- TODO confirm */
+		DEBUGP("success\n");
+	} else {
+		DEBUGP("not found!\n");
+	}
+
+	ip_nat_follow_master(ct, exp);
+}
+
+/* outbound packets == from PNS to PAC: rewrite the call ID carried in the request; returns NF_ACCEPT, or NF_DROP when mangling fails */
+static int
+pptp_outbound_pkt(struct sk_buff **pskb,
+		  struct ip_conntrack *ct,
+		  enum ip_conntrack_info ctinfo,
+		  struct PptpControlHeader *ctlh,
+		  union pptp_ctrl_union *pptpReq)
+
+{
+	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
+
+	u_int16_t msg, *cid = NULL, new_callid;
+
+	new_callid = htons(ct_pptp_info->pns_call_id);
+	
+	switch (msg = ntohs(ctlh->messageType)) {
+		case PPTP_OUT_CALL_REQUEST:
+			cid = &pptpReq->ocreq.callID;
+			/* FIXME: ideally we would want to reserve a call ID
+			 * here.  current netfilter NAT core is not able to do
+			 * this :( For now we use TCP source port. This breaks
+			 * multiple calls within one control session */
+
+			/* save original call ID in nat_info */
+			nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+			/* don't use tcph->source since we are at a DSTmanip
+			 * hook (e.g. PREROUTING) and pkt is not mangled yet */
+			new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+			/* save new call ID in ct info */
+			ct_pptp_info->pns_call_id = ntohs(new_callid);
+			break;
+		case PPTP_IN_CALL_REPLY:
+			cid = &pptpReq->icack.callID;	/* IN_CALL_REPLY is the icack member, as in the conntrack helper */
+			break;
+		case PPTP_CALL_CLEAR_REQUEST:
+			cid = &pptpReq->clrreq.callID;
+			break;
+		default:
+			DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+			      (msg <= PPTP_MSG_MAX)? 
+			      pptp_msg_name[msg]:pptp_msg_name[0]);
+			/* fall through */
+
+		case PPTP_SET_LINK_INFO:
+			/* only need to NAT in case PAC is behind NAT box */
+		case PPTP_START_SESSION_REQUEST:
+		case PPTP_START_SESSION_REPLY:
+		case PPTP_STOP_SESSION_REQUEST:
+		case PPTP_STOP_SESSION_REPLY:
+		case PPTP_ECHO_REQUEST:
+		case PPTP_ECHO_REPLY:
+			/* no need to alter packet */
+			return NF_ACCEPT;
+	}
+
+	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+	 * down to here */
+
+	IP_NF_ASSERT(cid);
+
+	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+		ntohs(*cid), ntohs(new_callid));
+
+	/* mangle packet; the match offset is built from the header sizes so it stays valid when ctlh and pptpReq are separate copies rather than adjacent packet data */
+	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+		sizeof(struct pptp_pkt_hdr) + sizeof(*ctlh) +
+				 	((void *)cid - (void *)pptpReq),
+					sizeof(new_callid), (char *)&new_callid,
+				 	sizeof(new_callid)) == 0)
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+
+static int	/* 0 on success, 1 on failure; on failure anything registered so far is removed again */
+pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
+	     struct ip_conntrack_expect *expect_reply)
+{
+	struct ip_ct_pptp_master *ct_pptp_info = 
+				&expect_orig->master->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = 
+				&expect_orig->master->nat.help.nat_pptp_info;
+
+	struct ip_conntrack *ct = expect_orig->master;
+
+	struct ip_conntrack_tuple inv_t;
+	struct ip_conntrack_tuple *orig_t, *reply_t;
+
+	/* save original PAC call ID in nat_info */
+	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
+
+	/* alter expectation */
+	orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	/* alter expectation for PNS->PAC direction */
+	invert_tuplepr(&inv_t, &expect_orig->tuple);
+	expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
+	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
+	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	inv_t.src.ip = reply_t->src.ip;	/* NOTE(review): these four inv_t fields are reassigned below before inv_t is read in this function */
+	inv_t.dst.ip = reply_t->dst.ip;
+	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
+	inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+
+	if (!ip_conntrack_expect_related(expect_orig)) {
+		DEBUGP("successfully registered expect\n");
+	} else {
+		DEBUGP("can't expect_related(expect_orig)\n");
+		return 1;
+	}
+
+	/* alter expectation for PAC->PNS direction */
+	invert_tuplepr(&inv_t, &expect_reply->tuple);
+	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
+	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
+	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	inv_t.src.ip = orig_t->src.ip;
+	inv_t.dst.ip = orig_t->dst.ip;
+	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
+	inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+
+	if (!ip_conntrack_expect_related(expect_reply)) {
+		DEBUGP("successfully registered expect\n");
+	} else {
+		DEBUGP("can't expect_related(expect_reply)\n");
+		ip_conntrack_unexpect_related(expect_orig);
+		return 1;
+	}
+
+	if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
+		DEBUGP("can't register original keymap\n");
+		ip_conntrack_unexpect_related(expect_orig);
+		ip_conntrack_unexpect_related(expect_reply);
+		return 1;
+	}
+
+	if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
+		DEBUGP("can't register reply keymap\n");
+		ip_conntrack_unexpect_related(expect_orig);
+		ip_conntrack_unexpect_related(expect_reply);
+		ip_ct_gre_keymap_destroy(ct);	/* also drops the original-direction keymap added just above */
+		return 1;
+	}
+
+	return 0;
+}
+
+/* inbound packets == from PAC to PNS: rewrite the peer's call ID back to the PNS's original one; returns NF_ACCEPT, or the mangle result when mangling fails */
+static int
+pptp_inbound_pkt(struct sk_buff **pskb,
+		 struct ip_conntrack *ct,
+		 enum ip_conntrack_info ctinfo,
+		 struct PptpControlHeader *ctlh,
+		 union pptp_ctrl_union *pptpReq)
+{
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
+	u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
+
+	int ret = NF_ACCEPT, rv;
+
+	new_pcid = htons(nat_pptp_info->pns_call_id);
+
+	switch (msg = ntohs(ctlh->messageType)) {
+	case PPTP_OUT_CALL_REPLY:
+		pcid = &pptpReq->ocack.peersCallID;	
+		cid = &pptpReq->ocack.callID;
+		break;
+	case PPTP_IN_CALL_CONNECT:
+		pcid = &pptpReq->iccon.peersCallID;
+		break;
+	case PPTP_IN_CALL_REQUEST:
+		/* only need to nat in case PAC is behind NAT box; pcid stays NULL, so there is nothing to mangle */
+		return NF_ACCEPT;
+	case PPTP_WAN_ERROR_NOTIFY:
+		pcid = &pptpReq->wanerr.peersCallID;
+		break;
+	case PPTP_CALL_DISCONNECT_NOTIFY:
+		pcid = &pptpReq->disc.callID;
+		break;
+	case PPTP_SET_LINK_INFO:
+		pcid = &pptpReq->setlink.peersCallID;
+		break;
+
+	default:
+		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 
+			pptp_msg_name[msg]:pptp_msg_name[0]);
+		/* fall through */
+
+	case PPTP_START_SESSION_REQUEST:
+	case PPTP_START_SESSION_REPLY:
+	case PPTP_STOP_SESSION_REQUEST:
+	case PPTP_STOP_SESSION_REPLY:
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* no need to alter packet */
+		return NF_ACCEPT;
+	}
+
+	/* only OUT_CALL_REPLY, IN_CALL_CONNECT, WAN_ERROR_NOTIFY,
+	 * CALL_DISCONNECT_NOTIFY, SET_LINK_INFO pass down here */
+
+	/* mangle packet; offsets are built from the header sizes so they stay valid when ctlh and pptpReq are separate copies */
+	IP_NF_ASSERT(pcid);
+	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
+		ntohs(*pcid), ntohs(new_pcid));
+	
+	rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
+				      sizeof(struct pptp_pkt_hdr) + sizeof(*ctlh) +
+				      ((void *)pcid - (void *)pptpReq),
+				      sizeof(new_pcid), (char *)&new_pcid, sizeof(new_pcid));
+	if (rv != NF_ACCEPT) 
+		return rv;
+
+	if (new_cid) {
+		IP_NF_ASSERT(cid);
+		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+			ntohs(*cid), ntohs(new_cid));
+		rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
+					      sizeof(struct pptp_pkt_hdr) + sizeof(*ctlh) +
+					      ((void *)cid - (void *)pptpReq),
+					      sizeof(new_cid), (char *)&new_cid,
+					      sizeof(new_cid));
+		if (rv != NF_ACCEPT)
+			return rv;
+	}
+
+	/* check for earlier return value of 'switch' above */
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	/* great, at least we don't need to resize packets */
+	return NF_ACCEPT;
+}
+
+
+extern int __init ip_nat_proto_gre_init(void);
+extern void __exit ip_nat_proto_gre_fini(void);
+
+static int __init init(void)
+{	/* bring up GRE NAT support, then publish the four NAT callbacks through the ip_nat_pptp_hook_* pointers; each pointer must still be unset */
+	int ret;
+
+	DEBUGP("%s: registering NAT helper\n", __FILE__);
+
+	ret = ip_nat_proto_gre_init();
+	if (ret < 0)
+		return ret;
+
+	BUG_ON(ip_nat_pptp_hook_outbound);
+	ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
+
+	BUG_ON(ip_nat_pptp_hook_inbound);
+	ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
+
+	BUG_ON(ip_nat_pptp_hook_exp_gre);
+	ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
+
+	BUG_ON(ip_nat_pptp_hook_expectfn);
+	ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
+
+	printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
+	return 0;
+}
+
+static void __exit fini(void)
+{	/* clear the hook pointers in the reverse order of init(), then tear down GRE NAT support */
+	DEBUGP("cleanup_module\n" );
+
+	ip_nat_pptp_hook_expectfn = NULL;
+	ip_nat_pptp_hook_exp_gre = NULL;
+	ip_nat_pptp_hook_inbound = NULL;
+	ip_nat_pptp_hook_outbound = NULL;
+
+	ip_nat_proto_gre_fini();
+	/* Make sure no one calls it, meanwhile */
+	synchronize_net();
+
+	printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
new file mode 100644
index 000000000000..7c1285401672
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -0,0 +1,214 @@
+/*
+ * ip_nat_proto_gre.c - Version 2.0
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol that is generally not well suited
+ * to NAT, as it has no protocol-specific part such as port numbers.
+ *
+ * It has an optional key field, which may help us distinguish two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+				       __FUNCTION__, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+/* Return non-zero if the GRE key selected by maniptype lies in [min, max]. */
+static int
+gre_in_range(const struct ip_conntrack_tuple *tuple,
+	     enum ip_nat_manip_type maniptype,
+	     const union ip_conntrack_manip_proto *min,
+	     const union ip_conntrack_manip_proto *max)
+{
+	u_int16_t key;	/* 16 bit, network byte order */
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		key = tuple->src.u.gre.key;
+	else
+		key = tuple->dst.u.gre.key;
+	/* compare in host order; the key is 16 bit, hence ntohs, not ntohl */
+	return ntohs(key) >= ntohs(min->gre.key)
+		&& ntohs(key) <= ntohs(max->gre.key);
+}
+
+/* Find a GRE key that makes tuple unique; return 1 on success, 0 if none. */
+static int
+gre_unique_tuple(struct ip_conntrack_tuple *tuple,
+		 const struct ip_nat_range *range,
+		 enum ip_nat_manip_type maniptype,
+		 const struct ip_conntrack *conntrack)
+{
+	static u_int16_t key;	/* rotating start point, kept across calls */
+	u_int16_t *keyptr;
+	unsigned int min, i, range_size;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		keyptr = &tuple->src.u.gre.key;
+	else
+		keyptr = &tuple->dst.u.gre.key;
+
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+		DEBUGP("%p: NATing GRE PPTP\n", conntrack);
+		min = 1;
+		range_size = 0xffff;
+	} else {
+		min = ntohs(range->min.gre.key);
+		range_size = ntohs(range->max.gre.key) - min + 1;
+	}
+
+	DEBUGP("min = %u, range_size = %u\n", min, range_size);
+	/* *keyptr is 16 bit: a truncated htonl() would be 0 on little endian */
+	for (i = 0; i < range_size; i++, key++) {
+		*keyptr = htons(min + key % range_size);
+		if (!ip_nat_used_tuple(tuple, conntrack))
+			return 1;
+	}
+
+	DEBUGP("%p: no NAT mapping\n", conntrack);
+
+	return 0;
+}
+
+/* manipulate a GRE packet according to maniptype (only DST alters the packet) */
+static int
+gre_manip_pkt(struct sk_buff **pskb,
+	      unsigned int iphdroff,
+	      const struct ip_conntrack_tuple *tuple,
+	      enum ip_nat_manip_type maniptype)
+{
+	struct gre_hdr *greh;
+	struct gre_hdr_pptp *pgreh;
+	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	unsigned int hdroff = iphdroff + iph->ihl*4;
+
+	/* pgreh includes two optional 32bit fields which are not required
+	 * to be there.  That's where the magic '8' comes from */
+	if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
+		return 0;
+	/* (0 is also returned for an unknown GRE version; 1 in all other cases) */
+	greh = (void *)(*pskb)->data + hdroff;
+	pgreh = (struct gre_hdr_pptp *) greh;
+
+	/* we only have destination manip of a packet, since 'source key'
+	 * is not present in the packet itself */
+	if (maniptype == IP_NAT_MANIP_DST) {
+		/* key manipulation is always dest */
+		switch (greh->version) {
+		case 0:
+			if (!greh->key) {
+				DEBUGP("can't nat GRE w/o key\n");
+				break;
+			}
+			if (greh->csum) {
+				/* FIXME: Never tested this code... */
+				*(gre_csum(greh)) =
+					ip_nat_cheat_check(~*(gre_key(greh)),
+							tuple->dst.u.gre.key,
+							*(gre_csum(greh)));
+			}
+			*(gre_key(greh)) = tuple->dst.u.gre.key;
+			break;
+		case GRE_VERSION_PPTP:
+			DEBUGP("call_id -> 0x%04x\n",
+				ntohs(tuple->dst.u.gre.key));
+			pgreh->call_id = tuple->dst.u.gre.key;	/* both 16 bit, net order */
+			break;
+		default:
+			DEBUGP("can't nat unknown GRE version\n");
+			return 0;
+			break;
+		}
+	}
+	return 1;
+}
+
+/* Format the GRE keys selected by mask into buffer; returns chars written. */
+static unsigned int
+gre_print(char *buffer,
+	  const struct ip_conntrack_tuple *match,
+	  const struct ip_conntrack_tuple *mask)
+{
+	unsigned int len = 0;
+
+	if (mask->src.u.gre.key)
+		len += sprintf(buffer + len, "srckey=0x%x ",
+				ntohs(match->src.u.gre.key));
+	/* the destination key comes from match->dst, not match->src */
+	if (mask->dst.u.gre.key)
+		len += sprintf(buffer + len, "dstkey=0x%x ",
+				ntohs(match->dst.u.gre.key));
+
+	return len;
+}
+
+/* Format the key range into buffer unless it is 0..0xFFFF; returns length. */
+static unsigned int
+gre_print_range(char *buffer, const struct ip_nat_range *range)
+{
+	if (range->min.gre.key != 0
+	    || range->max.gre.key != 0xFFFF) {
+		if (range->min.gre.key == range->max.gre.key)
+			return sprintf(buffer, "key 0x%x ",
+					ntohs(range->min.gre.key));
+		else	/* hex digits, to match the "0x" prefix */
+			return sprintf(buffer, "keys 0x%x-0x%x ",
+					ntohs(range->min.gre.key),
+					ntohs(range->max.gre.key));
+	} else
+		return 0;
+}
+
+/* NAT protocol descriptor for GRE, (un)registered by the two functions below */
+static struct ip_nat_protocol gre = { 
+	.name		= "GRE", 
+	.protonum	= IPPROTO_GRE,
+	.manip_pkt	= gre_manip_pkt,
+	.in_range	= gre_in_range,
+	.unique_tuple	= gre_unique_tuple,
+	.print		= gre_print,
+	.print_range	= gre_print_range,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
+	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
+#endif	/* conntrack netlink built in or as a module */
+};
+/* Register the GRE NAT protocol; returns ip_nat_protocol_register()'s result */
+int __init ip_nat_proto_gre_init(void)
+{
+	return ip_nat_protocol_register(&gre);
+}
+/* Unregister the GRE NAT protocol descriptor again */
+void __exit ip_nat_proto_gre_fini(void)
+{
+	ip_nat_protocol_unregister(&gre);
+}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d38913754b1..9bcb398fbc1f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -13,6 +13,7 @@
 #include <linux/config.h>
 #include <linux/proc_fs.h>
 #include <linux/jhash.h>
+#include <linux/bitops.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
@@ -30,7 +31,7 @@
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 
-#define CLUSTERIP_VERSION "0.7"
+#define CLUSTERIP_VERSION "0.8"
 
 #define DEBUG_CLUSTERIP
 
@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
 struct clusterip_config {
 	struct list_head list;			/* list of all configs */
 	atomic_t refcount;			/* reference count */
+	atomic_t entries;			/* number of entries/rules
+						 * referencing us */
 
 	u_int32_t clusterip;			/* the IP address */
 	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
 	struct net_device *dev;			/* device */
 	u_int16_t num_total_nodes;		/* total number of nodes */
-	u_int16_t num_local_nodes;		/* number of local nodes */
-	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];	/* node number array */
+	unsigned long local_nodes;		/* node number array */
 
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *pde;		/* proc dir entry */
@@ -66,8 +68,7 @@ struct clusterip_config {
 
 static LIST_HEAD(clusterip_configs);
 
-/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
- * data within all structurses (num_local_nodes, local_nodes[]) */
+/* clusterip_lock protects the clusterip_configs list */
 static DEFINE_RWLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
 #endif
 
 static inline void
-clusterip_config_get(struct clusterip_config *c) {
+clusterip_config_get(struct clusterip_config *c)
+{
 	atomic_inc(&c->refcount);
 }
 
 static inline void
-clusterip_config_put(struct clusterip_config *c) {
-	if (atomic_dec_and_test(&c->refcount)) {
+clusterip_config_put(struct clusterip_config *c)
+{
+	if (atomic_dec_and_test(&c->refcount))
+		kfree(c);
+}
+
+/* take an entry reference: one more entry (rule) is using this config */
+static inline void
+clusterip_config_entry_get(struct clusterip_config *c)
+{
+	atomic_inc(&c->entries);
+}
+
+/* decrease the count of entries using/referencing this config.  If last
+ * entry(rule) is removed, remove the config from lists, but don't free it
+ * yet, since proc-files could still be holding references */
+static inline void
+clusterip_config_entry_put(struct clusterip_config *c)
+{
+	if (atomic_dec_and_test(&c->entries)) {
 		write_lock_bh(&clusterip_lock);
 		list_del(&c->list);
 		write_unlock_bh(&clusterip_lock);
+
 		dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
 		dev_put(c->dev);
-		kfree(c);
+
+		/* In case anyone still accesses the file, the open/close
+		 * functions are also incrementing the refcount on their own,
+		 * so it's safe to remove the entry even if it's in use. */
+#ifdef CONFIG_PROC_FS
+		remove_proc_entry(c->pde->name, c->pde->parent);
+#endif
 	}
 }
 
-
 static struct clusterip_config *
 __clusterip_config_find(u_int32_t clusterip)
 {
@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
 }
 
 static inline struct clusterip_config *
-clusterip_config_find_get(u_int32_t clusterip)
+clusterip_config_find_get(u_int32_t clusterip, int entry)
 {
 	struct clusterip_config *c;
 
@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
 		return NULL;
 	}
 	atomic_inc(&c->refcount);
+	if (entry)
+		atomic_inc(&c->entries);
 	read_unlock_bh(&clusterip_lock);
 
 	return c;
 }
 
+static void
+clusterip_config_init_nodelist(struct clusterip_config *c,
+			       const struct ipt_clusterip_tgt_info *i)
+{
+	int n;
+	/* local node k is recorded as bit k - 1; no range check is done here */
+	for (n = 0; n < i->num_local_nodes; n++) {
+		set_bit(i->local_nodes[n] - 1, &c->local_nodes);
+	}
+}
+
 static struct clusterip_config *
 clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 			struct net_device *dev)
@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 	c->clusterip = ip;
 	memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
 	c->num_total_nodes = i->num_total_nodes;
-	c->num_local_nodes = i->num_local_nodes;
-	memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
+	clusterip_config_init_nodelist(c, i);
 	c->hash_mode = i->hash_mode;
 	c->hash_initval = i->hash_initval;
 	atomic_set(&c->refcount, 1);
+	atomic_set(&c->entries, 1);
 
 #ifdef CONFIG_PROC_FS
 	/* create proc dir entry */
@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
 static int
 clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
 {
-	int i;
-
-	write_lock_bh(&clusterip_lock);
 
-	if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
-	    || nodenum > CLUSTERIP_MAX_NODES) {
-		write_unlock_bh(&clusterip_lock);
+	if (nodenum == 0 ||
+	    nodenum > c->num_total_nodes)
 		return 1;
-	}
-
-	/* check if we alrady have this number in our array */
-	for (i = 0; i < c->num_local_nodes; i++) {
-		if (c->local_nodes[i] == nodenum) {
-			write_unlock_bh(&clusterip_lock);
-			return 1;
-		}
-	}
 
-	c->local_nodes[c->num_local_nodes++] = nodenum;
+	/* check if we already have this number in our bitfield */
+	if (test_and_set_bit(nodenum - 1, &c->local_nodes))
+		return 1;
 
-	write_unlock_bh(&clusterip_lock);
 	return 0;
 }
 
 static int
 clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 {
-	int i;
-
-	write_lock_bh(&clusterip_lock);
-
-	if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
-		write_unlock_bh(&clusterip_lock);
+	if (nodenum == 0 ||
+	    nodenum > c->num_total_nodes)
 		return 1;
-	}
 		
-	for (i = 0; i < c->num_local_nodes; i++) {
-		if (c->local_nodes[i] == nodenum) {
-			int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
-			memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
-			c->num_local_nodes--;
-			write_unlock_bh(&clusterip_lock);
-			return 0;
-		}
-	}
+	if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
+		return 0;
 
-	write_unlock_bh(&clusterip_lock);
 	return 1;
 }
 
@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 static inline int
 clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
 {
-	int i;
-
-	read_lock_bh(&clusterip_lock);
-
-	if (config->num_local_nodes == 0) {
-		read_unlock_bh(&clusterip_lock);
-		return 0;
-	}
-
-	for (i = 0; i < config->num_local_nodes; i++) {
-		if (config->local_nodes[i] == hash) {
-			read_unlock_bh(&clusterip_lock);
-			return 1;
-		}
-	}
-
-	read_unlock_bh(&clusterip_lock);
-
-	return 0;
+	return test_bit(hash - 1, &config->local_nodes);
 }
 
 /*********************************************************************** 
@@ -415,8 +411,26 @@ checkentry(const char *tablename,
 
 	/* FIXME: further sanity checks */
 
-	config = clusterip_config_find_get(e->ip.dst.s_addr);
-	if (!config) {
+	config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+	if (config) {
+		if (cipinfo->config != NULL) {
+			/* Case A: This is an entry that gets reloaded, since
+			 * it still has a cipinfo->config pointer. Simply
+			 * increase the entry refcount and return */
+			if (cipinfo->config != config) {
+				printk(KERN_ERR "CLUSTERIP: Reloaded entry "
+				       "has invalid config pointer!\n");
+				return 0;
+			}
+			clusterip_config_entry_get(cipinfo->config);
+		} else {
+			/* Case B: This is a new rule referring to an existing
+			 * clusterip config. */
+			cipinfo->config = config;
+			clusterip_config_entry_get(cipinfo->config);
+		}
+	} else {
+		/* Case C: This is a completely new clusterip config */
 		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 			printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
 			return 0;
@@ -443,10 +457,9 @@ checkentry(const char *tablename,
 			}
 			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
 		}
+		cipinfo->config = config;
 	}
 
-	cipinfo->config = config;
-
 	return 1;
 }
 
@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
 
-	/* we first remove the proc entry and then drop the reference
-	 * count.  In case anyone still accesses the file, the open/close
-	 * functions are also incrementing the refcount on their own */
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry(cipinfo->config->pde->name,
-			  cipinfo->config->pde->parent);
-#endif
+	/* if no more entries are referencing the config, remove it
+	 * from the list and destroy the proc entry */
+	clusterip_config_entry_put(cipinfo->config);
+
 	clusterip_config_put(cipinfo->config);
 }
 
@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
 
 	/* if there is no clusterip configuration for the arp reply's 
 	 * source ip, we don't want to mangle it */
-	c = clusterip_config_find_get(payload->src_ip);
+	c = clusterip_config_find_get(payload->src_ip, 0);
 	if (!c)
 		return NF_ACCEPT;
 
@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
 
 #ifdef CONFIG_PROC_FS
 
+struct clusterip_seq_position {
+	unsigned int pos;	/* index of the entry being shown */
+	unsigned int weight;	/* bits set in the snapshot == entry count */
+	unsigned int bit;	/* ffs() of current entry, printed as node */
+	unsigned long val;	/* snapshot with visited bits cleared */
+};
+
 static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
 {
 	struct proc_dir_entry *pde = s->private;
 	struct clusterip_config *c = pde->data;
-	unsigned int *nodeidx;
-
-	read_lock_bh(&clusterip_lock);
-	if (*pos >= c->num_local_nodes)
+	unsigned int weight;
+	u_int32_t local_nodes;
+	struct clusterip_seq_position *idx;
+
+	/* FIXME: possible race */
+	local_nodes = c->local_nodes;
+	weight = hweight32(local_nodes);
+	if (*pos >= weight)
 		return NULL;
 
-	nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL);
-	if (!nodeidx)
+	idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
+	if (!idx)
 		return ERR_PTR(-ENOMEM);
 
-	*nodeidx = *pos;
-	return nodeidx;
+	idx->pos = *pos;
+	idx->weight = weight;
+	idx->bit = ffs(local_nodes);
+	idx->val = local_nodes;
+	clear_bit(idx->bit - 1, &idx->val);
+
+	return idx;
 }
 
 static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct clusterip_config *c = pde->data;
-	unsigned int *nodeidx = (unsigned int *)v;
+	struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
 
-	*pos = ++(*nodeidx);
-	if (*pos >= c->num_local_nodes) {
+	*pos = ++idx->pos;
+	if (*pos >= idx->weight) {
 		kfree(v);
 		return NULL;
 	}
-	return nodeidx;
+	idx->bit = ffs(idx->val);
+	clear_bit(idx->bit - 1, &idx->val);
+	return idx;
 }
 
 static void clusterip_seq_stop(struct seq_file *s, void *v)
 {
 	kfree(v);
-
-	read_unlock_bh(&clusterip_lock);
 }
 
 static int clusterip_seq_show(struct seq_file *s, void *v)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct clusterip_config *c = pde->data;
-	unsigned int *nodeidx = (unsigned int *)v;
+	struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
 
-	if (*nodeidx != 0) 
+	if (idx->pos != 0) 
 		seq_putc(s, ',');
-	seq_printf(s, "%u", c->local_nodes[*nodeidx]);
 
-	if (*nodeidx == c->num_local_nodes-1)
+	seq_printf(s, "%u", idx->bit);
+
+	if (idx->pos == idx->weight - 1)
 		seq_putc(s, '\n');
 
 	return 0;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 304bb0a1d4f0..4b0d7e4d6269 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 
 			if (type && code) {
 				get_user(fl->fl_icmp_type, type);
-				__get_user(fl->fl_icmp_code, code);
+			        get_user(fl->fl_icmp_code, code);
 				probed = 1;
 			}
 			break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 29222b964951..a7537c7bbd06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -979,14 +979,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 				break;
 
+			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+				!before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
 			pcount = tcp_skb_pcount(skb);
 
-			if (pcount > 1 &&
-			    (after(start_seq, TCP_SKB_CB(skb)->seq) ||
-			     before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
+			if (pcount > 1 && !in_sack &&
+			    after(TCP_SKB_CB(skb)->end_seq, start_seq)) {
 				unsigned int pkt_len;
 
-				if (after(start_seq, TCP_SKB_CB(skb)->seq))
+				in_sack = !after(start_seq,
+						 TCP_SKB_CB(skb)->seq);
+
+				if (!in_sack)
 					pkt_len = (start_seq -
 						   TCP_SKB_CB(skb)->seq);
 				else
@@ -999,9 +1004,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 			fack_count += pcount;
 
-			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
-				!before(end_seq, TCP_SKB_CB(skb)->end_seq);
-
 			sacked = TCP_SKB_CB(skb)->sacked;
 
 			/* Account D-SACK for retransmitted packet. */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af7..b1a63b2c6b4a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
 
-		newicsk->icsk_ca_ops = &tcp_reno;
+		newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c10e4435e3b1..5dd6dd7d091e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -435,6 +435,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	int nsize, old_factor;
 	u16 flags;
 
+	BUG_ON(len >= skb->len);
+
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -459,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	flags = TCP_SKB_CB(skb)->flags;
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
-	TCP_SKB_CB(buff)->sacked =
-		(TCP_SKB_CB(skb)->sacked &
-		 (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
+	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
 	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -499,6 +499,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 			tcp_skb_pcount(buff);
 
 		tp->packets_out -= diff;
+
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+			tp->sacked_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+			tp->retrans_out -= diff;
+
 		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
 			tp->lost_out -= diff;
 			tp->left_out -= diff;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 29fed6e58d0a..519899fb11d5 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1968,7 +1968,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
 	}
 	pmc->mca_sources = NULL;
 	pmc->mca_sfmode = MCAST_EXCLUDE;
-	pmc->mca_sfcount[MCAST_EXCLUDE] = 0;
+	pmc->mca_sfcount[MCAST_INCLUDE] = 0;
 	pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
 }
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1cb8adb2787f..2da514b16d95 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1955,6 +1955,57 @@ static void __exit fini(void)
 #endif
 }
 
+/*
+ * Walk the extension header chain until a header of type 'target' is found.
+ * On success the offset of that header from skb->data is stored in *offset
+ * and 0 is returned; otherwise (chain ends or is unreadable) -1 is returned.
+ *
+ * Notes: - non-1st Fragment Header isn't skipped.
+ *	  - ESP header isn't skipped.
+ *	  - The target header may be truncated.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+{
+	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
+	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	unsigned int len = skb->len - start;
+
+	while (nexthdr != target) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		unsigned int hdrlen;
+
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
+			return -1;
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return -1;
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off, *fp;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -1;
+			/* bits above the low three set: not the first fragment */
+			if (ntohs(*fp) & ~0x7)
+				return -1;
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen + 2) << 2; 
+		else
+			hdrlen = ipv6_optlen(hp); 
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	*offset = start;
+	return 0;
+}
+
 EXPORT_SYMBOL(ip6t_register_table);
 EXPORT_SYMBOL(ip6t_unregister_table);
 EXPORT_SYMBOL(ip6t_do_table);
@@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match);
 EXPORT_SYMBOL(ip6t_register_target);
 EXPORT_SYMBOL(ip6t_unregister_target);
 EXPORT_SYMBOL(ip6t_ext_hdr);
+EXPORT_SYMBOL(ipv6_find_hdr);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index d5b94f142bba..dde37793d20b 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -48,92 +48,21 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-	struct ip_auth_hdr *ah = NULL, _ah;
+	struct ip_auth_hdr *ah, _ah;
 	const struct ip6t_ah *ahinfo = matchinfo;
-	unsigned int temp;
-	int len;
-	u8 nexthdr;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 
-	/*DEBUGP("IPv6 AH entered\n");*/
-	/* if (opt->auth == 0) return 0;
-	* It does not filled on output */
-
-	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
-	/* pointer to the 1st exthdr */
-	ptr = sizeof(struct ipv6hdr);
-	/* available length */
-	len = skb->len - ptr;
-	temp = 0;
-
-	while (ip6t_ext_hdr(nexthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
-
-		DEBUGP("ipv6_ah header iteration \n");
-
-		/* Is there enough space for the next ext header? */
-		if (len < sizeof(struct ipv6_opt_hdr))
-			return 0;
-		/* No more exthdr -> evaluate */
-		if (nexthdr == NEXTHDR_NONE)
-			break;
-		/* ESP -> evaluate */
-		if (nexthdr == NEXTHDR_ESP)
-			break;
-
-		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		BUG_ON(hp == NULL);
-
-		/* Calculate the header length */
-		if (nexthdr == NEXTHDR_FRAGMENT)
-			hdrlen = 8;
-		else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen+2)<<2;
-		else
-			hdrlen = ipv6_optlen(hp);
-
-		/* AH -> evaluate */
-		if (nexthdr == NEXTHDR_AUTH) {
-			temp |= MASK_AH;
-			break;
-		}
-
-		
-		/* set the flag */
-		switch (nexthdr) {
-		case NEXTHDR_HOP:
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_FRAGMENT:
-		case NEXTHDR_AUTH:
-		case NEXTHDR_DEST:
-			break;
-		default:
-			DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr);
-			return 0;
-		}
-
-		nexthdr = hp->nexthdr;
-		len -= hdrlen;
-		ptr += hdrlen;
-		if (ptr > skb->len) {
-			DEBUGP("ipv6_ah: new pointer too large! \n");
-			break;
-		}
-	}
-
-	/* AH header not found */
-	if (temp != MASK_AH)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
 		return 0;
 
-	if (len < sizeof(struct ip_auth_hdr)){
+	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
+	if (ah == NULL) {
 		*hotdrop = 1;
 		return 0;
 	}
 
-	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
-	BUG_ON(ah == NULL);
+	hdrlen = (ah->hdrlen + 2) << 2;
 
 	DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
 	DEBUGP("RES %04X ", ah->reserved);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index 540925e4a7a8..c450a635e54b 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb,
        struct ipv6_opt_hdr _optsh, *oh;
        const struct ip6t_opts *optinfo = matchinfo;
        unsigned int temp;
-       unsigned int len;
-       u8 nexthdr;
        unsigned int ptr;
        unsigned int hdrlen = 0;
        unsigned int ret = 0;
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb,
        u8 _optlen, *lp = NULL;
        unsigned int optlen;
        
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
-
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_opts header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* OPTS -> evaluate */
 #if HOPBYHOP
-                if (nexthdr == NEXTHDR_HOP) {
-                     temp |= MASK_HOPOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
 #else
-                if (nexthdr == NEXTHDR_DEST) {
-                     temp |= MASK_DSTOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
 #endif
-                     break;
-              }
-
+		return 0;
 
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_opts: new pointer is too large! \n");
-			break;
-		}
-        }
-
-       /* OPTIONS header not found */
-#if HOPBYHOP
-       if ( temp != MASK_HOPOPTS ) return 0;
-#else
-       if ( temp != MASK_DSTOPTS ) return 0;
-#endif
-
-       if (len < (int)sizeof(struct ipv6_opt_hdr)){
+       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+       if (oh == NULL){
 	       *hotdrop = 1;
        		return 0;
        }
 
-       if (len < hdrlen){
+       hdrlen = ipv6_optlen(oh);
+       if (skb->len - ptr < hdrlen){
 	       /* Packet smaller than it's length field */
        		return 0;
        }
 
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       BUG_ON(oh == NULL);
-
        DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
 
        DEBUGP("len %02X %04X %02X ",
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index e39dd236fd8e..24bc0cde43a1 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -48,87 +48,22 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-	struct ip_esp_hdr _esp, *eh = NULL;
+	struct ip_esp_hdr _esp, *eh;
 	const struct ip6t_esp *espinfo = matchinfo;
-	unsigned int temp;
-	int len;
-	u8 nexthdr;
 	unsigned int ptr;
 
 	/* Make sure this isn't an evil packet */
 	/*DEBUGP("ipv6_esp entered \n");*/
 
-	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
-	/* pointer to the 1st exthdr */
-	ptr = sizeof(struct ipv6hdr);
-	/* available length */
-	len = skb->len - ptr;
-	temp = 0;
-
-	while (ip6t_ext_hdr(nexthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
-		int hdrlen;
-
-		DEBUGP("ipv6_esp header iteration \n");
-
-		/* Is there enough space for the next ext header? */
-		if (len < sizeof(struct ipv6_opt_hdr))
-			return 0;
-		/* No more exthdr -> evaluate */
-		if (nexthdr == NEXTHDR_NONE)
-			break;
-		/* ESP -> evaluate */
-		if (nexthdr == NEXTHDR_ESP) {
-			temp |= MASK_ESP;
-			break;
-		}
-
-		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		BUG_ON(hp == NULL);
-
-		/* Calculate the header length */
-		if (nexthdr == NEXTHDR_FRAGMENT)
-			hdrlen = 8;
-		else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen+2)<<2;
-		else
-			hdrlen = ipv6_optlen(hp);
-
-		/* set the flag */
-		switch (nexthdr) {
-		case NEXTHDR_HOP:
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_FRAGMENT:
-		case NEXTHDR_AUTH:
-		case NEXTHDR_DEST:
-			break;
-		default:
-			DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr);
-			return 0;
-		}
-
-		nexthdr = hp->nexthdr;
-		len -= hdrlen;
-		ptr += hdrlen;
-		if (ptr > skb->len) {
-			DEBUGP("ipv6_esp: new pointer too large! \n");
-			break;
-		}
-	}
-
-	/* ESP header not found */
-	if (temp != MASK_ESP)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
 		return 0;
 
-	if (len < sizeof(struct ip_esp_hdr)) {
+	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
+	if (eh == NULL) {
 		*hotdrop = 1;
 		return 0;
 	}
 
-	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
-	BUG_ON(eh == NULL);
-
 	DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi));
 
 	return (eh != NULL)
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 4bfa30a9bc80..085d5f8eea29 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -48,90 +48,18 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct frag_hdr _frag, *fh = NULL;
+       struct frag_hdr _frag, *fh;
        const struct ip6t_frag *fraginfo = matchinfo;
-       unsigned int temp;
-       int len;
-       u8 nexthdr;
        unsigned int ptr;
-       unsigned int hdrlen = 0;
-
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
-
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_frag header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* FRAG -> evaluate */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                     temp |= MASK_FRAGMENT;
-                     break;
-              }
-
-
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_frag: new pointer too large! \n");
-			break;
-		}
-        }
-
-       /* FRAG header not found */
-       if ( temp != MASK_FRAGMENT ) return 0;
-
-       if (len < sizeof(struct frag_hdr)){
-	       *hotdrop = 1;
-       		return 0;
-       }
 
-       fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
-       BUG_ON(fh == NULL);
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
+		return 0;
+
+	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
+	if (fh == NULL){
+		*hotdrop = 1;
+		return 0;
+	}
 
        DEBUGP("INFO %04X ", fh->frag_off);
        DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 27f3650d127e..1d09485111d0 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb,
        struct ipv6_opt_hdr _optsh, *oh;
        const struct ip6t_opts *optinfo = matchinfo;
        unsigned int temp;
-       unsigned int len;
-       u8 nexthdr;
        unsigned int ptr;
        unsigned int hdrlen = 0;
        unsigned int ret = 0;
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb,
        u8 _optlen, *lp = NULL;
        unsigned int optlen;
        
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
-
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_opts header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* OPTS -> evaluate */
 #if HOPBYHOP
-                if (nexthdr == NEXTHDR_HOP) {
-                     temp |= MASK_HOPOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
 #else
-                if (nexthdr == NEXTHDR_DEST) {
-                     temp |= MASK_DSTOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
 #endif
-                     break;
-              }
-
+		return 0;
 
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_opts: new pointer is too large! \n");
-			break;
-		}
-        }
-
-       /* OPTIONS header not found */
-#if HOPBYHOP
-       if ( temp != MASK_HOPOPTS ) return 0;
-#else
-       if ( temp != MASK_DSTOPTS ) return 0;
-#endif
-
-       if (len < (int)sizeof(struct ipv6_opt_hdr)){
+       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+       if (oh == NULL){
 	       *hotdrop = 1;
        		return 0;
        }
 
-       if (len < hdrlen){
+       hdrlen = ipv6_optlen(oh);
+       if (skb->len - ptr < hdrlen){
 	       /* Packet smaller than it's length field */
        		return 0;
        }
 
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       BUG_ON(oh == NULL);
-
        DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
 
        DEBUGP("len %02X %04X %02X ",
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2bb670037df3..beb2fd5cebbb 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -50,98 +50,29 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct ipv6_rt_hdr _route, *rh = NULL;
+       struct ipv6_rt_hdr _route, *rh;
        const struct ip6t_rt *rtinfo = matchinfo;
        unsigned int temp;
-       unsigned int len;
-       u8 nexthdr;
        unsigned int ptr;
        unsigned int hdrlen = 0;
        unsigned int ret = 0;
        struct in6_addr *ap, _addr;
 
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
+		return 0;
 
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_rt header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* ROUTING -> evaluate */
-                if (nexthdr == NEXTHDR_ROUTING) {
-                     temp |= MASK_ROUTING;
-                     break;
-              }
-
-
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_rt: new pointer is too large! \n");
-			break;
-		}
-        }
-
-       /* ROUTING header not found */
-       if ( temp != MASK_ROUTING ) return 0;
-
-       if (len < (int)sizeof(struct ipv6_rt_hdr)){
+       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
+       if (rh == NULL){
 	       *hotdrop = 1;
        		return 0;
        }
 
-       if (len < hdrlen){
+       hdrlen = ipv6_optlen(rh);
+       if (skb->len - ptr < hdrlen){
 	       /* Pcket smaller than its length field */
        		return 0;
        }
 
-       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
-       BUG_ON(rh == NULL);
-
        DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
        DEBUGP("TYPE %04X ", rh->type);
        DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5aa3691c578d..a1265a320b11 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 
 			if (type && code) {
 				get_user(fl->fl_icmp_type, type);
-				__get_user(fl->fl_icmp_code, code);
+				get_user(fl->fl_icmp_code, code);
 				probed = 1;
 			}
 			break;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 69b146843a20..6001948600f3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -405,9 +405,8 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
 				continue;
 
 			if (!ipv6_addr_any(&np->rcv_saddr)) {
-				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr))
-					return s;
-				continue;
+				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
+					continue;
 			}
 			if(!inet6_mc_check(s, loc_addr, rmt_addr))
 				continue;
@@ -640,6 +639,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int tclass = -1;
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int err;
+	int connected = 0;
 
 	/* destination address check */
 	if (sin6) {
@@ -749,6 +749,7 @@ do_udp_sendmsg:
 		fl->fl_ip_dport = inet->dport;
 		daddr = &np->daddr;
 		fl->fl6_flowlabel = np->flow_label;
+		connected = 1;
 	}
 
 	if (!fl->oif)
@@ -771,6 +772,7 @@ do_udp_sendmsg:
 		}
 		if (!(opt->opt_nflen|opt->opt_flen))
 			opt = NULL;
+		connected = 0;
 	}
 	if (opt == NULL)
 		opt = np->opt;
@@ -788,10 +790,13 @@ do_udp_sendmsg:
 		ipv6_addr_copy(&final, &fl->fl6_dst);
 		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
 		final_p = &final;
+		connected = 0;
 	}
 
-	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
+	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
 		fl->oif = np->mcast_oif;
+		connected = 0;
+	}
 
 	err = ip6_dst_lookup(sk, &dst, fl);
 	if (err)
@@ -847,7 +852,7 @@ do_append_data:
 	else if (!corkreq)
 		err = udp_v6_push_pending_frames(sk, up);
 
-	if (dst)
+	if (dst && connected)
 		ip6_dst_store(sk, dst,
 			      ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
 			      &np->daddr : NULL);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8690f171c1ef..ee865d88183b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -36,6 +36,11 @@
  *	Michal Ostrowski        :       Module initialization cleanup.
  *         Ulises Alonso        :       Frame number limit removal and 
  *                                      packet_set_ring memory leak.
+ *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
+ *					The convention is that longer addresses
+ *					will simply extend the hardware address
+ *					byte arrays at the end of sockaddr_ll 
+ *					and packet_mreq.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -161,7 +166,17 @@ struct packet_mclist
 	int			count;
 	unsigned short		type;
 	unsigned short		alen;
-	unsigned char		addr[8];
+	unsigned char		addr[MAX_ADDR_LEN];
+};
+/* identical to struct packet_mreq except it has
+ * a longer address field.
+ */
+struct packet_mreq_max
+{
+	int		mr_ifindex;
+	unsigned short	mr_type;
+	unsigned short	mr_alen;
+	unsigned char	mr_address[MAX_ADDR_LEN];
 };
 #endif
 #ifdef CONFIG_PACKET_MMAP
@@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		err = -EINVAL;
 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
 			goto out;
+		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
+			goto out;
 		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
@@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (dev->hard_header) {
 		int res;
 		err = -EINVAL;
+		if (saddr) {
+			if (saddr->sll_halen != dev->addr_len)
+				goto out_free;
+			if (saddr->sll_hatype != dev->type)
+				goto out_free;
+		}
 		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
 		if (sock->type != SOCK_DGRAM) {
 			skb->tail = skb->data;
@@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	int copied, err;
+	struct sockaddr_ll *sll;
 
 	err = -EINVAL;
 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1057,16 +1081,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 #endif
 
 	/*
-	 *	If the address length field is there to be filled in, we fill
-	 *	it in now.
-	 */
-
-	if (sock->type == SOCK_PACKET)
-		msg->msg_namelen = sizeof(struct sockaddr_pkt);
-	else
-		msg->msg_namelen = sizeof(struct sockaddr_ll);
-
-	/*
 	 *	Call the generic datagram receiver. This handles all sorts
 	 *	of horrible races and re-entrancy so we can forget about it
 	 *	in the protocol layers.
@@ -1087,6 +1101,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 
 	/*
+	 *	If the address length field is there to be filled in, we fill
+	 *	it in now.
+	 */
+
+	sll = (struct sockaddr_ll*)skb->cb;
+	if (sock->type == SOCK_PACKET)
+		msg->msg_namelen = sizeof(struct sockaddr_pkt);
+	else
+		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
+
+	/*
 	 *	You lose any data beyond the buffer you gave. If it worries a
 	 *	user program they can ask the device for its MTU anyway.
 	 */
@@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
 		sll->sll_halen = 0;
 	}
-	*uaddr_len = sizeof(*sll);
+	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 
 	return 0;
 }
@@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i
 	}
 }
 
-static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
+static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 {
 	struct packet_sock *po = pkt_sk(sk);
 	struct packet_mclist *ml, *i;
@@ -1249,7 +1274,7 @@ done:
 	return err;
 }
 
-static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
+static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
 {
 	struct packet_mclist *ml, **mlp;
 
@@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 	case PACKET_ADD_MEMBERSHIP:	
 	case PACKET_DROP_MEMBERSHIP:
 	{
-		struct packet_mreq mreq;
-		if (optlen<sizeof(mreq))
+		struct packet_mreq_max mreq;
+		int len = optlen;
+		memset(&mreq, 0, sizeof(mreq));
+		if (len < sizeof(struct packet_mreq))
 			return -EINVAL;
-		if (copy_from_user(&mreq,optval,sizeof(mreq)))
+		if (len > sizeof(mreq))
+			len = sizeof(mreq);
+		if (copy_from_user(&mreq,optval,len))
 			return -EFAULT;
+		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
+			return -EINVAL;
 		if (optname == PACKET_ADD_MEMBERSHIP)
 			ret = packet_mc_add(sk, &mreq);
 		else
diff --git a/net/socket.c b/net/socket.c
index c699e93c33d7..f9264472377f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
 		if (err < 0)
 			goto out_freeiov;
 	}
-	err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg));
+	err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
+			 COMPAT_FLAGS(msg));
 	if (err)
 		goto out_freeiov;
 	if (MSG_CMSG_COMPAT & flags)