summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/inet_diag.c243
-rw-r--r--net/ipv4/inetpeer.c6
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c34
-rw-r--r--net/ipv4/ip_input.c15
-rw-r--r--net/ipv4/ip_output.c15
-rw-r--r--net/ipv4/ipip.c19
-rw-r--r--net/ipv4/ipmr.c22
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/netfilter.c15
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c109
-rw-r--r--net/ipv4/netfilter/ipt_helper.c1
-rw-r--r--net/ipv4/netfilter/ipt_policy.c170
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_input.c31
-rw-r--r--net/ipv4/xfrm4_output.c72
24 files changed, 599 insertions, 182 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index be5a519cd2f8..105039eb7629 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -899,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb)
u32 _mask, *mp;
mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
- if (mp == NULL)
- BUG();
+ BUG_ON(mp == NULL);
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
if (*mp == ifa->ifa_mask &&
inet_ifa_match(rt->rt_src, ifa))
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c49908192047..457db99c76df 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -50,9 +50,10 @@ static struct sock *idiagnl;
#define INET_DIAG_PUT(skb, attrtype, attrlen) \
RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
-static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
- const struct nlmsghdr *unlh)
+static int inet_csk_diag_fill(struct sock *sk,
+ struct sk_buff *skb,
+ int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
nlh->nlmsg_flags = nlmsg_flags;
r = NLMSG_DATA(nlh);
- if (sk->sk_state != TCP_TIME_WAIT) {
- if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
- minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO,
- sizeof(*minfo));
- if (ext & (1 << (INET_DIAG_INFO - 1)))
- info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
- handler->idiag_info_size);
-
- if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
- size_t len = strlen(icsk->icsk_ca_ops->name);
- strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
- icsk->icsk_ca_ops->name);
- }
+ BUG_ON(sk->sk_state == TCP_TIME_WAIT);
+
+ if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
+ minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
+
+ if (ext & (1 << (INET_DIAG_INFO - 1)))
+ info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
+ handler->idiag_info_size);
+
+ if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+ const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+ strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+ icsk->icsk_ca_ops->name);
}
+
r->idiag_family = sk->sk_family;
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
@@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
- if (r->idiag_state == TCP_TIME_WAIT) {
- const struct inet_timewait_sock *tw = inet_twsk(sk);
- long tmo = tw->tw_ttd - jiffies;
- if (tmo < 0)
- tmo = 0;
-
- r->id.idiag_sport = tw->tw_sport;
- r->id.idiag_dport = tw->tw_dport;
- r->id.idiag_src[0] = tw->tw_rcv_saddr;
- r->id.idiag_dst[0] = tw->tw_daddr;
- r->idiag_state = tw->tw_substate;
- r->idiag_timer = 3;
- r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
- r->idiag_rqueue = 0;
- r->idiag_wqueue = 0;
- r->idiag_uid = 0;
- r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- if (r->idiag_family == AF_INET6) {
- const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
-
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &tw6->tw_v6_rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &tw6->tw_v6_daddr);
- }
-#endif
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
- }
-
r->id.idiag_sport = inet->sport;
r->id.idiag_dport = inet->dport;
r->id.idiag_src[0] = inet->rcv_saddr;
@@ -185,7 +157,75 @@ nlmsg_failure:
return -1;
}
-static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
+static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
+ struct sk_buff *skb, int ext, u32 pid,
+ u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ long tmo;
+ struct inet_diag_msg *r;
+ const unsigned char *previous_tail = skb->tail;
+ struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
+ unlh->nlmsg_type, sizeof(*r));
+
+ r = NLMSG_DATA(nlh);
+ BUG_ON(tw->tw_state != TCP_TIME_WAIT);
+
+ nlh->nlmsg_flags = nlmsg_flags;
+
+ tmo = tw->tw_ttd - jiffies;
+ if (tmo < 0)
+ tmo = 0;
+
+ r->idiag_family = tw->tw_family;
+ r->idiag_state = tw->tw_state;
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->id.idiag_if = tw->tw_bound_dev_if;
+ r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
+ r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+ r->id.idiag_sport = tw->tw_sport;
+ r->id.idiag_dport = tw->tw_dport;
+ r->id.idiag_src[0] = tw->tw_rcv_saddr;
+ r->id.idiag_dst[0] = tw->tw_daddr;
+ r->idiag_state = tw->tw_substate;
+ r->idiag_timer = 3;
+ r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ;
+ r->idiag_rqueue = 0;
+ r->idiag_wqueue = 0;
+ r->idiag_uid = 0;
+ r->idiag_inode = 0;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ if (tw->tw_family == AF_INET6) {
+ const struct inet6_timewait_sock *tw6 =
+ inet6_twsk((struct sock *)tw);
+
+ ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
+ &tw6->tw_v6_rcv_saddr);
+ ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
+ &tw6->tw_v6_daddr);
+ }
+#endif
+ nlh->nlmsg_len = skb->tail - previous_tail;
+ return skb->len;
+nlmsg_failure:
+ skb_trim(skb, previous_tail - skb->data);
+ return -1;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ if (sk->sk_state == TCP_TIME_WAIT)
+ return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
+ skb, ext, pid, seq, nlmsg_flags,
+ unlh);
+ return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
+}
+
+static int inet_diag_get_exact(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh)
{
int err;
struct sock *sk;
@@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl
if (!rep)
goto out;
- if (inet_diag_fill(rep, sk, req->idiag_ext,
+ if (sk_diag_fill(sk, rep, req->idiag_ext,
NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, nlh) <= 0)
BUG();
@@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
static int inet_diag_bc_run(const void *bc, int len,
- const struct inet_diag_entry *entry)
+ const struct inet_diag_entry *entry)
{
while (len > 0) {
int yes = 1;
@@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len,
yes = 0;
break;
}
-
+
if (cond->prefix_len == 0)
break;
@@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len,
else
addr = entry->daddr;
- if (bitstring_match(addr, cond->addr, cond->prefix_len))
+ if (bitstring_match(addr, cond->addr,
+ cond->prefix_len))
break;
if (entry->family == AF_INET6 &&
cond->family == AF_INET) {
@@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len,
}
}
- if (yes) {
+ if (yes) {
len -= op->yes;
bc += op->yes;
} else {
@@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
default:
return -EINVAL;
}
- bc += op->yes;
+ bc += op->yes;
len -= op->yes;
}
return len == 0 ? 0 : -EINVAL;
}
-static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+static int inet_csk_diag_dump(struct sock *sk,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk,
return 0;
}
- return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+ return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+}
+
+static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+
+ if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ struct inet_diag_entry entry;
+ struct rtattr *bc = (struct rtattr *)(r + 1);
+
+ entry.family = tw->tw_family;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ if (tw->tw_family == AF_INET6) {
+ struct inet6_timewait_sock *tw6 =
+ inet6_twsk((struct sock *)tw);
+ entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32;
+ entry.daddr = tw6->tw_v6_daddr.s6_addr32;
+ } else
+#endif
+ {
+ entry.saddr = &tw->tw_rcv_saddr;
+ entry.daddr = &tw->tw_daddr;
+ }
+ entry.sport = tw->tw_num;
+ entry.dport = ntohs(tw->tw_dport);
+ entry.userlocks = 0;
+
+ if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ return 0;
+ }
+
+ return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
- struct request_sock *req,
- u32 pid, u32 seq,
- const struct nlmsghdr *unlh)
+ struct request_sock *req, u32 pid, u32 seq,
+ const struct nlmsghdr *unlh)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *inet = inet_sk(sk);
@@ -504,7 +582,7 @@ nlmsg_failure:
}
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+ struct netlink_callback *cb)
{
struct inet_diag_entry entry;
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
@@ -556,7 +634,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
inet6_rsk(req)->loc_addr.s6_addr32 :
#endif
&ireq->loc_addr;
- entry.daddr =
+ entry.daddr =
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
(entry.family == AF_INET6) ?
inet6_rsk(req)->rmt_addr.s6_addr32 :
@@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
handler = inet_diag_table[cb->nlh->nlmsg_type];
BUG_ON(handler == NULL);
hashinfo = handler->idiag_hashinfo;
-
+
s_i = cb->args[1];
s_num = num = cb->args[2];
@@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[3] > 0)
goto syn_recv;
- if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb) < 0) {
inet_listen_unlock(hashinfo);
goto done;
}
@@ -672,7 +750,6 @@ skip_listen_ht:
s_num = 0;
read_lock_bh(&head->lock);
-
num = 0;
sk_for_each(sk, node, &head->chain) {
struct inet_sock *inet = inet_sk(sk);
@@ -684,9 +761,10 @@ skip_listen_ht:
if (r->id.idiag_sport != inet->sport &&
r->id.idiag_sport)
goto next_normal;
- if (r->id.idiag_dport != inet->dport && r->id.idiag_dport)
+ if (r->id.idiag_dport != inet->dport &&
+ r->id.idiag_dport)
goto next_normal;
- if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb) < 0) {
read_unlock_bh(&head->lock);
goto done;
}
@@ -695,19 +773,20 @@ next_normal:
}
if (r->idiag_states & TCPF_TIME_WAIT) {
- sk_for_each(sk, node,
+ struct inet_timewait_sock *tw;
+
+ inet_twsk_for_each(tw, node,
&hashinfo->ehash[i + hashinfo->ehash_size].chain) {
- struct inet_sock *inet = inet_sk(sk);
if (num < s_num)
goto next_dying;
- if (r->id.idiag_sport != inet->sport &&
+ if (r->id.idiag_sport != tw->tw_sport &&
r->id.idiag_sport)
goto next_dying;
- if (r->id.idiag_dport != inet->dport &&
+ if (r->id.idiag_dport != tw->tw_dport &&
r->id.idiag_dport)
goto next_dying;
- if (inet_diag_dump_sock(skb, sk, cb) < 0) {
+ if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
read_unlock_bh(&head->lock);
goto done;
}
@@ -724,8 +803,7 @@ done:
return skb->len;
}
-static __inline__ int
-inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
return 0;
@@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
return netlink_dump_start(idiagnl, skb, nlh,
inet_diag_dump, NULL);
- } else {
+ } else
return inet_diag_get_exact(skb, nlh);
- }
err_inval:
return -EINVAL;
@@ -766,15 +843,15 @@ err_inval:
static inline void inet_diag_rcv_skb(struct sk_buff *skb)
{
- int err;
- struct nlmsghdr * nlh;
-
if (skb->len >= NLMSG_SPACE(0)) {
- nlh = (struct nlmsghdr *)skb->data;
- if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+ int err;
+ struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+
+ if (nlh->nlmsg_len < sizeof(*nlh) ||
+ skb->len < nlh->nlmsg_len)
return;
err = inet_diag_rcv_msg(skb, nlh);
- if (err || nlh->nlmsg_flags & NLM_F_ACK)
+ if (err || nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, err);
}
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ce5fe3f74a3d..2160874ce7aa 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p)
/* look for a node to insert instead of p */
struct inet_peer *t;
t = lookup_rightempty(p);
- if (*stackptr[-1] != t)
- BUG();
+ BUG_ON(*stackptr[-1] != t);
**--stackptr = t->avl_left;
/* t is removed, t->v4daddr > x->v4daddr for any
* x in p->avl_left subtree.
@@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p)
t->avl_left = p->avl_left;
t->avl_right = p->avl_right;
t->avl_height = p->avl_height;
- if (delp[1] != &p->avl_left)
- BUG();
+ BUG_ON(delp[1] != &p->avl_left);
delp[1] = &t->avl_left; /* was &p->avl_left */
}
peer_avl_rebalance(stack, stackptr);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ce2b70ce4018..2a8adda15e11 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -383,7 +383,7 @@ out_nomem:
*/
static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
{
- __u16 id = iph->id;
+ __be16 id = iph->id;
__u32 saddr = iph->saddr;
__u32 daddr = iph->daddr;
__u8 protocol = iph->protocol;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 912c42f57c79..1e93eafa7af1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -188,7 +188,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
}
if (ipgre_fb_tunnel_dev->flags&IFF_UP)
- return ipgre_fb_tunnel_dev->priv;
+ return netdev_priv(ipgre_fb_tunnel_dev);
return NULL;
}
@@ -278,7 +278,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
return NULL;
dev->init = ipgre_tunnel_init;
- nt = dev->priv;
+ nt = netdev_priv(dev);
nt->parms = *parms;
if (register_netdevice(dev) < 0) {
@@ -286,9 +286,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
goto failed;
}
- nt = dev->priv;
- nt->parms = *parms;
-
dev_hold(dev);
ipgre_tunnel_link(nt);
return nt;
@@ -299,7 +296,7 @@ failed:
static void ipgre_tunnel_uninit(struct net_device *dev)
{
- ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
+ ipgre_tunnel_unlink(netdev_priv(dev));
dev_put(dev);
}
@@ -518,7 +515,7 @@ out:
skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
rel_info = htonl(rel_info);
} else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_HOST_UNREACH;
@@ -669,7 +666,7 @@ drop_nolock:
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *old_iph = skb->nh.iph;
struct iphdr *tiph;
@@ -832,6 +829,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->h.raw = skb->nh.raw;
skb->nh.raw = skb_push(skb, gre_hlen);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
dst_release(skb->dst);
skb->dst = &rt->u.dst;
@@ -914,7 +912,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
t = ipgre_tunnel_locate(&p, 0);
}
if (t == NULL)
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
memcpy(&p, &t->parms, sizeof(p));
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -954,7 +952,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
} else {
unsigned nflags=0;
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
if (MULTICAST(p.iph.daddr))
nflags = IFF_BROADCAST;
@@ -1003,7 +1001,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
goto done;
err = -EPERM;
- if (t == ipgre_fb_tunnel_dev->priv)
+ if (t == netdev_priv(ipgre_fb_tunnel_dev))
goto done;
dev = t->dev;
}
@@ -1020,12 +1018,12 @@ done:
static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
{
- return &(((struct ip_tunnel*)dev->priv)->stat);
+ return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
return -EINVAL;
dev->mtu = new_mtu;
@@ -1065,7 +1063,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
void *daddr, void *saddr, unsigned len)
{
- struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *t = netdev_priv(dev);
struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
u16 *p = (u16*)(iph+1);
@@ -1092,7 +1090,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh
static int ipgre_open(struct net_device *dev)
{
- struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *t = netdev_priv(dev);
if (MULTICAST(t->parms.iph.daddr)) {
struct flowi fl = { .oif = t->parms.link,
@@ -1116,7 +1114,7 @@ static int ipgre_open(struct net_device *dev)
static int ipgre_close(struct net_device *dev)
{
- struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *t = netdev_priv(dev);
if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev = inetdev_by_index(t->mlink);
if (in_dev) {
@@ -1156,7 +1154,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
int mtu = ETH_DATA_LEN;
int addend = sizeof(struct iphdr) + 4;
- tunnel = (struct ip_tunnel*)dev->priv;
+ tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
tunnel->dev = dev;
@@ -1220,7 +1218,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
static int __init ipgre_fb_tunnel_init(struct net_device *dev)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
tunnel->dev = dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e45846ae570b..18d7fad474d7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
raw_rcv(last, skb2);
}
last = sk;
- nf_reset(skb);
}
}
@@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
__skb_pull(skb, ihl);
- /* Free reference early: we don't need it any more, and it may
- hold ip_conntrack module loaded indefinitely. */
- nf_reset(skb);
-
/* Point into the IP datagram, just past the header. */
skb->h.raw = skb->data;
@@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
int ret;
- if (!ipprot->no_policy &&
- !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- goto out;
+ if (!ipprot->no_policy) {
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ kfree_skb(skb);
+ goto out;
+ }
+ nf_reset(skb);
}
ret = ipprot->handler(skb);
if (ret < 0) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 71da31818cfc..3324fbfe528a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -69,6 +69,7 @@
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
+#include <net/xfrm.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
@@ -85,6 +86,8 @@
int sysctl_ip_default_ttl = IPDEFTTL;
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+
/* Generate a checksum for an outgoing IP datagram. */
__inline__ void ip_send_check(struct iphdr *iph)
{
@@ -202,6 +205,11 @@ static inline int ip_finish_output2(struct sk_buff *skb)
static inline int ip_finish_output(struct sk_buff *skb)
{
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+ /* Policy lookup after SNAT yielded a new policy */
+ if (skb->dst->xfrm != NULL)
+ return xfrm4_output_finish(skb);
+#endif
if (skb->len > dst_mtu(skb->dst) &&
!(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
return ip_fragment(skb, ip_finish_output2);
@@ -409,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
{
struct iphdr *iph;
int raw = 0;
@@ -418,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
struct sk_buff *skb2;
unsigned int mtu, hlen, left, len, ll_rs;
int offset;
- int not_last_frag;
+ __be16 not_last_frag;
struct rtable *rt = (struct rtable*)skb->dst;
int err = 0;
@@ -1180,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk)
struct ip_options *opt = NULL;
struct rtable *rt = inet->cork.rt;
struct iphdr *iph;
- int df = 0;
+ __be16 df = 0;
__u8 ttl;
int err = 0;
@@ -1391,7 +1399,6 @@ void __init ip_init(void)
#endif
}
-EXPORT_SYMBOL(ip_fragment);
EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 35571cff81c6..bc5ca23b2646 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -244,7 +244,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
if (dev == NULL)
return NULL;
- nt = dev->priv;
+ nt = netdev_priv(dev);
SET_MODULE_OWNER(dev);
dev->init = ipip_tunnel_init;
nt->parms = *parms;
@@ -269,7 +269,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
tunnels_wc[0] = NULL;
write_unlock_bh(&ipip_lock);
} else
- ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
+ ipip_tunnel_unlink(netdev_priv(dev));
dev_put(dev);
}
@@ -443,7 +443,7 @@ out:
skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
rel_info = htonl(rel_info);
} else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+ struct ip_tunnel *t = netdev_priv(skb2->dev);
if (t->parms.iph.ttl) {
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_HOST_UNREACH;
@@ -514,7 +514,7 @@ out:
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
@@ -621,6 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
skb->h.raw = skb->nh.raw;
skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
dst_release(skb->dst);
skb->dst = &rt->u.dst;
@@ -673,7 +674,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
t = ipip_tunnel_locate(&p, 0);
}
if (t == NULL)
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
memcpy(&p, &t->parms, sizeof(p));
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
@@ -710,7 +711,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EINVAL;
break;
}
- t = (struct ip_tunnel*)dev->priv;
+ t = netdev_priv(dev);
ipip_tunnel_unlink(t);
t->parms.iph.saddr = p.iph.saddr;
t->parms.iph.daddr = p.iph.daddr;
@@ -764,7 +765,7 @@ done:
static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
- return &(((struct ip_tunnel*)dev->priv)->stat);
+ return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
@@ -799,7 +800,7 @@ static int ipip_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel;
struct iphdr *iph;
- tunnel = (struct ip_tunnel*)dev->priv;
+ tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
tunnel->dev = dev;
@@ -837,7 +838,7 @@ static int ipip_tunnel_init(struct net_device *dev)
static int __init ipip_fb_tunnel_init(struct net_device *dev)
{
- struct ip_tunnel *tunnel = dev->priv;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
tunnel->dev = dev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9a5c0ce7ff35..f58ac9854c3f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -178,8 +178,8 @@ static int reg_vif_num = -1;
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
read_lock(&mrt_lock);
- ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
- ((struct net_device_stats*)dev->priv)->tx_packets++;
+ ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
@@ -188,7 +188,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
- return (struct net_device_stats*)dev->priv;
+ return (struct net_device_stats*)netdev_priv(dev);
}
static void reg_vif_setup(struct net_device *dev)
@@ -1149,8 +1149,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out+=skb->len;
- ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
- ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
+ ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
kfree_skb(skb);
return;
@@ -1210,8 +1210,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
- ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
- ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
+ ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
+ ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1467,8 +1467,8 @@ int pim_rcv_v1(struct sk_buff * skb)
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
skb->dst = NULL;
- ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
- ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
nf_reset(skb);
netif_rx(skb);
dev_put(reg_dev);
@@ -1522,8 +1522,8 @@ static int pim_rcv(struct sk_buff * skb)
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
- ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
- ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
+ ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
skb->dst = NULL;
nf_reset(skb);
netif_rx(skb);
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049cf..52c12e9edbbc 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = skb->nh.iph;
u8 tos = old_iph->tos;
- u16 df = old_iph->frag_off;
+ __be16 df = old_iph->frag_off;
struct iphdr *iph; /* Our new IP header */
int max_headroom; /* The extra header space needed */
int mtu;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ae0779d82c5d..3321092b0914 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -7,11 +7,13 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/route.h>
-#include <linux/ip.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
#ifdef CONFIG_IP_ROUTE_FWMARK
fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
#endif
- fl.proto = iph->protocol;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
@@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
if ((*pskb)->dst->error)
return -1;
+#ifdef CONFIG_XFRM
+ if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
+ if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+ return -1;
+#endif
+
/* Change in oif may mean change in hh_len. */
hh_len = (*pskb)->dst->dev->hard_header_len;
if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
}
EXPORT_SYMBOL(ip_route_me_harder);
+void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(ip_nat_decode_session);
+
/*
* Extra routing may needed on local out, as the QUEUE target never
* returns control to the table.
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 88a60650e6b8..a9893ec03e02 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_MATCH_POLICY
+ tristate "IPsec policy match support"
+ depends on IP_NF_IPTABLES && XFRM
+ help
+ Policy matching allows you to match packets based on the
+ IPsec policy that was used during decapsulation/will
+ be used during encapsulation.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# `filter', generic and specific targets
config IP_NF_FILTER
tristate "Packet filtering"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d0a447e520a2..549b01a648b3 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
+obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 977fb59d4563..0b25050981a1 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/timer.h>
+#include <linux/interrupt.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/in.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index e7fa29e576dc..77f304680d86 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -995,7 +995,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
|| (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
&& conntrack->proto.tcp.last_index == TCP_ACK_SET))
&& ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
- /* RST sent to invalid SYN or ACK we had let trough
+ /* RST sent to invalid SYN or ACK we had let through
* at a) and c) above:
*
* a) SYN was in window then
@@ -1006,7 +1006,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
* segments we ignored. */
goto in_window;
}
- /* Just fall trough */
+ /* Just fall through */
default:
/* Keep compilers happy. */
break;
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index f04111f74e09..8b8a1f00bbf4 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,6 +55,44 @@
: ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
: "*ERROR*")))
+#ifdef CONFIG_XFRM
+static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+ struct ip_conntrack *ct;
+ struct ip_conntrack_tuple *t;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ unsigned long statusbit;
+
+ ct = ip_conntrack_get(skb, &ctinfo);
+ if (ct == NULL)
+ return;
+ dir = CTINFO2DIR(ctinfo);
+ t = &ct->tuplehash[dir].tuple;
+
+ if (dir == IP_CT_DIR_ORIGINAL)
+ statusbit = IPS_DST_NAT;
+ else
+ statusbit = IPS_SRC_NAT;
+
+ if (ct->status & statusbit) {
+ fl->fl4_dst = t->dst.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP)
+ fl->fl_ip_dport = t->dst.u.tcp.port;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl->fl4_src = t->src.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP)
+ fl->fl_ip_sport = t->src.u.tcp.port;
+ }
+}
+#endif
+
static unsigned int
ip_nat_fn(unsigned int hooknum,
struct sk_buff **pskb,
@@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- u_int32_t saddr, daddr;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
unsigned int ret;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
-
ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr)) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
+ && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.ip !=
+ ct->tuplehash[!dir].tuple.dst.ip) {
+ dst_release((*pskb)->dst);
+ (*pskb)->dst = NULL;
+ }
}
return ret;
}
@@ -185,12 +225,30 @@ ip_nat_out(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+
/* root is playing with raw sockets. */
if ((*pskb)->len < sizeof(struct iphdr)
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
return NF_ACCEPT;
- return ip_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN
+ && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.ip !=
+ ct->tuplehash[!dir].tuple.dst.ip
+#ifdef CONFIG_XFRM
+ || ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all
+#endif
+ )
+ return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ }
+ return ret;
}
static unsigned int
@@ -200,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- u_int32_t saddr, daddr;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
unsigned int ret;
/* root is playing with raw sockets. */
@@ -208,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum,
|| (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
return NF_ACCEPT;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
-
ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr))
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.ip !=
+ ct->tuplehash[!dir].tuple.src.ip
+#ifdef CONFIG_XFRM
+ || ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[dir].tuple.src.u.all
+#endif
+ )
+ return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ }
return ret;
}
@@ -303,10 +368,14 @@ static int init_or_cleanup(int init)
if (!init) goto cleanup;
+#ifdef CONFIG_XFRM
+ BUG_ON(ip_nat_decode_session != NULL);
+ ip_nat_decode_session = nat_decode_session;
+#endif
ret = ip_nat_rule_init();
if (ret < 0) {
printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_nothing;
+ goto cleanup_decode_session;
}
ret = nf_register_hook(&ip_nat_in_ops);
if (ret < 0) {
@@ -354,7 +423,11 @@ static int init_or_cleanup(int init)
nf_unregister_hook(&ip_nat_in_ops);
cleanup_rule_init:
ip_nat_rule_cleanup();
- cleanup_nothing:
+ cleanup_decode_session:
+#ifdef CONFIG_XFRM
+ ip_nat_decode_session = NULL;
+ synchronize_net();
+#endif
return ret;
}
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index bf14e1c7798a..aef649e393af 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/interrupt.h>
#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644
index 000000000000..709debcc69c9
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_policy.c
@@ -0,0 +1,170 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+static inline int
+match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
+{
+#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
+
+ return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
+ MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
+ MATCH(proto, x->id.proto) &&
+ MATCH(mode, x->props.mode) &&
+ MATCH(spi, x->id.spi) &&
+ MATCH(reqid, x->props.reqid);
+}
+
+static int
+match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+ const struct ipt_policy_elem *e;
+ struct sec_path *sp = skb->sp;
+ int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+ int i, pos;
+
+ if (sp == NULL)
+ return -1;
+ if (strict && info->len != sp->len)
+ return 0;
+
+ for (i = sp->len - 1; i >= 0; i--) {
+ pos = strict ? i - sp->len + 1 : 0;
+ if (pos >= info->len)
+ return 0;
+ e = &info->pol[pos];
+
+ if (match_xfrm_state(sp->x[i].xvec, e)) {
+ if (!strict)
+ return 1;
+ } else if (strict)
+ return 0;
+ }
+
+ return strict ? 1 : 0;
+}
+
+static int
+match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+ const struct ipt_policy_elem *e;
+ struct dst_entry *dst = skb->dst;
+ int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+ int i, pos;
+
+ if (dst->xfrm == NULL)
+ return -1;
+
+ for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+ pos = strict ? i : 0;
+ if (pos >= info->len)
+ return 0;
+ e = &info->pol[pos];
+
+ if (match_xfrm_state(dst->xfrm, e)) {
+ if (!strict)
+ return 1;
+ } else if (strict)
+ return 0;
+ }
+
+ return strict ? 1 : 0;
+}
+
+static int match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo, int offset, int *hotdrop)
+{
+ const struct ipt_policy_info *info = matchinfo;
+ int ret;
+
+ if (info->flags & IPT_POLICY_MATCH_IN)
+ ret = match_policy_in(skb, info);
+ else
+ ret = match_policy_out(skb, info);
+
+ if (ret < 0)
+ ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
+ else if (info->flags & IPT_POLICY_MATCH_NONE)
+ ret = 0;
+
+ return ret;
+}
+
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+ void *matchinfo, unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ struct ipt_policy_info *info = matchinfo;
+
+ if (matchsize != IPT_ALIGN(sizeof(*info))) {
+ printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
+ matchsize, IPT_ALIGN(sizeof(*info)));
+ return 0;
+ }
+ if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
+ printk(KERN_ERR "ipt_policy: neither incoming nor "
+ "outgoing policy selected\n");
+ return 0;
+ }
+ if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
+ && info->flags & IPT_POLICY_MATCH_OUT) {
+ printk(KERN_ERR "ipt_policy: output policy not valid in "
+ "PRE_ROUTING and INPUT\n");
+ return 0;
+ }
+ if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
+ && info->flags & IPT_POLICY_MATCH_IN) {
+ printk(KERN_ERR "ipt_policy: input policy not valid in "
+ "POST_ROUTING and OUTPUT\n");
+ return 0;
+ }
+ if (info->len > IPT_POLICY_MAX_ELEM) {
+ printk(KERN_ERR "ipt_policy: too many policy elements\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct ipt_match policy_match = {
+ .name = "policy",
+ .match = match,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init init(void)
+{
+ return ipt_register_match(&policy_match);
+}
+
+static void __exit fini(void)
+{
+ ipt_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 4b0d7e4d6269..165a4d81efa4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return NET_RX_DROP;
}
+ nf_reset(skb);
skb_push(skb, skb->data - skb->nh.raw);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0a461232329f..a97ed5416c28 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3347,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
int offset = start - TCP_SKB_CB(skb)->seq;
int size = TCP_SKB_CB(skb)->end_seq - start;
- if (offset < 0) BUG();
+ BUG_ON(offset < 0);
if (size > 0) {
size = min(copy, size);
if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9f83e5b28ce..6ea353907af5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1080,6 +1080,7 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+ nf_reset(skb);
if (sk_filter(sk, skb, 0))
goto discard_and_relse;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 223abaa72bc5..00840474a449 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
kfree_skb(skb);
return -1;
}
+ nf_reset(skb);
if (up->encap_type) {
/*
@@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
+ nf_reset(skb);
/* No socket. Drop packet silently, if checksum is wrong */
if (udp_checksum_complete(skb))
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 2d3849c38a0f..850d919591d1 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -11,6 +11,8 @@
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
return xfrm_parse_spi(skb, nexthdr, spi, seq);
}
+#ifdef CONFIG_NETFILTER
+static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+
+ if (skb->dst == NULL) {
+ if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
+ skb->dev))
+ goto drop;
+ }
+ return dst_input(skb);
+drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+#endif
+
int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
{
int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
skb->sp->len += xfrm_nr;
+ nf_reset(skb);
+
if (decaps) {
if (!(skb->dev->flags&IFF_LOOPBACK)) {
dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
netif_rx(skb);
return 0;
} else {
+#ifdef CONFIG_NETFILTER
+ __skb_push(skb, skb->data - skb->nh.raw);
+ skb->nh.iph->tot_len = htons(skb->len);
+ ip_send_check(skb->nh.iph);
+
+ NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+ xfrm4_rcv_encap_finish);
+ return 0;
+#else
return -skb->nh.iph->protocol;
+#endif
}
drop_unlock:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 66620a95942a..d4df0ddd424b 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -8,8 +8,10 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/compiler.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
+#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
return ret;
}
-int xfrm4_output(struct sk_buff *skb)
+static int xfrm4_output_one(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
goto error_nolock;
}
- spin_lock_bh(&x->lock);
- err = xfrm_state_check(x, skb);
- if (err)
- goto error;
+ do {
+ spin_lock_bh(&x->lock);
+ err = xfrm_state_check(x, skb);
+ if (err)
+ goto error;
- xfrm4_encap(skb);
+ xfrm4_encap(skb);
- err = x->type->output(x, skb);
- if (err)
- goto error;
+ err = x->type->output(x, skb);
+ if (err)
+ goto error;
- x->curlft.bytes += skb->len;
- x->curlft.packets++;
+ x->curlft.bytes += skb->len;
+ x->curlft.packets++;
- spin_unlock_bh(&x->lock);
+ spin_unlock_bh(&x->lock);
- if (!(skb->dst = dst_pop(dst))) {
- err = -EHOSTUNREACH;
- goto error_nolock;
- }
- err = NET_XMIT_BYPASS;
+ if (!(skb->dst = dst_pop(dst))) {
+ err = -EHOSTUNREACH;
+ goto error_nolock;
+ }
+ dst = skb->dst;
+ x = dst->xfrm;
+ } while (x && !x->props.mode);
+
+ IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+ err = 0;
out_exit:
return err;
@@ -143,3 +151,33 @@ error_nolock:
kfree_skb(skb);
goto out_exit;
}
+
+int xfrm4_output_finish(struct sk_buff *skb)
+{
+ int err;
+
+ while (likely((err = xfrm4_output_one(skb)) == 0)) {
+ nf_reset(skb);
+
+ err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+ skb->dst->dev, dst_output);
+ if (unlikely(err != 1))
+ break;
+
+ if (!skb->dst->xfrm)
+ return dst_output(skb);
+
+ err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+ skb->dst->dev, xfrm4_output_finish);
+ if (unlikely(err != 1))
+ break;
+ }
+
+ return err;
+}
+
+int xfrm4_output(struct sk_buff *skb)
+{
+ return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+ xfrm4_output_finish);
+}
OpenPOWER on IntegriCloud