diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/icmp.c | 3 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 243 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 34 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 15 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 19 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 22 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_xmit.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter.c | 15 | ||||
-rw-r--r-- | net/ipv4/netfilter/Kconfig | 10 | ||||
-rw-r--r-- | net/ipv4/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_nat_standalone.c | 109 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_helper.c | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_policy.c | 170 | ||||
-rw-r--r-- | net/ipv4/raw.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 | ||||
-rw-r--r-- | net/ipv4/xfrm4_input.c | 31 | ||||
-rw-r--r-- | net/ipv4/xfrm4_output.c | 72 |
24 files changed, 599 insertions, 182 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index be5a519cd2f8..105039eb7629 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -899,8 +899,7 @@ static void icmp_address_reply(struct sk_buff *skb) u32 _mask, *mp; mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); - if (mp == NULL) - BUG(); + BUG_ON(mp == NULL); for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { if (*mp == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c49908192047..457db99c76df 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -50,9 +50,10 @@ static struct sock *idiagnl; #define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) -static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) +static int inet_csk_diag_fill(struct sock *sk, + struct sk_buff *skb, + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -70,20 +71,22 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, nlh->nlmsg_flags = nlmsg_flags; r = NLMSG_DATA(nlh); - if (sk->sk_state != TCP_TIME_WAIT) { - if (ext & (1 << (INET_DIAG_MEMINFO - 1))) - minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, - sizeof(*minfo)); - if (ext & (1 << (INET_DIAG_INFO - 1))) - info = INET_DIAG_PUT(skb, INET_DIAG_INFO, - handler->idiag_info_size); - - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { - size_t len = strlen(icsk->icsk_ca_ops->name); - strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), - icsk->icsk_ca_ops->name); - } + BUG_ON(sk->sk_state == TCP_TIME_WAIT); + + if (ext & (1 << (INET_DIAG_MEMINFO - 1))) + minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo)); + + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, + handler->idiag_info_size); + + if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { + const size_t len = strlen(icsk->icsk_ca_ops->name); + + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), + icsk->icsk_ca_ops->name); } + r->idiag_family = sk->sk_family; r->idiag_state = sk->sk_state; r->idiag_timer = 0; @@ -93,37 +96,6 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, r->id.idiag_cookie[0] = (u32)(unsigned long)sk; r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - if (r->idiag_state == TCP_TIME_WAIT) { - const struct inet_timewait_sock *tw = inet_twsk(sk); - long tmo = tw->tw_ttd - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = tw->tw_sport; - r->id.idiag_dport = tw->tw_dport; - r->id.idiag_src[0] = tw->tw_rcv_saddr; - r->id.idiag_dst[0] = tw->tw_daddr; - r->idiag_state = tw->tw_substate; - r->idiag_timer = 3; - r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->idiag_family == AF_INET6) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tw6->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tw6->tw_v6_daddr); - } -#endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; - } - r->id.idiag_sport = inet->sport; r->id.idiag_dport = inet->dport; r->id.idiag_src[0] = inet->rcv_saddr; @@ -185,7 +157,75 @@ nlmsg_failure: return -1; } -static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) +static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, + struct sk_buff *skb, int ext, u32 pid, + u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + long tmo; + struct inet_diag_msg *r; + const unsigned char *previous_tail = skb->tail; + struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, + unlh->nlmsg_type, sizeof(*r)); + + r = NLMSG_DATA(nlh); + BUG_ON(tw->tw_state != TCP_TIME_WAIT); + + nlh->nlmsg_flags = nlmsg_flags; + + tmo = tw->tw_ttd - jiffies; + if (tmo < 0) + tmo = 0; + + r->idiag_family = tw->tw_family; + r->idiag_state = tw->tw_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)tw; + r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1); + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; + r->id.idiag_src[0] = tw->tw_rcv_saddr; + r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (tw->tw_family == AF_INET6) { + const struct inet6_timewait_sock *tw6 = + inet6_twsk((struct sock *)tw); + + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, + &tw6->tw_v6_rcv_saddr); + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, + &tw6->tw_v6_daddr); + } +#endif + nlh->nlmsg_len = skb->tail - previous_tail; + return skb->len; +nlmsg_failure: + skb_trim(skb, previous_tail - skb->data); + return -1; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + if (sk->sk_state == TCP_TIME_WAIT) + return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, + skb, ext, pid, seq, nlmsg_flags, + unlh); + return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); +} + +static int inet_diag_get_exact(struct sk_buff *in_skb, + const struct nlmsghdr *nlh) { int err; struct sock *sk; @@ -235,7 +275,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nl if (!rep) goto out; - if (inet_diag_fill(rep, sk, req->idiag_ext, + if (sk_diag_fill(sk, rep, req->idiag_ext, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh) <= 0) BUG(); @@ -283,7 +323,7 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) static int inet_diag_bc_run(const void *bc, int len, - const struct inet_diag_entry *entry) + const struct inet_diag_entry *entry) { while (len > 0) { int yes = 1; @@ -322,7 +362,7 @@ static int inet_diag_bc_run(const void *bc, int len, yes = 0; break; } - + if (cond->prefix_len == 0) break; @@ -331,7 +371,8 @@ static int inet_diag_bc_run(const void *bc, int len, else addr = entry->daddr; - if (bitstring_match(addr, cond->addr, cond->prefix_len)) + if (bitstring_match(addr, cond->addr, + cond->prefix_len)) break; if (entry->family == AF_INET6 && cond->family == AF_INET) { @@ -346,7 +387,7 @@ static int inet_diag_bc_run(const void *bc, int len, } } - if (yes) { + if (yes) { len -= op->yes; bc += op->yes; } else { @@ -407,14 +448,15 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) default: return -EINVAL; } - bc += op->yes; + bc += op->yes; len -= op->yes; } return len == 0 ? 0 : -EINVAL; } -static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) +static int inet_csk_diag_dump(struct sock *sk, + struct sk_buff *skb, + struct netlink_callback *cb) { struct inet_diag_req *r = NLMSG_DATA(cb->nlh); @@ -444,14 +486,50 @@ static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, return 0; } - return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + return inet_csk_diag_fill(sk, skb, r->idiag_ext, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); +} + +static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, + struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + + if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { + struct inet_diag_entry entry; + struct rtattr *bc = (struct rtattr *)(r + 1); + + entry.family = tw->tw_family; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (tw->tw_family == AF_INET6) { + struct inet6_timewait_sock *tw6 = + inet6_twsk((struct sock *)tw); + entry.saddr = tw6->tw_v6_rcv_saddr.s6_addr32; + entry.daddr = tw6->tw_v6_daddr.s6_addr32; + } else +#endif + { + entry.saddr = &tw->tw_rcv_saddr; + entry.daddr = &tw->tw_daddr; + } + entry.sport = tw->tw_num; + entry.dport = ntohs(tw->tw_dport); + entry.userlocks = 0; + + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + return 0; + } + + return inet_twsk_diag_fill(tw, skb, r->idiag_ext, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - u32 pid, u32 seq, - const struct nlmsghdr *unlh) + struct request_sock *req, u32 pid, u32 seq, + const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); @@ -504,7 +582,7 @@ nlmsg_failure: } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) + struct netlink_callback *cb) { struct inet_diag_entry entry; struct inet_diag_req *r = NLMSG_DATA(cb->nlh); @@ -556,7 +634,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; - entry.daddr = + entry.daddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? inet6_rsk(req)->rmt_addr.s6_addr32 : @@ -599,7 +677,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) handler = inet_diag_table[cb->nlh->nlmsg_type]; BUG_ON(handler == NULL); hashinfo = handler->idiag_hashinfo; - + s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -630,7 +708,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[3] > 0) goto syn_recv; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb) < 0) { inet_listen_unlock(hashinfo); goto done; } @@ -672,7 +750,6 @@ skip_listen_ht: s_num = 0; read_lock_bh(&head->lock); - num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); @@ -684,9 +761,10 @@ skip_listen_ht: if (r->id.idiag_sport != inet->sport && r->id.idiag_sport) goto next_normal; - if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) + if (r->id.idiag_dport != inet->dport && + r->id.idiag_dport) goto next_normal; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -695,19 +773,20 @@ next_normal: } if (r->idiag_states & TCPF_TIME_WAIT) { - sk_for_each(sk, node, + struct inet_timewait_sock *tw; + + inet_twsk_for_each(tw, node, &hashinfo->ehash[i + hashinfo->ehash_size].chain) { - struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_dying; - if (r->id.idiag_sport != inet->sport && + if (r->id.idiag_sport != tw->tw_sport && r->id.idiag_sport) goto next_dying; - if (r->id.idiag_dport != inet->dport && + if (r->id.idiag_dport != tw->tw_dport && r->id.idiag_dport) goto next_dying; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { + if (inet_twsk_diag_dump(tw, skb, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -724,8 +803,7 @@ done: return skb->len; } -static __inline__ int -inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; @@ -755,9 +833,8 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } return netlink_dump_start(idiagnl, skb, nlh, inet_diag_dump, NULL); - } else { + } else return inet_diag_get_exact(skb, nlh); - } err_inval: return -EINVAL; @@ -766,15 +843,15 @@ err_inval: static inline void inet_diag_rcv_skb(struct sk_buff *skb) { - int err; - struct nlmsghdr * nlh; - if (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; - if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + int err; + struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; + + if (nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) return; err = inet_diag_rcv_msg(skb, nlh); - if (err || nlh->nlmsg_flags & NLM_F_ACK) + if (err || nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, err); } } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ce5fe3f74a3d..2160874ce7aa 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -304,8 +304,7 @@ static void unlink_from_pool(struct inet_peer *p) /* look for a node to insert instead of p */ struct inet_peer *t; t = lookup_rightempty(p); - if (*stackptr[-1] != t) - BUG(); + BUG_ON(*stackptr[-1] != t); **--stackptr = t->avl_left; /* t is removed, t->v4daddr > x->v4daddr for any * x in p->avl_left subtree. @@ -314,8 +313,7 @@ static void unlink_from_pool(struct inet_peer *p) t->avl_left = p->avl_left; t->avl_right = p->avl_right; t->avl_height = p->avl_height; - if (delp[1] != &p->avl_left) - BUG(); + BUG_ON(delp[1] != &p->avl_left); delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index ce2b70ce4018..2a8adda15e11 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -383,7 +383,7 @@ out_nomem: */ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - __u16 id = iph->id; + __be16 id = iph->id; __u32 saddr = iph->saddr; __u32 daddr = iph->daddr; __u8 protocol = iph->protocol; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 912c42f57c79..1e93eafa7af1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -188,7 +188,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) } if (ipgre_fb_tunnel_dev->flags&IFF_UP) - return ipgre_fb_tunnel_dev->priv; + return netdev_priv(ipgre_fb_tunnel_dev); return NULL; } @@ -278,7 +278,7 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int return NULL; dev->init = ipgre_tunnel_init; - nt = dev->priv; + nt = netdev_priv(dev); nt->parms = *parms; if (register_netdevice(dev) < 0) { @@ -286,9 +286,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int goto failed; } - nt = dev->priv; - nt->parms = *parms; - dev_hold(dev); ipgre_tunnel_link(nt); return nt; @@ -299,7 +296,7 @@ failed: static void ipgre_tunnel_uninit(struct net_device *dev) { - ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv); + ipgre_tunnel_unlink(netdev_priv(dev)); dev_put(dev); } @@ -518,7 +515,7 @@ out: skb2->dst->ops->update_pmtu(skb2->dst, rel_info); rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; @@ -669,7 +666,7 @@ drop_nolock: static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *old_iph = skb->nh.iph; struct iphdr *tiph; @@ -832,6 +829,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -914,7 +912,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = ipgre_tunnel_locate(&p, 0); } if (t == NULL) - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof(p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; @@ -954,7 +952,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) } else { unsigned nflags=0; - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); if (MULTICAST(p.iph.daddr)) nflags = IFF_BROADCAST; @@ -1003,7 +1001,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) goto done; err = -EPERM; - if (t == ipgre_fb_tunnel_dev->priv) + if (t == netdev_priv(ipgre_fb_tunnel_dev)) goto done; dev = t->dev; } @@ -1020,12 +1018,12 @@ done: static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) { - return &(((struct ip_tunnel*)dev->priv)->stat); + return &(((struct ip_tunnel*)netdev_priv(dev))->stat); } static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) return -EINVAL; dev->mtu = new_mtu; @@ -1065,7 +1063,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len) { - struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *t = netdev_priv(dev); struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); u16 *p = (u16*)(iph+1); @@ -1092,7 +1090,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned sh static int ipgre_open(struct net_device *dev) { - struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr)) { struct flowi fl = { .oif = t->parms.link, @@ -1116,7 +1114,7 @@ static int ipgre_open(struct net_device *dev) static int ipgre_close(struct net_device *dev) { - struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev = inetdev_by_index(t->mlink); if (in_dev) { @@ -1156,7 +1154,7 @@ static int ipgre_tunnel_init(struct net_device *dev) int mtu = ETH_DATA_LEN; int addend = sizeof(struct iphdr) + 4; - tunnel = (struct ip_tunnel*)dev->priv; + tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; @@ -1220,7 +1218,7 @@ static int ipgre_tunnel_init(struct net_device *dev) static int __init ipgre_fb_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; tunnel->dev = dev; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e45846ae570b..18d7fad474d7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb) raw_rcv(last, skb2); } last = sk; - nf_reset(skb); } } @@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) __skb_pull(skb, ihl); - /* Free reference early: we don't need it any more, and it may - hold ip_conntrack module loaded indefinitely. */ - nf_reset(skb); - /* Point into the IP datagram, just past the header. */ skb->h.raw = skb->data; @@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb) if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { int ret; - if (!ipprot->no_policy && - !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - goto out; + if (!ipprot->no_policy) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + kfree_skb(skb); + goto out; + } + nf_reset(skb); } ret = ipprot->handler(skb); if (ret < 0) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 71da31818cfc..3324fbfe528a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -69,6 +69,7 @@ #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> +#include <net/xfrm.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/arp.h> @@ -85,6 +86,8 @@ int sysctl_ip_default_ttl = IPDEFTTL; +static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)); + /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) { @@ -202,6 +205,11 @@ static inline int ip_finish_output2(struct sk_buff *skb) static inline int ip_finish_output(struct sk_buff *skb) { +#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) + /* Policy lookup after SNAT yielded a new policy */ + if (skb->dst->xfrm != NULL) + return xfrm4_output_finish(skb); +#endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) return ip_fragment(skb, ip_finish_output2); @@ -409,7 +417,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) +static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) { struct iphdr *iph; int raw = 0; @@ -418,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; - int not_last_frag; + __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; int err = 0; @@ -1180,7 +1188,7 @@ int ip_push_pending_frames(struct sock *sk) struct ip_options *opt = NULL; struct rtable *rt = inet->cork.rt; struct iphdr *iph; - int df = 0; + __be16 df = 0; __u8 ttl; int err = 0; @@ -1391,7 +1399,6 @@ void __init ip_init(void) #endif } -EXPORT_SYMBOL(ip_fragment); EXPORT_SYMBOL(ip_generic_getfrag); EXPORT_SYMBOL(ip_queue_xmit); EXPORT_SYMBOL(ip_send_check); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 35571cff81c6..bc5ca23b2646 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -244,7 +244,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c if (dev == NULL) return NULL; - nt = dev->priv; + nt = netdev_priv(dev); SET_MODULE_OWNER(dev); dev->init = ipip_tunnel_init; nt->parms = *parms; @@ -269,7 +269,7 @@ static void ipip_tunnel_uninit(struct net_device *dev) tunnels_wc[0] = NULL; write_unlock_bh(&ipip_lock); } else - ipip_tunnel_unlink((struct ip_tunnel*)dev->priv); + ipip_tunnel_unlink(netdev_priv(dev)); dev_put(dev); } @@ -443,7 +443,7 @@ out: skb2->dst->ops->update_pmtu(skb2->dst, rel_info); rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; + struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; @@ -514,7 +514,7 @@ out: static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct net_device_stats *stats = &tunnel->stat; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; @@ -621,6 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; @@ -673,7 +674,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip_tunnel_locate(&p, 0); } if (t == NULL) - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); memcpy(&p, &t->parms, sizeof(p)); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; @@ -710,7 +711,7 @@ ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; break; } - t = (struct ip_tunnel*)dev->priv; + t = netdev_priv(dev); ipip_tunnel_unlink(t); t->parms.iph.saddr = p.iph.saddr; t->parms.iph.daddr = p.iph.daddr; @@ -764,7 +765,7 @@ done: static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) { - return &(((struct ip_tunnel*)dev->priv)->stat); + return &(((struct ip_tunnel*)netdev_priv(dev))->stat); } static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) @@ -799,7 +800,7 @@ static int ipip_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel; struct iphdr *iph; - tunnel = (struct ip_tunnel*)dev->priv; + tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; tunnel->dev = dev; @@ -837,7 +838,7 @@ static int ipip_tunnel_init(struct net_device *dev) static int __init ipip_fb_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel = dev->priv; + struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; tunnel->dev = dev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9a5c0ce7ff35..f58ac9854c3f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -178,8 +178,8 @@ static int reg_vif_num = -1; static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len; - ((struct net_device_stats*)dev->priv)->tx_packets++; + ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); @@ -188,7 +188,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) { - return (struct net_device_stats*)dev->priv; + return (struct net_device_stats*)netdev_priv(dev); } static void reg_vif_setup(struct net_device *dev) @@ -1149,8 +1149,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out+=skb->len; - ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len; - ((struct net_device_stats*)vif->dev->priv)->tx_packets++; + ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; @@ -1210,8 +1210,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_TUNNEL) { ip_encap(skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++; - ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len; + ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; + ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1467,8 +1467,8 @@ int pim_rcv_v1(struct sk_buff * skb) skb->pkt_type = PACKET_HOST; dst_release(skb->dst); skb->dst = NULL; - ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; - ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -1522,8 +1522,8 @@ static int pim_rcv(struct sk_buff * skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len; - ((struct net_device_stats*)reg_dev->priv)->rx_packets++; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; + ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 3b87482049cf..52c12e9edbbc 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; u8 tos = old_iph->tos; - u16 df = old_iph->frag_off; + __be16 df = old_iph->frag_off; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ int mtu; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ae0779d82c5d..3321092b0914 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -7,11 +7,13 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#include <linux/ip.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <net/route.h> -#include <linux/ip.h> +#include <net/xfrm.h> +#include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff **pskb) @@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb) #ifdef CONFIG_IP_ROUTE_FWMARK fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; #endif - fl.proto = iph->protocol; if (ip_route_output_key(&rt, &fl) != 0) return -1; @@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb) if ((*pskb)->dst->error) return -1; +#ifdef CONFIG_XFRM + if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(*pskb, &fl, AF_INET) == 0) + if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + return -1; +#endif + /* Change in oif may mean change in hh_len. */ hh_len = (*pskb)->dst->dev->hard_header_len; if (skb_headroom(*pskb) < hh_len) { @@ -78,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(ip_nat_decode_session); + /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 88a60650e6b8..a9893ec03e02 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d0a447e520a2..549b01a648b3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o +obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 977fb59d4563..0b25050981a1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/timer.h> +#include <linux/interrupt.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/in.h> diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index e7fa29e576dc..77f304680d86 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -995,7 +995,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN or ACK we had let trough + /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * * a) SYN was in window then @@ -1006,7 +1006,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, * segments we ignored. */ goto in_window; } - /* Just fall trough */ + /* Just fall through */ default: /* Keep compilers happy. */ break; diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index f04111f74e09..8b8a1f00bbf4 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -55,6 +55,44 @@ : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ : "*ERROR*"))) +#ifdef CONFIG_XFRM +static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct ip_conntrack *ct; + struct ip_conntrack_tuple *t; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + + ct = ip_conntrack_get(skb, &ctinfo); + if (ct == NULL) + return; + dir = CTINFO2DIR(ctinfo); + t = &ct->tuplehash[dir].tuple; + + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + if (ct->status & statusbit) { + fl->fl4_dst = t->dst.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_dport = t->dst.u.tcp.port; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl->fl4_src = t->src.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_sport = t->src.u.tcp.port; + } +} +#endif + static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -162,18 +200,20 @@ ip_nat_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - u_int32_t saddr, daddr; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; unsigned int ret; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr)) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip) { + dst_release((*pskb)->dst); + (*pskb)->dst = NULL; + } } return ret; } @@ -185,12 +225,30 @@ ip_nat_out(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - return ip_nat_fn(hooknum, pskb, in, out, okfn); + ret = ip_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.ip != + ct->tuplehash[!dir].tuple.dst.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all +#endif + ) + return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + } + return ret; } static unsigned int @@ -200,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - u_int32_t saddr, daddr; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; unsigned int ret; /* root is playing with raw sockets. */ @@ -208,14 +267,20 @@ ip_nat_local_fn(unsigned int hooknum, || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) return NF_ACCEPT; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.ip != + ct->tuplehash[!dir].tuple.src.ip +#ifdef CONFIG_XFRM + || ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[dir].tuple.src.u.all +#endif + ) + return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + } return ret; } @@ -303,10 +368,14 @@ static int init_or_cleanup(int init) if (!init) goto cleanup; +#ifdef CONFIG_XFRM + BUG_ON(ip_nat_decode_session != NULL); + ip_nat_decode_session = nat_decode_session; +#endif ret = ip_nat_rule_init(); if (ret < 0) { printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_nothing; + goto cleanup_decode_session; } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { @@ -354,7 +423,11 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_in_ops); cleanup_rule_init: ip_nat_rule_cleanup(); - cleanup_nothing: + cleanup_decode_session: +#ifdef CONFIG_XFRM + ip_nat_decode_session = NULL; + synchronize_net(); +#endif return ret; } diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index bf14e1c7798a..aef649e393af 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter.h> +#include <linux/interrupt.h> #if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c new file mode 100644 index 000000000000..709debcc69c9 --- /dev/null +++ b/net/ipv4/netfilter/ipt_policy.c @@ -0,0 +1,170 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/xfrm.h> + +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_policy.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e) +{ +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH(saddr, x->props.saddr.a4 & e->smask) && + MATCH(daddr, x->id.daddr.a4 & e->dmask) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, int offset, int *hotdrop) +{ + const struct ipt_policy_info *info = matchinfo; + int ret; + + if (info->flags & IPT_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IPT_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_policy_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n", + matchsize, IPT_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ipt_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) + && info->flags & IPT_POLICY_MATCH_OUT) { + printk(KERN_ERR "ipt_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) + && info->flags & IPT_POLICY_MATCH_IN) { + printk(KERN_ERR "ipt_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IPT_POLICY_MAX_ELEM) { + printk(KERN_ERR "ipt_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ipt_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b0d7e4d6269..165a4d81efa4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return NET_RX_DROP; } + nf_reset(skb); skb_push(skb, skb->data - skb->nh.raw); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0a461232329f..a97ed5416c28 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3347,7 +3347,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, int offset = start - TCP_SKB_CB(skb)->seq; int size = TCP_SKB_CB(skb)->end_seq - start; - if (offset < 0) BUG(); + BUG_ON(offset < 0); if (size > 0) { size = min(copy, size); if (skb_copy_bits(skb, offset, skb_put(nskb, size), size)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9f83e5b28ce..6ea353907af5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1080,6 +1080,7 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); if (sk_filter(sk, skb, 0)) goto discard_and_relse; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 223abaa72bc5..00840474a449 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + nf_reset(skb); if (up->encap_type) { /* @@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; + nf_reset(skb); /* No socket. Drop packet silently, if checksum is wrong */ if (udp_checksum_complete(skb)) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 2d3849c38a0f..850d919591d1 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -11,6 +11,8 @@ #include <linux/module.h> #include <linux/string.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) return xfrm_parse_spi(skb, nexthdr, spi, seq); } +#ifdef CONFIG_NETFILTER +static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + + if (skb->dst == NULL) { + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + skb->dev)) + goto drop; + } + return dst_input(skb); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} +#endif + int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; @@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state)); skb->sp->len += xfrm_nr; + nf_reset(skb); + if (decaps) { if (!(skb->dev->flags&IFF_LOOPBACK)) { dst_release(skb->dst); @@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) netif_rx(skb); return 0; } else { +#ifdef CONFIG_NETFILTER + __skb_push(skb, skb->data - skb->nh.raw); + skb->nh.iph->tot_len = htons(skb->len); + ip_send_check(skb->nh.iph); + + NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, + xfrm4_rcv_encap_finish); + return 0; +#else return -skb->nh.iph->protocol; +#endif } drop_unlock: diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 66620a95942a..d4df0ddd424b 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -8,8 +8,10 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compiler.h> #include <linux/skbuff.h> #include <linux/spinlock.h> +#include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/xfrm.h> @@ -95,7 +97,7 @@ out: return ret; } -int xfrm4_output(struct sk_buff *skb) +static int xfrm4_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm4_encap(skb); + xfrm4_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; + err = 0; out_exit: return err; @@ -143,3 +151,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +int xfrm4_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm4_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm4_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm4_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish); +} |