diff options
Diffstat (limited to 'net')
32 files changed, 577 insertions, 432 deletions
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 770c0df972a3..b54306a934e5 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -22,24 +22,37 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, const void *data, unsigned int datalen) { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + int action = info->target & -16; - if ((*pskb)->nfmark != info->mark) + if (action == MARK_SET_VALUE) (*pskb)->nfmark = info->mark; + else if (action == MARK_OR_VALUE) + (*pskb)->nfmark |= info->mark; + else if (action == MARK_AND_VALUE) + (*pskb)->nfmark &= info->mark; + else + (*pskb)->nfmark ^= info->mark; - return info->target; + return info->target | -16; } static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + int tmp; if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) return -EINVAL; - if (BASE_CHAIN && info->target == EBT_RETURN) + tmp = info->target | -16; + if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; CLEAR_BASE_CHAIN_BIT; - if (INVALID_TARGET) + if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + return -EINVAL; + tmp = info->target & -16; + if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && + tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) return -EINVAL; return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8ce8c471d868..b4b478353b27 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -344,12 +344,12 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, { struct neighbour *n; int key_len = tbl->key_len; - u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + u32 hash_val = tbl->hash(pkey, dev); NEIGH_CACHE_STAT_INC(tbl, lookups); read_lock_bh(&tbl->lock); - for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); @@ -364,12 +364,12 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) { struct neighbour *n; int key_len = tbl->key_len; - u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; + u32 hash_val = tbl->hash(pkey, NULL); NEIGH_CACHE_STAT_INC(tbl, lookups); read_lock_bh(&tbl->lock); - for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (!memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); @@ -1998,12 +1998,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; + read_lock_bh(&tbl->lock); for (h = 0; h <= tbl->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; - read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { if (idx < s_idx) continue; @@ -2016,8 +2016,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, goto out; } } - read_unlock_bh(&tbl->lock); } + read_unlock_bh(&tbl->lock); rc = skb->len; out: cb->args[1] = h; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c448c7f6fde2..3c23760c5827 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -156,7 +156,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = ____kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + data = kmalloc_track_caller(size + sizeof(struct skb_shared_info), + gfp_mask); if (!data) goto nodata; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d172a9804448..5572071af735 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL If unsure, say Y. +config INET_XFRM_MODE_BEET + tristate "IP: IPsec BEET mode" + default y + select XFRM + ---help--- + Support for IPsec BEET mode. + + If unsure, say Y. + config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f66049e28aeb..15645c51520c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o +obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 13b29360d102..b5c205b57669 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (x->props.mode == XFRM_MODE_TRANSPORT || + x->props.mode == XFRM_MODE_BEET) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - - if (x->props.mode == XFRM_MODE_TUNNEL) { - mtu = ALIGN(mtu + 2, blksize); - } else { - /* The worst case. */ + int enclen = 0; + + switch (x->props.mode) { + case XFRM_MODE_TUNNEL: + mtu = ALIGN(mtu +2, blksize); + break; + default: + case XFRM_MODE_TRANSPORT: + /* The worst case */ mtu = ALIGN(mtu + 2, 4) + blksize - 4; + break; + case XFRM_MODE_BEET: + /* The worst case. */ + enclen = IPV4_BEET_PHMAXLEN; + mtu = ALIGN(mtu + enclen + 2, blksize); + break; } + if (esp->conf.padlen) mtu = ALIGN(mtu, esp->conf.padlen); - return mtu + x->props.header_len + esp->auth.icv_trunc_len; + return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen; } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 2017d36024d4..3839b706142e 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; + u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (t == NULL) @@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = XFRM_MODE_TUNNEL; + if (x->props.mode == XFRM_MODE_BEET) + mode = x->props.mode; + t->props.mode = mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 6dee03935f78..1445bb47fea4 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, skb->nh.iph->saddr = cp->vaddr; ip_send_check(skb->nh.iph); + /* For policy routing, packets originating from this + * machine itself may be routed differently to packets + * passing through. We want this packet to be routed as + * if it came from this machine itself. So re-compute + * the routing information. + */ + if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + goto drop; + skb = *pskb; + IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 5ac15379a0cf..e2005c6810a4 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -8,7 +8,7 @@ #include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) +int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) { struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt; @@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb) struct dst_entry *odst; unsigned int hh_len; + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(iph->saddr); + /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + if (addr_type == RTN_LOCAL) { fl.nl_u.ip4_u.daddr = iph->daddr; fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); @@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb); + return ip_route_me_harder(pskb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 021395b67463..d85d2de50449 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum, ct->tuplehash[!dir].tuple.src.u.all #endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; } return ret; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index fd0c05efed8a..ad0312d0e4fd 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module"); #define DEBUGP(format, args...) #endif -static inline struct rtable *route_reverse(struct sk_buff *skb, - struct tcphdr *tcph, int hook) -{ - struct iphdr *iph = skb->nh.iph; - struct dst_entry *odst; - struct flowi fl = {}; - struct rtable *rt; - - /* We don't require ip forwarding to be enabled to be able to - * send a RST reply for bridged traffic. */ - if (hook != NF_IP_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) { - fl.nl_u.ip4_u.daddr = iph->saddr; - if (hook == NF_IP_LOCAL_IN) - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->daddr; - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - - odst = skb->dst; - if (ip_route_input(skb, iph->saddr, iph->daddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return NULL; - } - dst_release(&rt->u.dst); - rt = (struct rtable *)skb->dst; - skb->dst = odst; - - fl.nl_u.ip4_u.daddr = iph->saddr; - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - } - - if (rt->u.dst.error) { - dst_release(&rt->u.dst); - return NULL; - } - - fl.proto = IPPROTO_TCP; - fl.fl_ip_sport = tcph->dest; - fl.fl_ip_dport = tcph->source; - security_skb_classify_flow(skb, &fl); - - xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); - - return rt; -} - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; - struct rtable *rt; __be16 tmp_port; __be32 tmp_addr; int needs_ack; - int hh_len; + unsigned int addr_type; /* IP header checks: fragment. */ if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) @@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP)) return; - if ((rt = route_reverse(oldskb, oth, hook)) == NULL) - return; - - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - /* We need a linear, writeable skb. We also need to expand headroom in case hh_len of incoming interface < hh_len of outgoing interface */ - nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb), + nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb), GFP_ATOMIC); - if (!nskb) { - dst_release(&rt->u.dst); + if (!nskb) return; - } - - dst_release(nskb->dst); - nskb->dst = &rt->u.dst; /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); @@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->window = 0; tcph->urg_ptr = 0; + /* Set DF, id = 0 */ + nskb->nh.iph->frag_off = htons(IP_DF); + nskb->nh.iph->id = 0; + + addr_type = RTN_UNSPEC; + if (hook != NF_IP_FORWARD +#ifdef CONFIG_BRIDGE_NETFILTER + || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) +#endif + ) + addr_type = RTN_LOCAL; + + if (ip_route_me_harder(&nskb, addr_type)) + goto free_nskb; + /* Adjust TCP checksum */ nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; @@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) nskb->nh.iph->daddr, csum_partial((char *)tcph, sizeof(struct tcphdr), 0)); - - /* Adjust IP TTL, DF */ + /* Adjust IP TTL */ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); - /* Set DF, id = 0 */ - nskb->nh.iph->frag_off = htons(IP_DF); - nskb->nh.iph->id = 0; /* Adjust IP checksum */ nskb->nh.iph->check = 0; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index e62ea2bb9c0a..b91f3582359b 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook, || (*pskb)->nfmark != nfmark #endif || (*pskb)->nh.iph->tos != tos)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; return ret; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f884cea14ff..cf06accbe687 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) u32 pkts_acked = 0; void (*rtt_sample)(struct sock *sk, u32 usrtt) = icsk->icsk_ca_ops->rtt_sample; - struct timeval tv; + struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6d6142f9c478..865d75214a9a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -675,6 +675,8 @@ do_append_data: udp_flush_pending_frames(sk); else if (!corkreq) err = udp_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; release_sock(sk); out: diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c new file mode 100644 index 000000000000..89cf59ea7bbe --- /dev/null +++ b/net/ipv4/xfrm4_mode_beet.c @@ -0,0 +1,139 @@ +/* + * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4. + * + * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> + * Miika Komu <miika@iki.fi> + * Herbert Xu <herbert@gondor.apana.org.au> + * Abhinav Pathak <abhinav.pathak@hiit.fi> + * Jeff Ahrenholz <ahrenholz@gmail.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dst.h> +#include <net/ip.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + * The following fields in it shall be filled in by x->type->output: + * tot_len + * check + * + * On exit, skb->h will be set to the start of the payload to be processed + * by x->type->output and skb->nh will be set to the top IP header. + */ +static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph, *top_iph = NULL; + int hdrlen, optlen; + + iph = skb->nh.iph; + skb->h.ipiph = iph; + + hdrlen = 0; + optlen = iph->ihl * 4 - sizeof(*iph); + if (unlikely(optlen)) + hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen); + top_iph = skb->nh.iph; + hdrlen = iph->ihl * 4 - optlen; + skb->h.raw += hdrlen; + + memmove(top_iph, iph, hdrlen); + if (unlikely(optlen)) { + struct ip_beet_phdr *ph; + + BUG_ON(optlen < 0); + + ph = (struct ip_beet_phdr *)skb->h.raw; + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8; + ph->nexthdr = top_iph->protocol; + + top_iph->protocol = IPPROTO_BEETPH; + top_iph->ihl = sizeof(struct iphdr) / 4; + } + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + + return 0; +} + +static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + int phlen = 0; + int optlen = 0; + __u8 ph_nexthdr = 0, protocol = 0; + int err = -EINVAL; + + protocol = iph->protocol; + + if (unlikely(iph->protocol == IPPROTO_BEETPH)) { + struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1); + + if (!pskb_may_pull(skb, sizeof(*ph))) + goto out; + + phlen = ph->hdrlen * 8; + optlen = phlen - ph->padlen - sizeof(*ph); + if (optlen < 0 || optlen & 3 || optlen > 250) + goto out; + + if (!pskb_may_pull(skb, phlen)) + goto out; + + ph_nexthdr = ph->nexthdr; + } + + skb_push(skb, sizeof(*iph) - phlen + optlen); + memmove(skb->data, skb->nh.raw, sizeof(*iph)); + skb->nh.raw = skb->data; + + iph = skb->nh.iph; + iph->ihl = (sizeof(*iph) + optlen) / 4; + iph->tot_len = htons(skb->len); + iph->daddr = x->sel.daddr.a4; + iph->saddr = x->sel.saddr.a4; + if (ph_nexthdr) + iph->protocol = ph_nexthdr; + else + iph->protocol = protocol; + iph->check = 0; + iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); + err = 0; +out: + return err; +} + +static struct xfrm_mode xfrm4_beet_mode = { + .input = xfrm4_beet_input, + .output = xfrm4_beet_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_BEET, +}; + +static int __init xfrm4_beet_init(void) +{ + return xfrm_register_mode(&xfrm4_beet_mode, AF_INET); +} + +static void __exit xfrm4_beet_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET); + BUG_ON(err); +} + +module_init(xfrm4_beet_init); +module_exit(xfrm4_beet_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index a2d211da2aba..a460e8132b4d 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -136,6 +136,16 @@ config INET6_XFRM_MODE_TUNNEL If unsure, say Y. +config INET6_XFRM_MODE_BEET + tristate "IPv6: IPsec BEET mode" + depends on IPV6 + default IPV6 + select XFRM + ---help--- + Support for IPsec BEET mode. + + If unsure, say Y. + config INET6_XFRM_MODE_ROUTEOPTIMIZATION tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" depends on IPV6 && EXPERIMENTAL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 0213c6612b58..87274e47fe32 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o +obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a2860e35efd7..71f59f18ede8 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -199,6 +199,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; + u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (!t) @@ -212,7 +213,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = XFRM_MODE_TUNNEL; + if (x->props.mode == XFRM_MODE_BEET) + mode = x->props.mode; + t->props.mode = mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9662561701d1..e0c3934a7e4b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct in6_addr *daddr, *final_p = NULL, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi *fl = &inet->cork.fl; + struct flowi fl; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -626,19 +626,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(fl, 0, sizeof(*fl)); + memset(&fl, 0, sizeof(fl)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl->fl_ip_dport = sin6->sin6_port; + fl.fl_ip_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -656,32 +656,32 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl->oif = sin6->sin6_scope_id; + fl.oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl->fl_ip_dport = inet->dport; + fl.fl_ip_dport = inet->dport; daddr = &np->daddr; - fl->fl6_flowlabel = np->flow_label; + fl.fl6_flowlabel = np->flow_label; connected = 1; } - if (!fl->oif) - fl->oif = sk->sk_bound_dev_if; + if (!fl.oif) + fl.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -695,39 +695,39 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl->proto = IPPROTO_UDP; - ipv6_addr_copy(&fl->fl6_dst, daddr); - if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl->fl6_src, &np->saddr); - fl->fl_ip_sport = inet->sport; + fl.proto = IPPROTO_UDP; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl_ip_sport = inet->sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, rt0->addr); + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { - fl->oif = np->mcast_oif; + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { + fl.oif = np->mcast_oif; connected = 0; } - security_sk_classify_flow(sk, fl); + security_sk_classify_flow(sk, &fl); - err = ip6_sk_dst_lookup(sk, &dst, fl); + err = ip6_sk_dst_lookup(sk, &dst, &fl); if (err) goto out; if (final_p) - ipv6_addr_copy(&fl->fl6_dst, final_p); + ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl->fl6_dst)) + if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -763,21 +763,23 @@ back_from_confirm: do_append_data: up->len += ulen; err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, fl, + sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? &np->saddr : #endif NULL); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c new file mode 100644 index 000000000000..edcfffa9e87b --- /dev/null +++ b/net/ipv6/xfrm6_mode_beet.c @@ -0,0 +1,107 @@ +/* + * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6. + * + * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> + * Miika Komu <miika@iki.fi> + * Herbert Xu <herbert@gondor.apana.org.au> + * Abhinav Pathak <abhinav.pathak@hiit.fi> + * Jeff Ahrenholz <ahrenholz@gmail.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dsfield.h> +#include <net/dst.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + * The following fields in it shall be filled in by x->type->output: + * payload_len + * + * On exit, skb->h will be set to the start of the encapsulation header to be + * filled in by x->type->output and skb->nh will be set to the nextheader field + * of the extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. The value of skb->data will always + * point to the top IP header. + */ +static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph, *top_iph; + u8 *prevhdr; + int hdr_len; + + skb_push(skb, x->props.header_len); + iph = skb->nh.ipv6h; + + hdr_len = ip6_find_1stfragopt(skb, &prevhdr); + skb->nh.raw = prevhdr - x->props.header_len; + skb->h.raw = skb->data + hdr_len; + memmove(skb->data, iph, hdr_len); + + skb->nh.raw = skb->data; + top_iph = skb->nh.ipv6h; + skb->nh.raw = &top_iph->nexthdr; + skb->h.ipv6h = top_iph + 1; + + ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); + ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); + + return 0; +} + +static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *ip6h; + int size = sizeof(struct ipv6hdr); + int err = -EINVAL; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + + skb_push(skb, size); + memmove(skb->data, skb->nh.raw, size); + skb->nh.raw = skb->data; + + skb->mac.raw = memmove(skb->data - skb->mac_len, + skb->mac.raw, skb->mac_len); + + ip6h = skb->nh.ipv6h; + ip6h->payload_len = htons(skb->len - size); + ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); + ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6); + err = 0; +out: + return err; +} + +static struct xfrm_mode xfrm6_beet_mode = { + .input = xfrm6_beet_input, + .output = xfrm6_beet_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_BEET, +}; + +static int __init xfrm6_beet_init(void) +{ + return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6); +} + +static void __exit xfrm6_beet_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6); + BUG_ON(err); +} + +module_init(xfrm6_beet_init); +module_exit(xfrm6_beet_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0a28d2c5c44f..ce94732b8e23 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -365,7 +365,7 @@ config NETFILTER_XT_MATCH_MULTIPORT config NETFILTER_XT_MATCH_PHYSDEV tristate '"physdev" match support' - depends on NETFILTER_XTABLES && BRIDGE_NETFILTER + depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER help Physdev packet matching matches against the physical bridge ports the IP packet arrived on or will leave by. diff --git a/net/sched/estimator.c b/net/sched/estimator.c deleted file mode 100644 index 0ebc98e9be2d..000000000000 --- a/net/sched/estimator.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * net/sched/estimator.c Simple rate estimator. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/init.h> -#include <net/sock.h> -#include <net/pkt_sched.h> - -/* - This code is NOT intended to be used for statistics collection, - its purpose is to provide a base for statistical multiplexing - for controlled load service. - If you need only statistics, run a user level daemon which - periodically reads byte counters. - - Unfortunately, rate estimation is not a very easy task. - F.e. I did not find a simple way to estimate the current peak rate - and even failed to formulate the problem 8)8) - - So I preferred not to built an estimator into the scheduler, - but run this task separately. - Ideally, it should be kernel thread(s), but for now it runs - from timers, which puts apparent top bounds on the number of rated - flows, has minimal overhead on small, but is enough - to handle controlled load service, sets of aggregates. - - We measure rate over A=(1<<interval) seconds and evaluate EWMA: - - avrate = avrate*(1-W) + rate*W - - where W is chosen as negative power of 2: W = 2^(-ewma_log) - - The resulting time constant is: - - T = A/(-ln(1-W)) - - - NOTES. - - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - - * Minimal interval is HZ/4=250msec (it is the greatest common divisor - for HZ=100 and HZ=1024 8)), maximal interval - is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals - are too expensive, longer ones can be implemented - at user level painlessly. - */ - -#define EST_MAX_INTERVAL 5 - -struct qdisc_estimator -{ - struct qdisc_estimator *next; - struct tc_stats *stats; - spinlock_t *stats_lock; - unsigned interval; - int ewma_log; - u64 last_bytes; - u32 last_packets; - u32 avpps; - u32 avbps; -}; - -struct qdisc_estimator_head -{ - struct timer_list timer; - struct qdisc_estimator *list; -}; - -static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1]; - -/* Estimator array lock */ -static DEFINE_RWLOCK(est_lock); - -static void est_timer(unsigned long arg) -{ - int idx = (int)arg; - struct qdisc_estimator *e; - - read_lock(&est_lock); - for (e = elist[idx].list; e; e = e->next) { - struct tc_stats *st = e->stats; - u64 nbytes; - u32 npackets; - u32 rate; - - spin_lock(e->stats_lock); - nbytes = st->bytes; - npackets = st->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); - e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; - st->bps = (e->avbps+0xF)>>5; - - rate = (npackets - e->last_packets)<<(12 - idx); - e->last_packets = npackets; - e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; - e->stats->pps = (e->avpps+0x1FF)>>10; - spin_unlock(e->stats_lock); - } - - mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); - read_unlock(&est_lock); -} - -int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt) -{ - struct qdisc_estimator *est; - struct tc_estimator *parm = RTA_DATA(opt); - - if (RTA_PAYLOAD(opt) < sizeof(*parm)) - return -EINVAL; - - if (parm->interval < -2 || parm->interval > 3) - return -EINVAL; - - est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) - return -ENOBUFS; - - est->interval = parm->interval + 2; - est->stats = stats; - est->stats_lock = stats_lock; - est->ewma_log = parm->ewma_log; - est->last_bytes = stats->bytes; - est->avbps = stats->bps<<5; - est->last_packets = stats->packets; - est->avpps = stats->pps<<10; - - est->next = elist[est->interval].list; - if (est->next == NULL) { - init_timer(&elist[est->interval].timer); - elist[est->interval].timer.data = est->interval; - elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); - elist[est->interval].timer.function = est_timer; - add_timer(&elist[est->interval].timer); - } - write_lock_bh(&est_lock); - elist[est->interval].list = est; - write_unlock_bh(&est_lock); - return 0; -} - -void qdisc_kill_estimator(struct tc_stats *stats) -{ - int idx; - struct qdisc_estimator *est, **pest; - - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - int killed = 0; - pest = &elist[idx].list; - while ((est=*pest) != NULL) { - if (est->stats != stats) { - pest = &est->next; - continue; - } - - write_lock_bh(&est_lock); - *pest = est->next; - write_unlock_bh(&est_lock); - - kfree(est); - killed++; - } - if (killed && elist[idx].list == NULL) - del_timer(&elist[idx].timer); - } -} - -EXPORT_SYMBOL(qdisc_kill_estimator); -EXPORT_SYMBOL(qdisc_new_estimator); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6c058e3660c0..bb3ddd4784b1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -391,7 +391,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q, /* If this triggers, it is a bug in this code, but it need not be fatal */ static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) { - if (!RB_EMPTY_NODE(rb)) { + if (RB_EMPTY_NODE(rb)) { WARN_ON(1); } else { rb_erase(rb, root); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 638c0b576203..447d9aef4605 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -903,9 +903,9 @@ out_seq: struct gss_svc_data { /* decoded gss client cred: */ struct rpc_gss_wire_cred clcred; - /* pointer to the beginning of the procedure-specific results, - * which may be encrypted/checksummed in svcauth_gss_release: */ - __be32 *body_start; + /* save a pointer to the beginning of the encoded verifier, + * for use in encryption/checksumming in svcauth_gss_release: */ + __be32 *verf_start; struct rsc *rsci; }; @@ -968,7 +968,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if (!svcdata) goto auth_err; rqstp->rq_auth_data = svcdata; - svcdata->body_start = NULL; + svcdata->verf_start = NULL; svcdata->rsci = NULL; gc = &svcdata->clcred; @@ -1097,6 +1097,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto complete; case RPC_GSS_PROC_DATA: *authp = rpcsec_gsserr_ctxproblem; + svcdata->verf_start = resv->iov_base + resv->iov_len; if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; rqstp->rq_cred = rsci->cred; @@ -1110,7 +1111,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) gc->gc_seq, rsci->mechctx)) goto auth_err; /* placeholders for length and seq. number: */ - svcdata->body_start = resv->iov_base + resv->iov_len; svc_putnl(resv, 0); svc_putnl(resv, 0); break; @@ -1119,7 +1119,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) gc->gc_seq, rsci->mechctx)) goto auth_err; /* placeholders for length and seq. number: */ - svcdata->body_start = resv->iov_base + resv->iov_len; svc_putnl(resv, 0); svc_putnl(resv, 0); break; @@ -1147,6 +1146,32 @@ out: return ret; } +u32 * +svcauth_gss_prepare_to_wrap(struct xdr_buf *resbuf, struct gss_svc_data *gsd) +{ + u32 *p, verf_len; + + p = gsd->verf_start; + gsd->verf_start = NULL; + + /* If the reply stat is nonzero, don't wrap: */ + if (*(p-1) != rpc_success) + return NULL; + /* Skip the verifier: */ + p += 1; + verf_len = ntohl(*p++); + p += XDR_QUADLEN(verf_len); + /* move accept_stat to right place: */ + memcpy(p, p + 2, 4); + /* Also don't wrap if the accept stat is nonzero: */ + if (*p != rpc_success) { + resbuf->head[0].iov_len -= 2 * 4; + return NULL; + } + p++; + return p; +} + static inline int svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) { @@ -1160,17 +1185,9 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) int integ_offset, integ_len; int stat = -EINVAL; - p = gsd->body_start; - gsd->body_start = NULL; - /* move accept_stat to right place: */ - memcpy(p, p + 2, 4); - /* Don't wrap in failure case: */ - /* Counting on not getting here if call was not even accepted! */ - if (*p != rpc_success) { - resbuf->head[0].iov_len -= 2 * 4; + p = svcauth_gss_prepare_to_wrap(resbuf, gsd); + if (p == NULL) goto out; - } - p++; integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; integ_len = resbuf->len - integ_offset; BUG_ON(integ_len % 4); @@ -1191,7 +1208,6 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) resbuf->tail[0].iov_base = resbuf->head[0].iov_base + resbuf->head[0].iov_len; resbuf->tail[0].iov_len = 0; - rqstp->rq_restailpage = 0; resv = &resbuf->tail[0]; } else { resv = &resbuf->tail[0]; @@ -1223,24 +1239,16 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) int offset; int pad; - p = gsd->body_start; - gsd->body_start = NULL; - /* move accept_stat to right place: */ - memcpy(p, p + 2, 4); - /* Don't wrap in failure case: */ - /* Counting on not getting here if call was not even accepted! */ - if (*p != rpc_success) { - resbuf->head[0].iov_len -= 2 * 4; + p = svcauth_gss_prepare_to_wrap(resbuf, gsd); + if (p == NULL) return 0; - } - p++; len = p++; offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base; *p++ = htonl(gc->gc_seq); inpages = resbuf->pages; /* XXX: Would be better to write some xdr helper functions for * nfs{2,3,4}xdr.c that place the data right, instead of copying: */ - if (resbuf->tail[0].iov_base && rqstp->rq_restailpage == 0) { + if (resbuf->tail[0].iov_base) { BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base + PAGE_SIZE); BUG_ON(resbuf->tail[0].iov_base < resbuf->head[0].iov_base); @@ -1258,7 +1266,6 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp) resbuf->tail[0].iov_base = resbuf->head[0].iov_base + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE; resbuf->tail[0].iov_len = 0; - rqstp->rq_restailpage = 0; } if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages)) return -ENOMEM; @@ -1282,7 +1289,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; /* Release can be called twice, but we only wrap once. */ - if (gsd->body_start == NULL) + if (gsd->verf_start == NULL) goto out; /* normally not set till svc_send, but we need it here: */ /* XXX: what for? Do we mess it up the moment we call svc_putu32 diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a99e67b164c1..c2c8bb20d07f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -417,18 +417,15 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) if (size > RPCSVC_MAXPAYLOAD) size = RPCSVC_MAXPAYLOAD; pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE; - rqstp->rq_argused = 0; - rqstp->rq_resused = 0; arghi = 0; BUG_ON(pages > RPCSVC_MAXPAGES); while (pages) { struct page *p = alloc_page(GFP_KERNEL); if (!p) break; - rqstp->rq_argpages[arghi++] = p; + rqstp->rq_pages[arghi++] = p; pages--; } - rqstp->rq_arghi = arghi; return ! pages; } @@ -438,14 +435,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) static void svc_release_buffer(struct svc_rqst *rqstp) { - while (rqstp->rq_arghi) - put_page(rqstp->rq_argpages[--rqstp->rq_arghi]); - while (rqstp->rq_resused) { - if (rqstp->rq_respages[--rqstp->rq_resused] == NULL) - continue; - put_page(rqstp->rq_respages[rqstp->rq_resused]); - } - rqstp->rq_argused = 0; + int i; + for (i=0; i<ARRAY_SIZE(rqstp->rq_pages); i++) + if (rqstp->rq_pages[i]) + put_page(rqstp->rq_pages[i]); } /* @@ -651,23 +644,32 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) unsigned long flags; int i, error = 0, dummy; - progp = serv->sv_program; - - dprintk("RPC: svc_register(%s, %s, %d)\n", - progp->pg_name, proto == IPPROTO_UDP? "udp" : "tcp", port); - if (!port) clear_thread_flag(TIF_SIGPENDING); - for (i = 0; i < progp->pg_nvers; i++) { - if (progp->pg_vers[i] == NULL) - continue; - error = rpc_register(progp->pg_prog, i, proto, port, &dummy); - if (error < 0) - break; - if (port && !dummy) { - error = -EACCES; - break; + for (progp = serv->sv_program; progp; progp = progp->pg_next) { + for (i = 0; i < progp->pg_nvers; i++) { + if (progp->pg_vers[i] == NULL) + continue; + + dprintk("RPC: svc_register(%s, %s, %d, %d)%s\n", + progp->pg_name, + proto == IPPROTO_UDP? "udp" : "tcp", + port, + i, + progp->pg_vers[i]->vs_hidden? + " (but not telling portmap)" : ""); + + if (progp->pg_vers[i]->vs_hidden) + continue; + + error = rpc_register(progp->pg_prog, i, proto, port, &dummy); + if (error < 0) + break; + if (port && !dummy) { + error = -EACCES; + break; + } } } @@ -697,7 +699,7 @@ svc_process(struct svc_rqst *rqstp) u32 dir, prog, vers, proc; __be32 auth_stat, rpc_stat; int auth_res; - __be32 *accept_statp; + __be32 *reply_statp; rpc_stat = rpc_success; @@ -707,10 +709,10 @@ svc_process(struct svc_rqst *rqstp) /* setup response xdr_buf. * Initially it has just one page */ - svc_take_page(rqstp); /* must succeed */ + rqstp->rq_resused = 1; resv->iov_base = page_address(rqstp->rq_respages[0]); resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages+1; + rqstp->rq_res.pages = rqstp->rq_respages + 1; rqstp->rq_res.len = 0; rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; @@ -738,7 +740,7 @@ svc_process(struct svc_rqst *rqstp) goto err_bad_rpc; /* Save position in case we later decide to reject: */ - accept_statp = resv->iov_base + resv->iov_len; + reply_statp = resv->iov_base + resv->iov_len; svc_putnl(resv, 0); /* ACCEPT */ @@ -886,7 +888,7 @@ err_bad_auth: dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); serv->sv_stats->rpcbadauth++; /* Restore write pointer to location of accept status: */ - xdr_ressize_check(rqstp, accept_statp); + xdr_ressize_check(rqstp, reply_statp); svc_putnl(resv, 1); /* REJECT */ svc_putnl(resv, 1); /* AUTH_ERROR */ svc_putnl(resv, ntohl(auth_stat)); /* status */ @@ -926,3 +928,18 @@ err_bad: svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } + +/* + * Return (transport-specific) limit on the rpc payload. + */ +u32 svc_max_payload(const struct svc_rqst *rqstp) +{ + int max = RPCSVC_MAXPAYLOAD_TCP; + + if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM) + max = RPCSVC_MAXPAYLOAD_UDP; + if (rqstp->rq_server->sv_bufsz < max) + max = rqstp->rq_server->sv_bufsz; + return max; +} +EXPORT_SYMBOL_GPL(svc_max_payload); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 40d41a2831d7..e1bd933629fe 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -9,6 +9,7 @@ #include <linux/seq_file.h> #include <linux/hash.h> #include <linux/string.h> +#include <net/sock.h> #define RPCDBG_FACILITY RPCDBG_AUTH @@ -375,6 +376,44 @@ void svcauth_unix_purge(void) cache_purge(&ip_map_cache); } +static inline struct ip_map * +ip_map_cached_get(struct svc_rqst *rqstp) +{ + struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix; + if (ipm != NULL) { + if (!cache_valid(&ipm->h)) { + /* + * The entry has been invalidated since it was + * remembered, e.g. by a second mount from the + * same IP address. + */ + rqstp->rq_sock->sk_info_authunix = NULL; + cache_put(&ipm->h, &ip_map_cache); + return NULL; + } + cache_get(&ipm->h); + } + return ipm; +} + +static inline void +ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) +{ + struct svc_sock *svsk = rqstp->rq_sock; + + if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL) + svsk->sk_info_authunix = ipm; /* newly cached, keep the reference */ + else + cache_put(&ipm->h, &ip_map_cache); +} + +void +svcauth_unix_info_release(void *info) +{ + struct ip_map *ipm = info; + cache_put(&ipm->h, &ip_map_cache); +} + static int svcauth_unix_set_client(struct svc_rqst *rqstp) { @@ -384,8 +423,10 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) if (rqstp->rq_proc == 0) return SVC_OK; - ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, - rqstp->rq_addr.sin_addr); + ipm = ip_map_cached_get(rqstp); + if (ipm == NULL) + ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class, + rqstp->rq_addr.sin_addr); if (ipm == NULL) return SVC_DENIED; @@ -400,7 +441,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case 0: rqstp->rq_client = &ipm->m_client->h; kref_get(&rqstp->rq_client->ref); - cache_put(&ipm->h, &ip_map_cache); + ip_map_cached_put(rqstp, ipm); break; } return SVC_OK; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cba85d195222..b39e7e2b648f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -313,7 +313,7 @@ svc_sock_release(struct svc_rqst *rqstp) svc_release_skb(rqstp); - svc_free_allpages(rqstp); + svc_free_res_pages(rqstp); rqstp->rq_res.page_len = 0; rqstp->rq_res.page_base = 0; @@ -412,7 +412,8 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); + len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, + xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; slen -= xdr->head[0].iov_len; @@ -437,8 +438,9 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) } /* send tail */ if (xdr->tail[0].iov_len) { - result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], - ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), + result = kernel_sendpage(sock, rqstp->rq_respages[0], + ((unsigned long)xdr->tail[0].iov_base) + & (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); if (result > 0) @@ -492,7 +494,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) } spin_unlock(&serv->sv_lock); if (closesk) + /* Should unregister with portmap, but you cannot + * unregister just one protocol... + */ svc_delete_socket(closesk); + else if (toclose) + return -ENOENT; return len; } EXPORT_SYMBOL(svc_sock_names); @@ -703,9 +710,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) if (len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = len; rqstp->rq_arg.page_len = 0; + rqstp->rq_respages = rqstp->rq_pages+1; } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; + rqstp->rq_respages = rqstp->rq_pages + 1 + + (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; } if (serv->sv_stats) @@ -946,7 +955,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_sock *svsk = rqstp->rq_sock; struct svc_serv *serv = svsk->sk_server; int len; - struct kvec vec[RPCSVC_MAXPAGES]; + struct kvec *vec; int pnum, vlen; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", @@ -1044,15 +1053,17 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) len = svsk->sk_reclen; set_bit(SK_DATA, &svsk->sk_flags); + vec = rqstp->rq_vec; vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; pnum = 1; while (vlen < len) { - vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]); + vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); vec[pnum].iov_len = PAGE_SIZE; pnum++; vlen += PAGE_SIZE; } + rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ len = svc_recvfrom(rqstp, vec, pnum, len); @@ -1204,7 +1215,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) struct svc_sock *svsk =NULL; struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; - int len; + int len, i; int pages; struct xdr_buf *arg; DECLARE_WAITQUEUE(wait, current); @@ -1221,27 +1232,22 @@ svc_recv(struct svc_rqst *rqstp, long timeout) "svc_recv: service %p, wait queue active!\n", rqstp); - /* Initialize the buffers */ - /* first reclaim pages that were moved to response list */ - svc_pushback_allpages(rqstp); /* now allocate needed pages. If we get a failure, sleep briefly */ pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE; - while (rqstp->rq_arghi < pages) { - struct page *p = alloc_page(GFP_KERNEL); - if (!p) { - schedule_timeout_uninterruptible(msecs_to_jiffies(500)); - continue; + for (i=0; i < pages ; i++) + while (rqstp->rq_pages[i] == NULL) { + struct page *p = alloc_page(GFP_KERNEL); + if (!p) + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); + rqstp->rq_pages[i] = p; } - rqstp->rq_argpages[rqstp->rq_arghi++] = p; - } /* Make arg->head point to first page and arg->pages point to rest */ arg = &rqstp->rq_arg; - arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]); + arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); arg->head[0].iov_len = PAGE_SIZE; - rqstp->rq_argused = 1; - arg->pages = rqstp->rq_argpages + 1; + arg->pages = rqstp->rq_pages + 1; arg->page_base = 0; /* save at least one page for response */ arg->page_len = (pages-2)*PAGE_SIZE; @@ -1604,6 +1610,8 @@ svc_delete_socket(struct svc_sock *svsk) sockfd_put(svsk->sk_sock); else sock_release(svsk->sk_sock); + if (svsk->sk_info_authunix != NULL) + svcauth_unix_info_release(svsk->sk_info_authunix); kfree(svsk); } else { spin_unlock_bh(&serv->sv_lock); @@ -1699,6 +1707,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_prot = dr->prot; rqstp->rq_addr = dr->addr; rqstp->rq_daddr = dr->daddr; + rqstp->rq_respages = rqstp->rq_pages; return dr->argslen<<2; } diff --git a/net/tipc/link.c b/net/tipc/link.c index 693f02eca6d6..53bc8cb5adbc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1666,8 +1666,9 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) char addr_string[16]; tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); - tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle); - + tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n", + (unsigned long) TIPC_SKB_CB(buf)->handle); + n_ptr = l_ptr->owner->next; tipc_node_lock(n_ptr); diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 6ac4e4f033ac..d401dc8f05ed 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -41,17 +41,18 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t return (h ^ (h >> 16)) & hmask; } -static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, +static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, unsigned short family, unsigned int hmask) { unsigned int h = family; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(saddr); + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(saddr); + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; }; return (h ^ (h >> 16)) & hmask; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b6e2e79d7261..2a7861661f14 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -778,8 +778,9 @@ void xfrm_policy_flush(u8 type) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; - int i; + int i, killed; + killed = 0; again1: hlist_for_each_entry(pol, entry, &xfrm_policy_inexact[dir], bydst) { @@ -790,6 +791,7 @@ void xfrm_policy_flush(u8 type) write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(pol); + killed++; write_lock_bh(&xfrm_policy_lock); goto again1; @@ -807,13 +809,14 @@ void xfrm_policy_flush(u8 type) write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(pol); + killed++; write_lock_bh(&xfrm_policy_lock); goto again2; } } - xfrm_policy_count[dir] = 0; + xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f927b7330f02..39b8bf3a9ded 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -63,10 +63,11 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } -static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, +static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, unsigned short family) { - return __xfrm_src_hash(addr, family, xfrm_state_hmask); + return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask); } static inline unsigned int @@ -92,7 +93,8 @@ static void xfrm_hash_transfer(struct hlist_head *list, nhashmask); hlist_add_head(&x->bydst, ndsttable+h); - h = __xfrm_src_hash(&x->props.saddr, x->props.family, + h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, + x->props.family, nhashmask); hlist_add_head(&x->bysrc, nsrctable+h); @@ -458,7 +460,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(saddr, family); + unsigned int h = xfrm_src_hash(daddr, saddr, family); struct xfrm_state *x; struct hlist_node *entry; @@ -587,7 +589,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(saddr, family); + h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); @@ -622,7 +624,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(&x->props.saddr, x->props.family); + h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); if (x->id.spi) { @@ -748,7 +750,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(saddr, family); + h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c59a78d2923a..d54b3a70d5df 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -211,6 +211,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_BEET: break; default: |