diff options
Diffstat (limited to 'net/ipv6/ip6_output.c')
-rw-r--r-- | net/ipv6/ip6_output.c | 169 |
1 files changed, 90 insertions, 79 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 61d403ee1031..e6a7bd15b9b7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -55,8 +55,9 @@ #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> +#include <net/l3mdev.h> -static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && - ((mroute6_socket(dev_net(dev), skb) && + ((mroute6_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { @@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { - IP6_INC_STATS(dev_net(dev), idev, + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, - skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && @@ -116,48 +116,49 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(sk, skb, ip6_finish_output2); + return ip6_fragment(net, sk, skb, ip6_finish_output2); else - return ip6_finish_output2(sk, skb); + return ip6_finish_output2(net, sk, skb); } -int ip6_output(struct sock *sk, struct sk_buff *skb) +int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(dev), idev, - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* - * xmit an sk_buff (used by TCP, SCTP and DCCP) + * xmit an sk_buff (used by TCP, SCTP and DCCP) + * Note : socket lock is not held for SYNACK packets, but might be modified + * by calls to skb_set_owner_w() and ipv6_local_error(), + * which are using proper atomic operations or spinlocks. */ - -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, +int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; @@ -186,7 +187,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, } consume_skb(skb); skb = skb2; - skb_set_owner_w(skb, sk); + /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically, + * it is safe to call in our context (socket lock not held) + */ + skb_set_owner_w(skb, (struct sock *)sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -224,12 +228,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, dst_output_sk); + /* hooks should never assume socket lock is held. + * we promote our socket to non const + */ + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, (struct sock *)sk, skb, NULL, dst->dev, + dst_output); } skb->dev = dst->dev; - ipv6_local_error(sk, EMSGSIZE, fl6, mtu); + /* ipv6_local_error() does not require socket lock, + * we promote our socket to non const + */ + ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -317,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -515,8 +528,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: @@ -543,8 +556,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -557,7 +570,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -584,11 +596,17 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } + if (mtu < hlen + sizeof(struct frag_hdr) + 8) + goto fail_toobig; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto fail; + hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); @@ -677,7 +695,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ip6_copy_metadata(frag, skb); } - err = output(sk, skb); + err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -717,10 +735,6 @@ slow_path_clean: } slow_path: - if ((skb->ip_summed == CHECKSUM_PARTIAL) && - skb_checksum_help(skb)) - goto fail; - left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ @@ -805,7 +819,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(sk, frag); + err = output(net, sk, frag); if (err) goto fail; @@ -877,7 +891,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } @@ -886,7 +901,7 @@ out: return dst; } -static int ip6_dst_lookup_tail(struct net *net, struct sock *sk, +static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -1017,7 +1032,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * It returns a valid dst pointer on success, or a pointer encoded * error code. */ -struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, +struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; @@ -1029,7 +1044,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; if (!fl6->flowi6_oif) - fl6->flowi6_oif = dst->dev->ifindex; + fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1255,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk, struct rt6_info *rt = (struct rt6_info *)cork->dst; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; + unsigned int maxnonfragsize, headersize; skb = skb_peek_tail(queue); if (!skb) { @@ -1272,38 +1288,43 @@ static int __ip6_append_data(struct sock *sk, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); - if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - unsigned int maxnonfragsize, headersize; - - headersize = sizeof(struct ipv6hdr) + - (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + - rt->rt6i_nfheader_len; - - if (ip6_sk_ignore_df(sk)) - maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; - else - maxnonfragsize = mtu; + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + if (cork->length + length > mtu - headersize && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } - /* dontfrag active */ - if ((cork->length + length > mtu - headersize) && dontfrag && - (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu - headersize + - sizeof(struct ipv6hdr)); - goto emsgsize; - } + if (ip6_sk_ignore_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; - if (cork->length + length > maxnonfragsize - headersize) { + if (cork->length + length > maxnonfragsize - headersize) { emsgsize: - ipv6_local_error(sk, EMSGSIZE, fl6, - mtu - headersize + - sizeof(struct ipv6hdr)); - return -EMSGSIZE; - } + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); + return -EMSGSIZE; } + /* CHECKSUM_PARTIAL only with no extension headers and when + * we are not going to fragment + */ + if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && + headersize == sizeof(struct ipv6hdr) && + length < mtu - headersize && + !(flags & MSG_MORE) && + rt->dst.dev->features & NETIF_F_V6_CSUM) + csummode = CHECKSUM_PARTIAL; + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && @@ -1311,16 +1332,6 @@ emsgsize: tskey = sk->sk_tskey++; } - /* If this is the first and only packet and device - * supports checksum offloading, let's use it. - * Use transhdrlen, same as IPv4, because partial - * sums only work when transhdrlen is set. - */ - if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && - length + fragheaderlen < mtu && - rt->dst.dev->features & NETIF_F_V6_CSUM && - !exthdrlen) - csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1683,7 +1694,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); int err; - err = ip6_local_out(skb); + err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) err = net_xmit_errno(err); |