diff options
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 138 |
1 files changed, 73 insertions, 65 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 860b3fd2f54b..8322e479f299 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -379,12 +379,12 @@ static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; - pde = proc_create("rt_cache", S_IRUGO, net->proc_net, + pde = proc_create("rt_cache", 0444, net->proc_net, &rt_cache_seq_fops); if (!pde) goto err1; - pde = proc_create("rt_cache", S_IRUGO, + pde = proc_create("rt_cache", 0444, net->proc_net_stat, &rt_cpu_seq_fops); if (!pde) goto err2; @@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr) static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) { rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; rt->dst.expires = fnhe->fnhe_expires; if (fnhe->fnhe_gw) { @@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh } static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, unsigned long expires) + u32 pmtu, bool lock, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; @@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) + if (pmtu) { fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; + } fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); @@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = expires; /* Exception created; mark the cached routes for the nexthop @@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, jiffies + ip_rt_gc_timeout); + 0, false, + jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; struct fib_result res; + bool lock = false; - if (dst_metric_locked(dst, RTAX_MTU)) + if (ip_mtu_locked(dst)) return; if (ipv4_mtu(dst) < mtu) return; - if (mtu < ip_rt_min_pmtu) + if (mtu < ip_rt_min_pmtu) { + lock = true; mtu = ip_rt_min_pmtu; + } if (rt->rt_pmtu == mtu && time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) @@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, + update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = READ_ONCE(dst->dev->mtu); - if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { + if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1393,7 +1401,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); -static void rt_add_uncached_list(struct rtable *rt) +void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); @@ -1404,14 +1412,8 @@ static void rt_add_uncached_list(struct rtable *rt) spin_unlock_bh(&ul->lock); } -static void ipv4_dst_destroy(struct dst_entry *dst) +void rt_del_uncached_list(struct rtable *rt) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); - struct rtable *rt = (struct rtable *) dst; - - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1421,6 +1423,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst) } } +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + struct rtable *rt = (struct rtable *)dst; + + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); + + rt_del_uncached_list(rt); +} + void rt_flush_dev(struct net_device *dev) { struct net *net = dev_net(dev); @@ -1516,9 +1529,9 @@ struct rtable *rt_dst_alloc(struct net_device *dev, rt->rt_is_input = 0; rt->rt_iif = 0; rt->rt_pmtu = 0; + rt->rt_mtu_locked = 0; rt->rt_gateway = 0; rt->rt_uses_gateway = 0; - rt->rt_table_id = 0; INIT_LIST_HEAD(&rt->rt_uncached); rt->dst.output = ip_output; @@ -1654,19 +1667,6 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static void set_lwt_redirect(struct rtable *rth) -{ - if (lwtunnel_output_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_output = rth->dst.output; - rth->dst.output = lwtunnel_output; - } - - if (lwtunnel_input_redirect(rth->dst.lwtstate)) { - rth->dst.lwtstate->orig_input = rth->dst.input; - rth->dst.input = lwtunnel_input; - } -} - /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1749,15 +1749,13 @@ rt_cache: } rth->rt_is_input = 1; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, do_cache); - set_lwt_redirect(rth); + lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1773,44 +1771,45 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); + const struct iphdr *key_iph = outer_iph; const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; struct icmphdr _icmph; - hash_keys->addrs.v4addrs.src = outer_iph->saddr; - hash_keys->addrs.v4addrs.dst = outer_iph->daddr; if (likely(outer_iph->protocol != IPPROTO_ICMP)) - return; + goto out; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) - return; + goto out; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) - return; + goto out; if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_REDIRECT && icmph->type != ICMP_TIME_EXCEEDED && icmph->type != ICMP_PARAMETERPROB) - return; + goto out; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) - return; - hash_keys->addrs.v4addrs.src = inner_iph->saddr; - hash_keys->addrs.v4addrs.dst = inner_iph->daddr; + goto out; + + key_iph = inner_iph; +out: + hash_keys->addrs.v4addrs.src = key_iph->saddr; + hash_keys->addrs.v4addrs.dst = key_iph->daddr; } /* if skb is set it will be used and fl4 can be NULL */ -int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, - const struct sk_buff *skb) +int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, + const struct sk_buff *skb, struct flow_keys *flkeys) { - struct net *net = fi->fib_net; struct flow_keys hash_keys; u32 mhash; @@ -1834,15 +1833,20 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, /* short-circuit if we already have L4 hash present */ if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; + memset(&hash_keys, 0, sizeof(hash_keys)); - skb_flow_dissect_flow_keys(skb, &keys, flag); + + if (!flkeys) { + skb_flow_dissect_flow_keys(skb, &keys, flag); + flkeys = &keys; + } hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; - hash_keys.ports.src = keys.ports.src; - hash_keys.ports.dst = keys.ports.dst; - hash_keys.basic.ip_proto = keys.basic.ip_proto; + hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -1858,17 +1862,17 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, return mhash >> 1; } -EXPORT_SYMBOL_GPL(fib_multipath_hash); #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos, + struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(res->fi, NULL, skb); + int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); fib_select_multipath(res, h); } @@ -1894,13 +1898,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct fib_result *res) { struct in_device *in_dev = __in_dev_get_rcu(dev); + struct flow_keys *flkeys = NULL, _flkeys; + struct net *net = dev_net(dev); struct ip_tunnel_info *tun_info; - struct flowi4 fl4; + int err = -EINVAL; unsigned int flags = 0; u32 itag = 0; struct rtable *rth; - int err = -EINVAL; - struct net *net = dev_net(dev); + struct flowi4 fl4; bool do_cache; /* IP on this device is disabled. */ @@ -1959,6 +1964,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) + flkeys = &_flkeys; + err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) @@ -1984,7 +1993,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res->type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); out: return err; brd_input: @@ -2026,8 +2035,6 @@ local_input: rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); if (res->type == RTN_UNREACHABLE) { @@ -2256,8 +2263,6 @@ add: return ERR_PTR(-ENOBUFS); rth->rt_iif = orig_oif; - if (res->table) - rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(out_slow_tot); @@ -2279,7 +2284,7 @@ add: } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); - set_lwt_redirect(rth); + lwtunnel_set_redirect(&rth->dst); return rth; } @@ -2541,6 +2546,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; + rt->rt_mtu_locked = ort->rt_mtu_locked; rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; @@ -2643,6 +2649,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; + if (rt->rt_mtu_locked && expires) + metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; @@ -2787,7 +2795,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, rt->rt_flags |= RTCF_NOTIFY; if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) - table_id = rt->rt_table_id; + table_id = res.table ? res.table->tb_id : 0; if (rtm->rtm_flags & RTM_F_FIB_MATCH) { if (!res.fi) { |