From 419df12fb5fa558451319276838c1842f2b11f8f Mon Sep 17 00:00:00 2001
From: WANG Cong
Date: Tue, 31 Mar 2015 11:01:46 -0700
Subject: net: move fib_rules_unregister() under rtnl lock

We have to hold rtnl lock for fib_rules_unregister() otherwise the
following race could happen:

	fib_rules_unregister():		fib_nl_delrule():
	...				...
	...				ops = lookup_rules_ops();
	list_del_rcu(&ops->list);
					list_for_each_entry(ops->rules) {
	fib_rules_cleanup_ops(ops);	...
	  list_del_rcu();		  list_del_rcu();
					}

Note, net->rules_mod_lock is actually not needed at all, either upper
layer netns code or rtnl lock guarantees we are safe.

Cc: Alexander Duyck
Cc: Thomas Graf
Signed-off-by: Cong Wang
Signed-off-by: David S. Miller
---
 net/core/fib_rules.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 44706e81b2e0..e4fdc9dfb2c7 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -175,9 +175,9 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 
 	spin_lock(&net->rules_mod_lock);
 	list_del_rcu(&ops->list);
-	fib_rules_cleanup_ops(ops);
 	spin_unlock(&net->rules_mod_lock);
 
+	fib_rules_cleanup_ops(ops);
 	call_rcu(&ops->rcu, fib_rules_put_rcu);
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
--
cgit v1.2.1
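For clarity, this is roughly how fib_rules_unregister() reads once the hunk
above is applied. The local variable initialization (net = ops->fro_net) is
assumed from the unchanged context around the hunk and is not part of the
diff:

	/* Sketch of the post-patch function: the caller is expected to hold
	 * rtnl_lock(), so walking and emptying the ops rule list in
	 * fib_rules_cleanup_ops() can no longer race with fib_nl_delrule(),
	 * which also runs under rtnl.
	 */
	void fib_rules_unregister(struct fib_rules_ops *ops)
	{
		struct net *net = ops->fro_net;	/* assumed, not shown in the hunk */

		spin_lock(&net->rules_mod_lock);
		list_del_rcu(&ops->list);
		spin_unlock(&net->rules_mod_lock);

		fib_rules_cleanup_ops(ops);	/* moved out of rules_mod_lock, still under rtnl */
		call_rcu(&ops->rcu, fib_rules_put_rcu);
	}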
From 6d458f5b4ece8542a5c2281e40008823fec91814 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel
Date: Fri, 3 Apr 2015 12:02:36 +0200
Subject: Revert "netns: don't clear nsid too early on removal"

This reverts commit 4217291e592d ("netns: don't clear nsid too early on
removal").

This is not the right fix, it introduces races.

Signed-off-by: Nicolas Dichtel
Signed-off-by: David S. Miller
---
 net/core/net_namespace.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'net/core')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 5221f975a4cc..cb5290b8c428 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -349,7 +349,7 @@ static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 static void cleanup_net(struct work_struct *work)
 {
 	const struct pernet_operations *ops;
-	struct net *net, *tmp, *peer;
+	struct net *net, *tmp;
 	struct list_head net_kill_list;
 	LIST_HEAD(net_exit_list);
 
@@ -365,6 +365,14 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry(net, &net_kill_list, cleanup_list) {
 		list_del_rcu(&net->list);
 		list_add_tail(&net->exit_list, &net_exit_list);
+		for_each_net(tmp) {
+			int id = __peernet2id(tmp, net, false);
+
+			if (id >= 0)
+				idr_remove(&tmp->netns_ids, id);
+		}
+		idr_destroy(&net->netns_ids);
+
 	}
 	rtnl_unlock();
 
@@ -390,26 +398,12 @@ static void cleanup_net(struct work_struct *work)
 	 */
 	rcu_barrier();
 
-	rtnl_lock();
 	/* Finally it is safe to free my network namespace structure */
 	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
-		/* Unreference net from all peers (no need to loop over
-		 * net_exit_list because idr_destroy() will be called for each
-		 * element of this list.
-		 */
-		for_each_net(peer) {
-			int id = __peernet2id(peer, net, false);
-
-			if (id >= 0)
-				idr_remove(&peer->netns_ids, id);
-		}
-		idr_destroy(&net->netns_ids);
-
 		list_del_init(&net->exit_list);
 		put_user_ns(net->user_ns);
 		net_drop_ns(net);
 	}
-	rtnl_unlock();
 }
 
 static DECLARE_WORK(net_cleanup_work, cleanup_net);
--
cgit v1.2.1

From 576b7cd2f6ff1e90b3fc0a000d2fe74f8a50a4bb Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel
Date: Fri, 3 Apr 2015 12:02:37 +0200
Subject: netns: don't allocate an id for dead netns

First, let's explain the problem.
Suppose you have an ipip interface that stands in the netns foo and its link
part in the netns bar (so the netns bar has an nsid into the netns foo).
Now, you remove the netns bar:
 - the bar nsid into the netns foo is removed
 - the netns exit method of ipip is called, thus our ipip iface is removed:
   => a netlink message is built in the netns foo to advertise this deletion
   => this netlink message requests an nsid for bar, thus a new nsid is
      allocated for bar and never removed.

This patch adds a check in peernet2id() so that an id cannot be allocated for
a netns which is currently destroyed.

Signed-off-by: Nicolas Dichtel
Signed-off-by: David S. Miller
---
 net/core/net_namespace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cb5290b8c428..70d3450588b2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -198,8 +198,10 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
  */
 int peernet2id(struct net *net, struct net *peer)
 {
-	int id = __peernet2id(net, peer, true);
+	bool alloc = atomic_read(&peer->count) == 0 ? false : true;
+	int id;
 
+	id = __peernet2id(net, peer, alloc);
 	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
 }
 EXPORT_SYMBOL(peernet2id);
--
cgit v1.2.1
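Put together, peernet2id() after this patch reads as sketched below
(reconstructed from the hunk above). The point of the change is that a peer
whose refcount has already dropped to zero, i.e. a netns being dismantled,
can still be looked up but never gets a fresh id allocated:

	/* Post-patch peernet2id(): only allocate a new nsid for a live peer;
	 * a dying peer falls back to a plain lookup and callers see
	 * NETNSA_NSID_NOT_ASSIGNED if no id exists.
	 */
	int peernet2id(struct net *net, struct net *peer)
	{
		bool alloc = atomic_read(&peer->count) == 0 ? false : true;
		int id;

		id = __peernet2id(net, peer, alloc);
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}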
From f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 Mon Sep 17 00:00:00 2001
From: "hannes@stressinduktion.org"
Date: Wed, 1 Apr 2015 17:07:44 +0200
Subject: ipv6: protect skb->sk accesses from recursive dereference inside the stack

We should not consult skb->sk for output decisions in xmit recursion
levels > 0 in the stack. Otherwise local socket settings could influence
the result of e.g. tunnel encapsulation process.

ipv6 does not conform with this in three places:

1) ip6_fragment: we do consult ipv6_npinfo for frag_size

2) sk_mc_loop in ipv6 uses skb->sk and checks if we should
   loop the packet back to the local socket

3) ip6_skb_dst_mtu could query the settings from the user socket and
   force a wrong MTU

Furthermore: In sk_mc_loop we could potentially land in WARN_ON(1) if we
use a PF_PACKET socket ontop of an IPv6-backed vxlan device.

Reuse xmit_recursion as we are currently only interested in protecting
tunnel devices.

Cc: Jiri Pirko
Signed-off-by: Hannes Frederic Sowa
Signed-off-by: David S. Miller
---
 net/core/dev.c  |  4 +++-
 net/core/sock.c | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/dev.c b/net/core/dev.c
index 962ee9d71964..45109b70664e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
-static DEFINE_PER_CPU(int, xmit_recursion);
+DEFINE_PER_CPU(int, xmit_recursion);
+EXPORT_SYMBOL(xmit_recursion);
+
 #define RECURSION_LIMIT 10
 
 /**
diff --git a/net/core/sock.c b/net/core/sock.c
index 78e89eb7eb70..71e3e5f1eaa0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
 		sock_reset_flag(sk, bit);
 }
 
+bool sk_mc_loop(struct sock *sk)
+{
+	if (dev_recursion_level())
+		return false;
+	if (!sk)
+		return true;
+	switch (sk->sk_family) {
+	case AF_INET:
+		return inet_sk(sk)->mc_loop;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		return inet6_sk(sk)->mc_loop;
+#endif
+	}
+	WARN_ON(1);
+	return true;
+}
+EXPORT_SYMBOL(sk_mc_loop);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
--
cgit v1.2.1
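The net/core half shown above only exports the per-CPU xmit_recursion counter
and adds sk_mc_loop(); the dev_recursion_level() helper that sk_mc_loop()
calls is added outside net/core and is therefore not in this diffstat. A
minimal sketch of what that accessor presumably looks like, assuming the
header side of the same patch:

	/* Sketch only: the real helper is expected to live in
	 * include/linux/netdevice.h, next to the now-exported counter.
	 */
	DECLARE_PER_CPU(int, xmit_recursion);

	static inline int dev_recursion_level(void)
	{
		return this_cpu_read(xmit_recursion);
	}

With such an accessor, an sk_mc_loop() call made while a tunnel device
re-enters the transmit path sees a non-zero recursion level and returns
false, so skb->sk is never consulted at that depth.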