diff options
author | Denis V. Lunev <den@openvz.org> | 2008-01-18 23:55:19 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-01-28 15:08:08 -0800 |
commit | 775516bfa2bd7993620c9039191a0c30b8d8a496 (patch) | |
tree | 7385f35edda9f4124b802ccf337c55070d2e00bc /net | |
parent | b7c6ba6eb1234e35a74fb8ba8123232a7b1ba9e4 (diff) | |
download | blackbird-op-linux-775516bfa2bd7993620c9039191a0c30b8d8a496.tar.gz blackbird-op-linux-775516bfa2bd7993620c9039191a0c30b8d8a496.zip |
[NETNS]: Namespace stop vs 'ip r l' race.
During network namespace stop process kernel side netlink sockets
belonging to a namespace should be closed. They should not prevent
namespace to stop, so they do not increment namespace usage
counter. Though this counter will be put during last sock_put.
The raplacement of the correct netns for init_ns solves the problem
only partial as socket to be stoped until proper stop is a valid
netlink kernel socket and can be looked up by the user processes. This
is not a problem until it resides in initial namespace (no processes
inside this net), but this is not true for init_net.
So, hold the referrence for a socket, remove it from lookup tables and
only after that change namespace and perform a last put.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/rtnetlink.c | 15 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 7 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 15 |
3 files changed, 18 insertions, 19 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02cf848f71d2..ddbdde82a700 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1368,25 +1368,14 @@ static int rtnetlink_net_init(struct net *net) rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); if (!sk) return -ENOMEM; - - /* Don't hold an extra reference on the namespace */ - put_net(sk->sk_net); net->rtnl = sk; return 0; } static void rtnetlink_net_exit(struct net *net) { - struct sock *sk = net->rtnl; - if (sk) { - /* At the last minute lie and say this is a socket for the - * initial network namespace. So the socket will be safe to - * free. - */ - sk->sk_net = get_net(&init_net); - netlink_kernel_release(net->rtnl); - net->rtnl = NULL; - } + netlink_kernel_release(net->rtnl); + net->rtnl = NULL; } static struct pernet_operations rtnetlink_net_ops = { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e787d2151152..62bd791c204e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net) nl_fib_input, NULL, THIS_MODULE); if (sk == NULL) return -EAFNOSUPPORT; - /* Don't hold an extra reference on the namespace */ - put_net(sk->sk_net); net->ipv4.fibnl = sk; return 0; } static void nl_fib_lookup_exit(struct net *net) { - /* At the last minute lie and say this is a socket for the - * initial network namespace. So the socket will be safe to free. - */ - net->ipv4.fibnl->sk_net = get_net(&init_net); netlink_kernel_release(net->ipv4.fibnl); + net->ipv4.fibnl = NULL; } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 626a58206298..6b178e1247b5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, } netlink_table_ungrab(); + /* Do not hold an extra referrence to a namespace as this socket is + * internal to a namespace and does not prevent it to stop. */ + put_net(net); return sk; out_sock_release: @@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk) { if (sk == NULL || sk->sk_socket == NULL) return; + + /* + * Last sock_put should drop referrence to sk->sk_net. It has already + * been dropped in netlink_kernel_create. Taking referrence to stopping + * namespace is not an option. + * Take referrence to a socket to remove it from netlink lookup table + * _alive_ and after that destroy it in the context of init_net. + */ + sock_hold(sk); sock_release(sk->sk_socket); + + sk->sk_net = get_net(&init_net); + sock_put(sk); } EXPORT_SYMBOL(netlink_kernel_release); |