diff options
Diffstat (limited to 'net/netlink/af_netlink.c')
| -rw-r--r-- | net/netlink/af_netlink.c | 101 | 
1 files changed, 73 insertions, 28 deletions
| diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7f86d3b55060..fafe33bdb619 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -125,6 +125,24 @@ static inline u32 netlink_group_mask(u32 group)  	return group ? 1 << (group - 1) : 0;  } +static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, +					   gfp_t gfp_mask) +{ +	unsigned int len = skb_end_offset(skb); +	struct sk_buff *new; + +	new = alloc_skb(len, gfp_mask); +	if (new == NULL) +		return NULL; + +	NETLINK_CB(new).portid = NETLINK_CB(skb).portid; +	NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group; +	NETLINK_CB(new).creds = NETLINK_CB(skb).creds; + +	memcpy(skb_put(new, len), skb->data, len); +	return new; +} +  int netlink_add_tap(struct netlink_tap *nt)  {  	if (unlikely(nt->dev->type != ARPHRD_NETLINK)) @@ -206,7 +224,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,  	int ret = -ENOMEM;  	dev_hold(dev); -	nskb = skb_clone(skb, GFP_ATOMIC); + +	if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) +		nskb = netlink_to_full_skb(skb, GFP_ATOMIC); +	else +		nskb = skb_clone(skb, GFP_ATOMIC);  	if (nskb) {  		nskb->dev = dev;  		nskb->protocol = htons((u16) sk->sk_protocol); @@ -279,11 +301,6 @@ static void netlink_rcv_wake(struct sock *sk)  }  #ifdef CONFIG_NETLINK_MMAP -static bool netlink_skb_is_mmaped(const struct sk_buff *skb) -{ -	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; -} -  static bool netlink_rx_is_mmaped(struct sock *sk)  {  	return nlk_sk(sk)->rx_ring.pg_vec != NULL; @@ -846,7 +863,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)  }  #else /* CONFIG_NETLINK_MMAP */ -#define netlink_skb_is_mmaped(skb)	false  #define netlink_rx_is_mmaped(sk)	false  #define netlink_tx_is_mmaped(sk)	false  #define netlink_mmap			sock_no_mmap @@ -1094,8 +1110,8 @@ static int netlink_insert(struct sock *sk, u32 portid)  	lock_sock(sk); -	err = -EBUSY; -	if (nlk_sk(sk)->portid) +	err = 
nlk_sk(sk)->portid == portid ? 0 : -EBUSY; +	if (nlk_sk(sk)->bound)  		goto err;  	err = -ENOMEM; @@ -1115,10 +1131,14 @@ static int netlink_insert(struct sock *sk, u32 portid)  			err = -EOVERFLOW;  		if (err == -EEXIST)  			err = -EADDRINUSE; -		nlk_sk(sk)->portid = 0;  		sock_put(sk); +		goto err;  	} +	/* We need to ensure that the socket is hashed and visible. */ +	smp_wmb(); +	nlk_sk(sk)->bound = portid; +  err:  	release_sock(sk);  	return err; @@ -1503,6 +1523,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,  	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;  	int err;  	long unsigned int groups = nladdr->nl_groups; +	bool bound;  	if (addr_len < sizeof(struct sockaddr_nl))  		return -EINVAL; @@ -1519,9 +1540,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,  			return err;  	} -	if (nlk->portid) +	bound = nlk->bound; +	if (bound) { +		/* Ensure nlk->portid is up-to-date. */ +		smp_rmb(); +  		if (nladdr->nl_pid != nlk->portid)  			return -EINVAL; +	}  	if (nlk->netlink_bind && groups) {  		int group; @@ -1537,7 +1563,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,  		}  	} -	if (!nlk->portid) { +	/* No need for barriers here as we return to user-space without +	 * using any of the bound attributes. +	 */ +	if (!bound) {  		err = nladdr->nl_pid ?  			netlink_insert(sk, nladdr->nl_pid) :  			netlink_autobind(sock); @@ -1585,7 +1614,10 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,  	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))  		return -EPERM; -	if (!nlk->portid) +	/* No need for barriers here as we return to user-space without +	 * using any of the bound attributes. 
+	 */ +	if (!nlk->bound)  		err = netlink_autobind(sock);  	if (err == 0) { @@ -2339,7 +2371,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,  		int pos, idx, shift;  		err = 0; -		netlink_table_grab(); +		netlink_lock_table();  		for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {  			if (len - pos < sizeof(u32))  				break; @@ -2354,7 +2386,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,  		}  		if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))  			err = -EFAULT; -		netlink_table_ungrab(); +		netlink_unlock_table();  		break;  	}  	case NETLINK_CAP_ACK: @@ -2426,10 +2458,13 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)  		dst_group = nlk->dst_group;  	} -	if (!nlk->portid) { +	if (!nlk->bound) {  		err = netlink_autobind(sock);  		if (err)  			goto out; +	} else { +		/* Ensure nlk is hashed and visible. */ +		smp_rmb();  	}  	/* It's a really convoluted way for userland to ask for mmaped @@ -2750,6 +2785,7 @@ static int netlink_dump(struct sock *sk)  	struct sk_buff *skb = NULL;  	struct nlmsghdr *nlh;  	int len, err = -ENOBUFS; +	int alloc_min_size;  	int alloc_size;  	mutex_lock(nlk->cb_mutex); @@ -2758,9 +2794,6 @@ static int netlink_dump(struct sock *sk)  		goto errout_skb;  	} -	cb = &nlk->cb; -	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); -  	if (!netlink_rx_is_mmaped(sk) &&  	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)  		goto errout_skb; @@ -2770,23 +2803,35 @@ static int netlink_dump(struct sock *sk)  	 * to reduce number of system calls on dump operations, if user  	 * ever provided a big enough buffer.  	 
*/ -	if (alloc_size < nlk->max_recvmsg_len) { -		skb = netlink_alloc_skb(sk, -					nlk->max_recvmsg_len, -					nlk->portid, +	cb = &nlk->cb; +	alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + +	if (alloc_min_size < nlk->max_recvmsg_len) { +		alloc_size = nlk->max_recvmsg_len; +		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,  					GFP_KERNEL |  					__GFP_NOWARN |  					__GFP_NORETRY); -		/* available room should be exact amount to avoid MSG_TRUNC */ -		if (skb) -			skb_reserve(skb, skb_tailroom(skb) - -					 nlk->max_recvmsg_len);  	} -	if (!skb) +	if (!skb) { +		alloc_size = alloc_min_size;  		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,  					GFP_KERNEL); +	}  	if (!skb)  		goto errout_skb; + +	/* Trim skb to allocated size. User is expected to provide buffer as +	 * large as max(min_dump_alloc, 16KiB (max_recvmsg_len capped at +	 * netlink_recvmsg())). dump will pack as many smaller messages as +	 * could fit within the allocated skb. skb is typically allocated +	 * with larger space than required (could be as much as near 2x the +	 * requested size with align to next power of 2 approach). Allowing +	 * dump to use the excess space makes it difficult for a user to have a +	 * reasonable static buffer based on the expected largest dump of a +	 * single netdev. The outcome is MSG_TRUNC error. +	 */ +	skb_reserve(skb, skb_tailroom(skb) - alloc_size);  	netlink_skb_set_owner_r(skb, sk);  	len = cb->dump(skb, cb); | 

