From 3622c36f37640078c9a706b71e02e6334c85f9e9 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 27 Feb 2015 08:56:54 +0100 Subject: tipc: only create header copy for name distr messages The TIPC name distributor pushes topology updates to the cluster neighbors. Currently this is done in a unicast manner, and the skb holding the update is cloned for each cluster member. This is unnecessary, as we only modify the destnode field in the header so we change it to do pskb_copy instead. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index fcb07915aaac..506aaa565da7 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -98,7 +98,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) continue; if (!tipc_node_active_links(node)) continue; - oskb = skb_copy(skb, GFP_ATOMIC); + oskb = pskb_copy(skb, GFP_ATOMIC); if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); -- cgit v1.2.3 From 7fe8097cef5ff4ba1c7ced42bda97830ce00eec6 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 27 Feb 2015 08:56:55 +0100 Subject: tipc: fix nullpointer bug when subscribing to events If a subscription request is sent to a topology server connection, and any error occurs (malformed request, oom or limit reached) while processing this request, TIPC should terminate the subscriber connection. While doing so, it tries to access fields in an already freed (or never allocated) subscription element leading to a nullpointer exception. We fix this by removing the subscr_terminate function and terminate the connection immediately upon any subscription failure. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/subscr.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 72c339e432aa..1c147c869c2e 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -162,19 +162,6 @@ static void subscr_del(struct tipc_subscription *sub) atomic_dec(&tn->subscription_count); } -/** - * subscr_terminate - terminate communication with a subscriber - * - * Note: Must call it in process context since it might sleep. - */ -static void subscr_terminate(struct tipc_subscription *sub) -{ - struct tipc_subscriber *subscriber = sub->subscriber; - struct tipc_net *tn = net_generic(sub->net, tipc_net_id); - - tipc_conn_terminate(tn->topsrv, subscriber->conid); -} - static void subscr_release(struct tipc_subscriber *subscriber) { struct tipc_subscription *sub; @@ -312,16 +299,14 @@ static void subscr_conn_msg_event(struct net *net, int conid, { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub = NULL; + struct tipc_net *tn = net_generic(net, tipc_net_id); spin_lock_bh(&subscriber->lock); - if (subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, - &sub) < 0) { - spin_unlock_bh(&subscriber->lock); - subscr_terminate(sub); - return; - } + subscr_subscribe(net, (struct tipc_subscr *)buf, subscriber, &sub); if (sub) tipc_nametbl_subscribe(sub); + else + tipc_conn_terminate(tn->topsrv, subscriber->conid); spin_unlock_bh(&subscriber->lock); } -- cgit v1.2.3 From afaa3f65f65fda2e7b190aac7e2a75d9a2a77cb6 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 27 Feb 2015 08:56:56 +0100 Subject: tipc: purge links when bearer is disabled If a bearer is disabled by manual intervention, all links over that bearer should be purged, indicated with the 'shutting_down' flag. Otherwise tipc will get confused if a new bearer is enabled using a different media type. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 48852c2dcc03..af6deeb397a8 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -742,7 +742,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - bearer_disable(net, bearer, false); + bearer_disable(net, bearer, true); rtnl_unlock(); return 0; -- cgit v1.2.3 From 91e2eb56845a018e5c691acf87137baf05316c4e Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 27 Feb 2015 08:56:57 +0100 Subject: tipc: rename media/msg related definitions The TIPC_MEDIA_ADDR_SIZE and TIPC_MEDIA_ADDR_OFFSET names are misleading, as they actually define the size and offset of the whole media info field and not the address part. This patch does not have any functional changes. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.h | 4 ++-- net/tipc/eth_media.c | 2 +- net/tipc/ib_media.c | 2 +- net/tipc/msg.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6b17795ff8bc..1a233271d1b5 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -50,7 +50,7 @@ * - the field's actual content and length is defined per media * - remaining unused bytes in the field are set to zero */ -#define TIPC_MEDIA_ADDR_SIZE 32 +#define TIPC_MEDIA_INFO_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 /* @@ -76,7 +76,7 @@ struct tipc_node_map { * @broadcast: non-zero if address is a broadcast address */ struct tipc_media_addr { - u8 value[TIPC_MEDIA_ADDR_SIZE]; + u8 value[TIPC_MEDIA_INFO_SIZE]; u8 media_id; u8 broadcast; }; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 5e1426f1751f..085d3a07e565 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -53,7 +53,7 @@ static int tipc_eth_addr2str(struct tipc_media_addr *addr, /* Convert from media address format to discovery message addr format */ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 8522eef9c136..e8c16718e3fa 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -57,7 +57,7 @@ static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, /* Convert from media address format to discovery message addr format */ static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) { - memset(msg, 0, TIPC_MEDIA_ADDR_SIZE); + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); memcpy(msg, addr->value, INFINIBAND_ALEN); return 0; } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9ace47f44a69..c1cc8d7a5d52 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -76,7 +76,7 @@ struct plist; #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -#define TIPC_MEDIA_ADDR_OFFSET 5 +#define TIPC_MEDIA_INFO_OFFSET 5 /** * TIPC message buffer code @@ -688,7 +688,7 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) static inline char *msg_media_addr(struct tipc_msg *m) { - return (char *)&m->hdr[TIPC_MEDIA_ADDR_OFFSET]; + return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } /* -- cgit v1.2.3 From d76a436d50d1ebce352e1815eaea79a254b2b24f Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Fri, 27 Feb 2015 08:56:58 +0100 Subject: tipc: make media address offset a common define With the exception of infiniband media which does not use media offsets, the media address is always located at offset 4 in the media info field as defined by the protocol, so we move the definition to the generic bearer.h Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bearer.h | 1 + net/tipc/eth_media.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 1a233271d1b5..097aff08ad5b 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -52,6 +52,7 @@ */ #define TIPC_MEDIA_INFO_SIZE 32 #define TIPC_MEDIA_TYPE_OFFSET 3 +#define TIPC_MEDIA_ADDR_OFFSET 4 /* * Identifiers of supported TIPC media types diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 085d3a07e565..f69a2fde9f4a 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -37,8 +37,6 @@ #include "core.h" #include "bearer.h" -#define ETH_ADDR_OFFSET 4 /* MAC addr position inside address field */ - /* Convert Ethernet address (media address format) to string */ static int tipc_eth_addr2str(struct tipc_media_addr *addr, char *strbuf, int bufsz) @@ -55,7 +53,7 @@ static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) { memset(msg, 0, TIPC_MEDIA_INFO_SIZE); msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; - memcpy(msg + ETH_ADDR_OFFSET, addr->value, ETH_ALEN); + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); return 0; } @@ -79,7 +77,7 @@ static int tipc_eth_msg2addr(struct tipc_bearer *b, char *msg) { /* Skip past preamble: */ - msg += ETH_ADDR_OFFSET; + msg += TIPC_MEDIA_ADDR_OFFSET; return tipc_eth_raw2addr(b, addr, msg); } -- cgit v1.2.3 From 39a0295f901423e260a034ac7c3211ecaa9c2745 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 2 Mar 2015 15:37:47 +0800 Subject: tipc: Don't use iocb argument in socket layer Currently the iocb argument is used to idenfiy whether or not socket lock is hold before tipc_sendmsg()/tipc_send_stream() is called. But this usage prevents iocb argument from being dropped through sendmsg() at socket common layer. Therefore, in the commit we introduce two new functions called __tipc_sendmsg() and __tipc_send_stream(). When they are invoked, it assumes that their callers have taken socket lock, thereby avoiding the weird usage of iocb argument. Cc: Al Viro Cc: Christoph Hellwig Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 82 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 38 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f73e975af80b..c245ec31fa4c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -114,6 +114,9 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, + size_t dsz); +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -906,6 +909,18 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) */ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_sendmsg(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) { DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); struct sock *sk = sock->sk; @@ -931,22 +946,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - if (iocb) - lock_sock(sk); - if (unlikely(sock->state != SS_READY)) { - if (sock->state == SS_LISTENING) { - rc = -EPIPE; - goto exit; - } - if (sock->state != SS_UNCONNECTED) { - rc = -EISCONN; - goto exit; - } - if (tsk->published) { - rc = -EOPNOTSUPP; - goto exit; - } + if (sock->state == SS_LISTENING) + return -EPIPE; + if (sock->state != SS_UNCONNECTED) + return -EISCONN; + if (tsk->published) + return -EOPNOTSUPP; if (dest->addrtype == TIPC_ADDR_NAME) { tsk->conn_type = dest->addr.name.name.type; tsk->conn_instance = dest->addr.name.name.instance; @@ -956,8 +962,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); if (dest->addrtype == TIPC_ADDR_MCAST) { - rc = tipc_sendmcast(sock, seq, m, dsz, timeo); - goto exit; + return tipc_sendmcast(sock, seq, m, dsz, timeo); } else if (dest->addrtype == TIPC_ADDR_NAME) { u32 type = dest->addr.name.name.type; u32 inst = dest->addr.name.name.instance; @@ -972,10 +977,8 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, dport = tipc_nametbl_translate(net, type, inst, &dnode); msg_set_destnode(mhdr, dnode); msg_set_destport(mhdr, dport); - if (unlikely(!dport && !dnode)) { - rc = -EHOSTUNREACH; - goto exit; - } + if (unlikely(!dport && !dnode)) + return -EHOSTUNREACH; } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; msg_set_type(mhdr, TIPC_DIRECT_MSG); @@ -990,7 +993,7 @@ new_mtu: mtu = tipc_node_get_mtu(net, dnode, tsk->portid); rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain); if (rc < 0) - goto exit; + return rc; do { skb = skb_peek(pktchain); @@ -1013,9 +1016,6 @@ new_mtu: if (rc) __skb_queue_purge(pktchain); } while (!rc); -exit: - if (iocb) - release_sock(sk); return rc; } @@ -1064,6 +1064,18 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) */ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_send_stream(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1080,7 +1092,7 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, /* Handle implied connection establishment */ if (unlikely(dest)) { - rc = tipc_sendmsg(iocb, sock, m, dsz); + rc = __tipc_sendmsg(sock, m, dsz); if (dsz && (dsz == rc)) tsk->sent_unacked = 1; return rc; @@ -1088,15 +1100,11 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, if (dsz > (uint)INT_MAX) return -EMSGSIZE; - if (iocb) - lock_sock(sk); - if (unlikely(sock->state != SS_CONNECTED)) { if (sock->state == SS_DISCONNECTING) - rc = -EPIPE; + return -EPIPE; else - rc = -ENOTCONN; - goto exit; + return -ENOTCONN; } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); @@ -1108,7 +1116,7 @@ next: send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain); if (unlikely(rc < 0)) - goto exit; + return rc; do { if (likely(!tsk_conn_cong(tsk))) { rc = tipc_link_xmit(net, pktchain, dnode, portid); @@ -1133,9 +1141,7 @@ next: if (rc) __skb_queue_purge(pktchain); } while (!rc); -exit: - if (iocb) - release_sock(sk); + return sent ? sent : rc; } @@ -1947,7 +1953,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!timeout) m.msg_flags = MSG_DONTWAIT; - res = tipc_sendmsg(NULL, sock, &m, 0); + res = __tipc_sendmsg(sock, &m, 0); if ((res < 0) && (res != -EWOULDBLOCK)) goto exit; @@ -2103,7 +2109,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) struct msghdr m = {NULL,}; tsk_advance_rx_queue(sk); - tipc_send_packet(NULL, new_sock, &m, 0); + __tipc_send_stream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); -- cgit v1.2.3 From 1b784140474e4fc94281a49e96c67d29df0efbde Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 2 Mar 2015 15:37:48 +0800 Subject: net: Remove iocb argument from sendmsg and recvmsg After TIPC doesn't depend on iocb argument in its internal implementations of sendmsg() and recvmsg() hooks defined in proto structure, no any user is using iocb argument in them at all now. Then we can drop the redundant iocb argument completely from kinds of implementations of both sendmsg() and recvmsg() in the entire networking stack. Cc: Christoph Hellwig Suggested-by: Al Viro Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- crypto/algif_hash.c | 8 ++-- crypto/algif_rng.c | 4 +- crypto/algif_skcipher.c | 8 ++-- drivers/isdn/mISDN/socket.c | 7 ++-- drivers/net/macvtap.c | 9 ++--- drivers/net/ppp/pppoe.c | 8 ++-- drivers/net/tun.c | 6 +-- drivers/vhost/net.c | 6 +-- include/linux/net.h | 10 ++--- include/net/af_vsock.h | 4 +- include/net/bluetooth/bluetooth.h | 8 ++-- include/net/inet_common.h | 7 ++-- include/net/ping.h | 7 ++-- include/net/sock.h | 16 ++++---- include/net/tcp.h | 7 ++-- include/net/udp.h | 3 +- net/appletalk/ddp.c | 7 ++-- net/atm/common.c | 7 ++-- net/atm/common.h | 7 ++-- net/ax25/af_ax25.c | 7 ++-- net/bluetooth/af_bluetooth.c | 8 ++-- net/bluetooth/hci_sock.c | 8 ++-- net/bluetooth/l2cap_sock.c | 12 +++--- net/bluetooth/rfcomm/sock.c | 10 ++--- net/bluetooth/sco.c | 10 ++--- net/caif/caif_socket.c | 17 ++++----- net/can/bcm.c | 7 ++-- net/can/raw.c | 7 ++-- net/core/sock.c | 13 +++---- net/dccp/dccp.h | 8 ++-- net/dccp/probe.c | 3 +- net/dccp/proto.c | 7 ++-- net/decnet/af_decnet.c | 7 ++-- net/ieee802154/socket.c | 21 +++++------ net/ipv4/af_inet.c | 11 +++--- net/ipv4/ping.c | 7 ++-- net/ipv4/raw.c | 7 ++-- net/ipv4/tcp.c | 7 ++-- net/ipv4/udp.c | 9 ++--- net/ipv4/udp_impl.h | 4 +- net/ipv6/ping.c | 3 +- net/ipv6/raw.c | 8 ++-- net/ipv6/udp.c | 10 ++--- net/ipv6/udp_impl.h | 7 ++-- net/ipx/af_ipx.c | 7 ++-- net/irda/af_irda.c | 29 +++++++-------- net/iucv/af_iucv.c | 8 ++-- net/key/af_key.c | 6 +-- net/l2tp/l2tp_ip.c | 4 +- net/l2tp/l2tp_ip6.c | 8 ++-- net/l2tp/l2tp_ppp.c | 7 ++-- net/llc/af_llc.c | 7 ++-- net/netlink/af_netlink.c | 6 +-- net/netrom/af_netrom.c | 7 ++-- net/nfc/llcp_sock.c | 8 ++-- net/nfc/rawsock.c | 7 ++-- net/packet/af_packet.c | 11 +++--- net/phonet/datagram.c | 8 ++-- net/phonet/pep.c | 8 ++-- net/phonet/socket.c | 6 +-- net/rds/rds.h | 7 ++-- net/rds/recv.c | 4 +- net/rds/send.c | 3 +- net/rose/af_rose.c | 7 ++-- net/rxrpc/af_rxrpc.c | 7 ++-- net/rxrpc/ar-internal.h | 10 ++--- net/rxrpc/ar-output.c | 20 ++++------ net/rxrpc/ar-recvmsg.c | 4 +- net/sctp/socket.c | 8 ++-- net/socket.c | 78 +++++++-------------------------------- net/tipc/socket.c | 23 ++++-------- net/unix/af_unix.c | 50 +++++++++++-------------- net/vmw_vsock/af_vsock.c | 20 +++++----- net/vmw_vsock/vmci_transport.c | 3 +- net/x25/af_x25.c | 6 +-- 75 files changed, 302 insertions(+), 442 deletions(-) (limited to 'net/tipc') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 01da360bdb55..0a465e0f3012 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -34,8 +34,8 @@ struct hash_ctx { struct ahash_request req; }; -static int hash_sendmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t ignored) +static int hash_sendmsg(struct socket *sock, struct msghdr *msg, + size_t ignored) { int limit = ALG_MAX_PAGES * PAGE_SIZE; struct sock *sk = sock->sk; @@ -139,8 +139,8 @@ unlock: return err ?: size; } -static int hash_recvmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c index 67f612cfed97..3acba0a7cd55 100644 --- a/crypto/algif_rng.c +++ b/crypto/algif_rng.c @@ -55,8 +55,8 @@ struct rng_ctx { struct crypto_rng *drng; }; -static int rng_recvmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 0c8a1e5ccadf..b9743dc35801 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -239,8 +239,8 @@ static void skcipher_data_wakeup(struct sock *sk) rcu_read_unlock(); } -static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t size) +static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg, + size_t size) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -424,8 +424,8 @@ unlock: return err ?: size; } -static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, - struct msghdr *msg, size_t ignored, int flags) +static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg, + size_t ignored, int flags) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 84b35925ee4d..8dc7290089bb 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -112,8 +112,8 @@ mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } static int -mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sk_buff *skb; struct sock *sk = sock->sk; @@ -173,8 +173,7 @@ mISDN_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } static int -mISDN_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +mISDN_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index e40fdfccc9c1..1e51c6bf3ae1 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -1127,16 +1127,15 @@ static const struct file_operations macvtap_fops = { #endif }; -static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int macvtap_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); } -static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, - int flags) +static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, + size_t total_len, int flags) { struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); int ret; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 9c97e9bcf5f5..ff059e1d8ac6 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -835,8 +835,8 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, return err; } -static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct sk_buff *skb; struct sock *sk = sock->sk; @@ -977,8 +977,8 @@ static const struct ppp_channel_ops pppoe_chan_ops = { .start_xmit = pppoe_xmit, }; -static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, int flags) +static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, + size_t total_len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 857dca47bf80..b96b94cee760 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1448,8 +1448,7 @@ static void tun_sock_write_space(struct sock *sk) kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); } -static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { int ret; struct tun_file *tfile = container_of(sock, struct tun_file, socket); @@ -1464,8 +1463,7 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, return ret; } -static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, +static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index afa06d28725d..633012cc9a57 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -390,7 +390,7 @@ static void handle_tx(struct vhost_net *net) ubufs = NULL; } /* TODO: Check specific error and bomb out unless ENOBUFS? */ - err = sock->ops->sendmsg(NULL, sock, &msg, len); + err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { vhost_net_ubuf_put(ubufs); @@ -566,7 +566,7 @@ static void handle_rx(struct vhost_net *net) /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); - err = sock->ops->recvmsg(NULL, sock, &msg, + err = sock->ops->recvmsg(sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; @@ -597,7 +597,7 @@ static void handle_rx(struct vhost_net *net) */ iov_iter_advance(&fixup, sizeof(hdr)); } - err = sock->ops->recvmsg(NULL, sock, &msg, + err = sock->ops->recvmsg(sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: * it's not supposed to do this usually, but might be hard diff --git a/include/linux/net.h b/include/linux/net.h index 17d83393afcc..e74114bcca68 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -120,7 +120,6 @@ struct socket { struct vm_area_struct; struct page; -struct kiocb; struct sockaddr; struct msghdr; struct module; @@ -162,8 +161,8 @@ struct proto_ops { int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); #endif - int (*sendmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len); + int (*sendmsg) (struct socket *sock, struct msghdr *m, + size_t total_len); /* Notes for implementing recvmsg: * =============================== * msg->msg_namelen should get updated by the recvmsg handlers @@ -172,9 +171,8 @@ struct proto_ops { * handlers can assume that msg.msg_name is either NULL or has * a minimum size of sizeof(struct sockaddr_storage). */ - int (*recvmsg) (struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len, - int flags); + int (*recvmsg) (struct socket *sock, struct msghdr *m, + size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); ssize_t (*sendpage) (struct socket *sock, struct page *page, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 0d87674fb775..172632dd9930 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,8 +100,8 @@ struct vsock_transport { /* DGRAM. */ int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *); - int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk, - struct msghdr *msg, size_t len, int flags); + int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, + size_t len, int flags); int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *, struct msghdr *, size_t len); bool (*dgram_allow)(u32 cid, u32 port); diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 4500bf88ff55..6bb97df16d2d 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -245,10 +245,10 @@ int bt_sock_register(int proto, const struct net_proto_family *ops); void bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); -int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags); -int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags); +int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); +int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index b2828a06a5a6..4a92423eefa5 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -21,12 +21,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags); -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size); +int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags); +int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); int inet_shutdown(struct socket *sock, int how); int inet_listen(struct socket *sock, int backlog); void inet_sock_destruct(struct sock *sk); diff --git a/include/net/ping.h b/include/net/ping.h index cc16d413f681..ac80cb45e630 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -75,12 +75,11 @@ void ping_err(struct sk_buff *skb, int offset, u32 info); int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *); -int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 38369d3580a1..250822cc1e02 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -958,10 +958,9 @@ struct proto { int (*compat_ioctl)(struct sock *sk, unsigned int cmd, unsigned long arg); #endif - int (*sendmsg)(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len); - int (*recvmsg)(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, + int (*sendmsg)(struct sock *sk, struct msghdr *msg, + size_t len); + int (*recvmsg)(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int (*sendpage)(struct sock *sk, struct page *page, @@ -1562,9 +1561,8 @@ int sock_no_listen(struct socket *, int); int sock_no_shutdown(struct socket *, int); int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); -int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -int sock_no_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, - int); +int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); +int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, @@ -1576,8 +1574,8 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, */ int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags); +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); int sock_common_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int compat_sock_common_getsockopt(struct socket *sock, int level, diff --git a/include/net/tcp.h b/include/net/tcp.h index 8d6b983d5099..f87599d5af82 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -349,8 +349,7 @@ void tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); @@ -430,8 +429,8 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); -int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len); +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); diff --git a/include/net/udp.h b/include/net/udp.h index 32d8d9f07f76..6d4ed18e1427 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -238,8 +238,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); void udp_err(struct sk_buff *, u32); -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); +int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0d0766ea5ab1..3b7ad43c7dad 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1559,8 +1559,7 @@ freeit: return 0; } -static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t len) +static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); @@ -1728,8 +1727,8 @@ out: return err ? : len; } -static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ddpehdr *ddp; diff --git a/net/atm/common.c b/net/atm/common.c index b84057e41bd6..ed0466637e13 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -523,8 +523,8 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci) return 0; } -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct atm_vcc *vcc; @@ -569,8 +569,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, return copied; } -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t size) +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) { struct sock *sk = sock->sk; DEFINE_WAIT(wait); diff --git a/net/atm/common.h b/net/atm/common.h index cc3c2dae4d79..4d6f5b2068ac 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -13,10 +13,9 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); -int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags); -int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t total_len); +int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags); +int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ca049a7c9287..330c1f4a5a0b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1432,8 +1432,7 @@ out: return err; } -static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_ax25 *, usax, msg->msg_name); struct sock *sk = sock->sk; @@ -1599,8 +1598,8 @@ out: return err; } -static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 4b904c97a068..20a4698e2255 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -210,8 +210,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) } EXPORT_SYMBOL(bt_accept_dequeue); -int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -283,8 +283,8 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) return timeo; } -int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 37198fb99ffe..aa8be4cb19a1 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -826,8 +826,8 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, } } -static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -871,8 +871,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return err ? : copied; } -static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct hci_dev *hdev; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 60694f0f4c73..9070720eedc8 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -944,8 +944,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, return err; } -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int l2cap_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; @@ -976,8 +976,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return err; } -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int l2cap_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1004,9 +1004,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); if (sock->type == SOCK_STREAM) - err = bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_stream_recvmsg(sock, msg, len, flags); else - err = bt_sock_recvmsg(iocb, sock, msg, len, flags); + err = bt_sock_recvmsg(sock, msg, len, flags); if (pi->chan->mode != L2CAP_MODE_ERTM) return err; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 3c6d2c8ac1a4..825e8fb5114b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -549,8 +549,8 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * return 0; } -static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -615,8 +615,8 @@ done: return sent; } -static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; @@ -627,7 +627,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, return 0; } - len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags); + len = bt_sock_stream_recvmsg(sock, msg, size, flags); lock_sock(sk); if (!(flags & MSG_PEEK) && len > 0) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 76321b546e84..2bb7ef46bb99 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -688,8 +688,8 @@ static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len return 0; } -static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; int err; @@ -758,8 +758,8 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) } } -static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { struct sock *sk = sock->sk; struct sco_pinfo *pi = sco_pi(sk); @@ -777,7 +777,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); - return bt_sock_recvmsg(iocb, sock, msg, len, flags); + return bt_sock_recvmsg(sock, msg, len, flags); } static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 769b185fefbd..b6bf51bb187d 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -271,8 +271,8 @@ static void caif_check_flow_release(struct sock *sk) * Copied from unix_dgram_recvmsg, but removed credit checks, * changed locking, address handling and added MSG_TRUNC. */ -static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) +static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, + size_t len, int flags) { struct sock *sk = sock->sk; @@ -343,9 +343,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * Copied from unix_stream_recvmsg, but removed credit checks, * changed locking calls, changed address handling. */ -static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -511,8 +510,8 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, } /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ -static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -586,8 +585,8 @@ err: * Changed removed permission handling and added waiting for flow on * and other minor adaptations. */ -static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); diff --git a/net/can/bcm.c b/net/can/bcm.c index d559f922326d..b523453585be 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1231,8 +1231,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ -static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); @@ -1535,8 +1534,8 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, return 0; } -static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/can/raw.c b/net/can/raw.c index 94601b7ff0a3..63ffdb0f3a23 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -658,8 +658,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -728,8 +727,8 @@ send_failed: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/core/sock.c b/net/core/sock.c index 9c74fc8f0e32..726e1f99aa8d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2163,15 +2163,14 @@ int sock_no_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(sock_no_getsockopt); -int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len) +int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); -int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, - size_t len, int flags) +int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, + int flags) { return -EOPNOTSUPP; } @@ -2543,14 +2542,14 @@ int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, EXPORT_SYMBOL(compat_sock_common_getsockopt); #endif -int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e4c144fa706f..3b1d64d6e093 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -310,11 +310,9 @@ int compat_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); #endif int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int nonblock, int flags, - int *addr_len); +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); unsigned int dccp_poll(struct file *file, struct socket *sock, diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 595ddf0459db..d8346d0eadeb 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -72,8 +72,7 @@ static void printl(const char *fmt, ...) wake_up(&dccpw.wait); } -static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { const struct inet_sock *inet = inet_sk(sk); struct ccid3_hc_tx_sock *hc = NULL; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e171b780b499..52a94016526d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -741,8 +741,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) return 0; } -int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { const struct dccp_sock *dp = dccp_sk(sk); const int flags = msg->msg_flags; @@ -806,8 +805,8 @@ out_discard: EXPORT_SYMBOL_GPL(dccp_sendmsg); -int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { const struct dccp_hdr *dh; long timeo; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 810228646de3..754484b3cd0e 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1669,8 +1669,8 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int } -static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); @@ -1905,8 +1905,7 @@ static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk, return skb; } -static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 2878d8ca6d3b..b60c65f70346 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -98,12 +98,12 @@ static int ieee802154_sock_release(struct socket *sock) return 0; } -static int ieee802154_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ieee802154_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; - return sk->sk_prot->sendmsg(iocb, sk, msg, len); + return sk->sk_prot->sendmsg(sk, msg, len); } static int ieee802154_sock_bind(struct socket *sock, struct sockaddr *uaddr, @@ -255,8 +255,7 @@ static int raw_disconnect(struct sock *sk, int flags) return 0; } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -327,8 +326,8 @@ out: return err; } -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -615,8 +614,7 @@ static int dgram_disconnect(struct sock *sk, int flags) return 0; } -static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size) +static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct net_device *dev; unsigned int mtu; @@ -715,9 +713,8 @@ out: return err; } -static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { size_t copied = 0; int err = -EOPNOTSUPP; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4ce954cc94a4..64a9c0fdc4aa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -716,8 +716,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_getname); -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size) +int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -728,7 +727,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, msg, size); + return sk->sk_prot->sendmsg(sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); @@ -750,8 +749,8 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(inet_sendpage); -int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int flags) +int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int addr_len = 0; @@ -759,7 +758,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sock_rps_record_flow(sk); - err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, + err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e9f66e1cda50..3648e7f32f3d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -684,8 +684,7 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, } EXPORT_SYMBOL_GPL(ping_common_sendmsg); -static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; @@ -841,8 +840,8 @@ do_confirm: goto out; } -int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f027a708b7e0..923cf538fce1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -481,8 +481,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; @@ -709,8 +708,8 @@ out: return ret; * we return it, otherwise we block. */ -static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4b57ea8dabc7..d939c35001f9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1064,8 +1064,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return err; } -int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1543,8 +1542,8 @@ EXPORT_SYMBOL(tcp_read_sock); * Probably, code can be easily improved even more. */ -int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) { struct tcp_sock *tp = tcp_sk(sk); int copied = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0224f930c613..f27556e2158b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -873,8 +873,7 @@ out: } EXPORT_SYMBOL(udp_push_pending_frames); -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); @@ -1136,7 +1135,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, * sendpage interface can't pass. * This will succeed only when the socket is connected. */ - ret = udp_sendmsg(NULL, sk, &msg, 0); + ret = udp_sendmsg(sk, &msg, 0); if (ret < 0) return ret; } @@ -1254,8 +1253,8 @@ EXPORT_SYMBOL(udp_ioctl); * return it, otherwise we block. */ -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index f3c27899f62b..7e0fe4bdd967 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -21,8 +21,8 @@ int compat_udp_setsockopt(struct sock *sk, int level, int optname, int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bd46f736f61d..fee25c0ed1f5 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d84b2c7f24e..a5287b3582a4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) * we return it, otherwise we block. */ -static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) +static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d048d46779fc..70568a4548e4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -391,8 +391,7 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup); * return it, otherwise we block. */ -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1101,8 +1100,7 @@ out: return err; } -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); @@ -1164,12 +1162,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, do_udp_sendmsg: if (__ipv6_only_sock(sk)) return -ENETUNREACH; - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); } } if (up->pending == AF_INET) - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index c779c3c90b9d..0682c031ccdc 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f11ad1d95e0e..4ea5d7497b5f 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1688,8 +1688,7 @@ out: return rc; } -static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int ipx_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); @@ -1754,8 +1753,8 @@ out: } -static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int ipx_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 568edc72d737..ee0ea25c8e7a 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1256,14 +1256,13 @@ static int irda_release(struct socket *sock) } /* - * Function irda_sendmsg (iocb, sock, msg, len) + * Function irda_sendmsg (sock, msg, len) * * Send message down to TinyTP. This function is used for both STREAM and * SEQPACK services. This is possible since it forces the client to * fragment the message if necessary */ -static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1348,13 +1347,13 @@ out: } /* - * Function irda_recvmsg_dgram (iocb, sock, msg, size, flags) + * Function irda_recvmsg_dgram (sock, msg, size, flags) * * Try to receive message and copy it to user. The frame is discarded * after being read, regardless of how much the user actually read */ -static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1398,10 +1397,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_recvmsg_stream (iocb, sock, msg, size, flags) + * Function irda_recvmsg_stream (sock, msg, size, flags) */ -static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); @@ -1515,14 +1514,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, } /* - * Function irda_sendmsg_dgram (iocb, sock, msg, len) + * Function irda_sendmsg_dgram (sock, msg, len) * * Send message down to TinyTP for the unreliable sequenced * packet service... * */ -static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; @@ -1594,14 +1593,14 @@ out: } /* - * Function irda_sendmsg_ultra (iocb, sock, msg, len) + * Function irda_sendmsg_ultra (sock, msg, len) * * Send message down to IrLMP for the unreliable Ultra * packet service... */ #ifdef CONFIG_IRDA_ULTRA -static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct irda_sock *self; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2e9953b2db84..94b4c898a116 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1026,8 +1026,8 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, (void *) prmdata, 8); } -static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); @@ -1317,8 +1317,8 @@ static void iucv_process_message_q(struct sock *sk) } } -static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/key/af_key.c b/net/key/af_key.c index f8ac939d52b4..9255fd9d94bc 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3588,8 +3588,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } #endif -static int pfkey_sendmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len) +static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; @@ -3630,8 +3629,7 @@ out: return err ? : len; } -static int pfkey_recvmsg(struct kiocb *kiocb, - struct socket *sock, struct msghdr *msg, size_t len, +static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 05dfc8aa36af..79649937ec71 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -385,7 +385,7 @@ drop: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) +static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct sk_buff *skb; int rc; @@ -506,7 +506,7 @@ no_route: goto out; } -static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 8611f1b63141..d1ded3777815 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -480,8 +480,7 @@ out: /* Userspace will call sendmsg() on the tunnel socket to send L2TP * control frames. */ -static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -643,9 +642,8 @@ do_confirm: goto done; } -static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index cc7a828fc914..e9b0dec56b8e 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -185,9 +185,8 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) /* Receive message. This is the recvmsg for the PPPoL2TP socket. */ -static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, - int flags) +static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int err; struct sk_buff *skb; @@ -295,7 +294,7 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) * when a user application does a sendmsg() on the session socket. L2TP and * PPP headers must be inserted into the user's data. */ -static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, +static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { static const unsigned char ppph[2] = { 0xff, 0x03 }; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2c0b83ce43bd..17a8dff06090 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -704,8 +704,8 @@ out: * Copy received data to the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name); const int nonblock = flags & MSG_DONTWAIT; @@ -878,8 +878,7 @@ copy_uaddr: * Transmit data provided by the socket user. * Returns non-negative upon success, negative otherwise. */ -static int llc_ui_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2702673f0f23..a96025c0583f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2256,8 +2256,7 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } -static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2346,8 +2345,7 @@ out: return err; } -static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, +static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct scm_cookie scm; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 69f1d5e9959f..b987fd56c3c5 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1023,8 +1023,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) return 1; } -static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int nr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); @@ -1133,8 +1132,8 @@ out: return err; } -static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_ax25 *, sax, msg->msg_name); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index e181e290427c..9578bd6a4f3e 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -750,8 +750,8 @@ error: return ret; } -static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -793,8 +793,8 @@ static int llcp_sock_sendmsg(struct kiocb *iocb, struct socket *sock, return nfc_llcp_send_i_frame(llcp_sock, msg, len); } -static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 373e138c0ab6..82b4e8024778 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -211,8 +211,7 @@ static void rawsock_tx_work(struct work_struct *work) } } -static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct nfc_dev *dev = nfc_rawsock(sk)->dev; @@ -248,8 +247,8 @@ static int rawsock_sendmsg(struct kiocb *iocb, struct socket *sock, return len; } -static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9db83693d736..404c9735aee9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1606,8 +1606,8 @@ oom: * protocol layers and you must therefore supply it with a complete frame */ -static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); @@ -2601,8 +2601,7 @@ out: return err; } -static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); @@ -2882,8 +2881,8 @@ out: * If necessary we block. */ -static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 26054b4b467c..5e710435ffa9 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -83,8 +83,7 @@ static int pn_init(struct sock *sk) return 0; } -static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; @@ -125,9 +124,8 @@ static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, return (err >= 0) ? len : err; } -static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 5d3f2b7507d4..6de2aeb98a1f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1118,8 +1118,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } -static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int pep_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); struct sk_buff *skb; @@ -1246,9 +1245,8 @@ struct sk_buff *pep_read(struct sock *sk) return skb; } -static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb; int err; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 008214a3d5eb..d575ef4e9aa6 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -425,15 +425,15 @@ out: return err; } -static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t total_len) +static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { struct sock *sk = sock->sk; if (pn_socket_autobind(sock)) return -EAGAIN; - return sk->sk_prot->sendmsg(iocb, sk, m, total_len); + return sk->sk_prot->sendmsg(sk, m, total_len); } const struct proto_ops phonet_dgram_ops = { diff --git a/net/rds/rds.h b/net/rds/rds.h index c2a5eef41343..c3f2855c3d84 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -702,8 +702,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, void rds_inc_put(struct rds_incoming *inc); void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, struct rds_incoming *inc, gfp_t gfp); -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags); +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags); void rds_clear_recv_queue(struct rds_sock *rs); int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); void rds_inc_info_copy(struct rds_incoming *inc, @@ -711,8 +711,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, __be32 saddr, __be32 daddr, int flip); /* send.c */ -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len); +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); void rds_send_reset(struct rds_connection *conn); int rds_send_xmit(struct rds_connection *conn); struct sockaddr_in; diff --git a/net/rds/recv.c b/net/rds/recv.c index f9ec1acd801c..a00462b0d01d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -395,8 +395,8 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) return 0; } -int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size, int msg_flags) +int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int msg_flags) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rds/send.c b/net/rds/send.c index 42f65d4305c8..44672befc0ee 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -920,8 +920,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t payload_len) +int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 43bac7c4dd9e..8ae603069a1a 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1046,8 +1046,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros return 1; } -static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int rose_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); @@ -1211,8 +1210,8 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, } -static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7b1670489638..0095b9a0b779 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -441,8 +441,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, * - sends a call data packet * - may send an abort (abort code in control data) */ -static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len) +static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { struct rxrpc_transport *trans; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); @@ -482,7 +481,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, switch (rx->sk.sk_state) { case RXRPC_SERVER_LISTENING: if (!m->msg_name) { - ret = rxrpc_server_sendmsg(iocb, rx, m, len); + ret = rxrpc_server_sendmsg(rx, m, len); break; } case RXRPC_SERVER_BOUND: @@ -492,7 +491,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock, break; } case RXRPC_CLIENT_CONNECTED: - ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len); + ret = rxrpc_client_sendmsg(rx, trans, m, len); break; default: ret = -ENOTCONN; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ba9fd36d3f15..2fc1e659e5c9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -548,10 +548,9 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, extern unsigned rxrpc_resend_timeout; int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); -int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *, - struct rxrpc_transport *, struct msghdr *, size_t); -int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *, struct msghdr *, - size_t); +int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *, + struct msghdr *, size_t); +int rxrpc_server_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* * ar-peer.c @@ -572,8 +571,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; * ar-recvmsg.c */ void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, - int); +int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * ar-security.c diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 8331c95e1522..09f584566e23 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -23,8 +23,7 @@ */ unsigned rxrpc_resend_timeout = 4 * HZ; -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len); @@ -129,9 +128,8 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) * - caller holds the socket locked * - the socket may be either a client socket or a server socket */ -int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct rxrpc_transport *trans, struct msghdr *msg, - size_t len) +int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans, + struct msghdr *msg, size_t len) { struct rxrpc_conn_bundle *bundle; enum rxrpc_command cmd; @@ -191,7 +189,7 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, /* request phase complete for this client call */ ret = -EPROTO; } else { - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); } rxrpc_put_call(call); @@ -232,7 +230,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(NULL, call->socket, call, msg, len); + ret = rxrpc_send_data(call->socket, call, msg, len); } release_sock(&call->socket->sk); @@ -271,8 +269,7 @@ EXPORT_SYMBOL(rxrpc_kernel_abort_call); * send a message through a server socket * - caller holds the socket locked */ -int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, - struct msghdr *msg, size_t len) +int rxrpc_server_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -313,7 +310,7 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx, break; } - ret = rxrpc_send_data(iocb, rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len); break; case RXRPC_CMD_SEND_ABORT: @@ -520,8 +517,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, * - must be called in process context * - caller holds the socket locked */ -static int rxrpc_send_data(struct kiocb *iocb, - struct rxrpc_sock *rx, +static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, struct msghdr *msg, size_t len) { diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index d58ba702bd2c..a4f883e2d66f 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -43,8 +43,8 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) * - we need to be careful about two or more threads calling recvmsg * simultaneously */ -int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct rxrpc_skb_priv *sp; struct rxrpc_call *call = NULL, *continue_call = NULL; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index aafe94bf292e..f1a65398f311 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1586,8 +1586,7 @@ static int sctp_error(struct sock *sk, int flags, int err) static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -2066,9 +2065,8 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * flags - flags sent or received with the user message, see Section * 5 for complete description of the flags. */ -static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); diff --git a/net/socket.c b/net/socket.c index b78cf601a021..95d3085cb477 100644 --- a/net/socket.c +++ b/net/socket.c @@ -610,45 +610,20 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); -static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size) { - return sock->ops->sendmsg(iocb, sock, msg, size); + return sock->ops->sendmsg(sock, msg, size); } -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { int err = security_socket_sendmsg(sock, msg, size); - return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); -} - -static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, - size_t size, bool nosec) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : - __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, false); + return err ?: sock_sendmsg_nosec(sock, msg, size); } EXPORT_SYMBOL(sock_sendmsg); -static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) -{ - return do_sock_sendmsg(sock, msg, size, true); -} - int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -744,47 +719,21 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); -static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, size, flags); } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { int err = security_socket_recvmsg(sock, msg, size, flags); - return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); -} - -int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; + return err ?: sock_recvmsg_nosec(sock, msg, size, flags); } EXPORT_SYMBOL(sock_recvmsg); -static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - /** * kernel_recvmsg - Receive a message from a socket (kernel space) * @sock: The socket to receive the message from @@ -861,8 +810,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - res = __sock_recvmsg(iocb, sock, &msg, - iocb->ki_nbytes, msg.msg_flags); + res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags); *to = msg.msg_iter; return res; } @@ -883,7 +831,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (sock->type == SOCK_SEQPACKET) msg.msg_flags |= MSG_EOR; - res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes); + res = sock_sendmsg(sock, &msg, iocb->ki_nbytes); *from = msg.msg_iter; return res; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c245ec31fa4c..dcb797c60806 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -895,7 +895,6 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) /** * tipc_sendmsg - send message in connectionless manner - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: amount of user data to be sent @@ -907,7 +906,7 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock, +static int tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; @@ -1052,7 +1051,6 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) /** * tipc_send_stream - send stream-oriented data - * @iocb: (unused) * @sock: socket structure * @m: data to send * @dsz: total length of data to be transmitted @@ -1062,8 +1060,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) * Returns the number of bytes sent on success (or partial success), * or errno if no data sent */ -static int tipc_send_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) { struct sock *sk = sock->sk; int ret; @@ -1147,7 +1144,6 @@ next: /** * tipc_send_packet - send a connection-oriented message - * @iocb: if NULL, indicates that socket lock is already held * @sock: socket structure * @m: message to send * @dsz: length of data to be transmitted @@ -1156,13 +1152,12 @@ next: * * Returns the number of bytes sent on success, or errno otherwise */ -static int tipc_send_packet(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t dsz) +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) { if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - return tipc_send_stream(iocb, sock, m, dsz); + return tipc_send_stream(sock, m, dsz); } /* tipc_sk_finish_conn - complete the setup of a connection @@ -1337,7 +1332,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) /** * tipc_recvmsg - receive packet-oriented message - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1347,8 +1341,8 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) * * Returns size of returned message data, errno otherwise */ -static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, + int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -1432,7 +1426,6 @@ exit: /** * tipc_recv_stream - receive stream-oriented data - * @iocb: (unused) * @m: descriptor for message info * @buf_len: total size of user buffer area * @flags: receive flags @@ -1442,8 +1435,8 @@ exit: * * Returns size of returned message data, errno otherwise */ -static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t buf_len, int flags) +static int tipc_recv_stream(struct socket *sock, struct msghdr *m, + size_t buf_len, int flags) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 526b6edab018..433f287ee548 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -516,20 +516,15 @@ static unsigned int unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); -static int unix_stream_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_stream_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); -static int unix_dgram_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_dgram_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); +static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); -static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t); -static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, - struct msghdr *, size_t, int); +static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); +static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, + int); static int unix_set_peek_off(struct sock *sk, int val) { @@ -1442,8 +1437,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, * Send AF_UNIX data. */ -static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); @@ -1622,8 +1617,8 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) -static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk = sock->sk; struct sock *other = NULL; @@ -1725,8 +1720,8 @@ out_err: return sent ? : err; } -static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk = sock->sk; @@ -1741,19 +1736,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_namelen) msg->msg_namelen = 0; - return unix_dgram_sendmsg(kiocb, sock, msg, len); + return unix_dgram_sendmsg(sock, msg, len); } -static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct sock *sk = sock->sk; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; - return unix_dgram_recvmsg(iocb, sock, msg, size, flags); + return unix_dgram_recvmsg(sock, msg, size, flags); } static void unix_copy_addr(struct msghdr *msg, struct sock *sk) @@ -1766,9 +1760,8 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) } } -static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; @@ -1900,9 +1893,8 @@ static unsigned int unix_skb_len(const struct sk_buff *skb) return skb->len - UNIXCB(skb).consumed; } -static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) +static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { struct scm_cookie scm; struct sock *sk = sock->sk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 1d0e39c9a3e2..2ec86e652a19 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -949,8 +949,8 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, return mask; } -static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { int err; struct sock *sk; @@ -1062,11 +1062,10 @@ out: return err; } -static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) +static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) { - return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len, - flags); + return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags); } static const struct proto_ops vsock_dgram_ops = { @@ -1505,8 +1504,8 @@ static int vsock_stream_getsockopt(struct socket *sock, return 0; } -static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) { struct sock *sk; struct vsock_sock *vsk; @@ -1644,9 +1643,8 @@ out: static int -vsock_stream_recvmsg(struct kiocb *kiocb, - struct socket *sock, - struct msghdr *msg, size_t len, int flags) +vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7f3255084a6c..c294da095461 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1730,8 +1730,7 @@ static int vmci_transport_dgram_enqueue( return err - sizeof(*dg); } -static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, - struct vsock_sock *vsk, +static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags) { diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d9149b68b9bc..c3ab230e4493 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1077,8 +1077,7 @@ out_clear_request: goto out; } -static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) +static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); @@ -1252,8 +1251,7 @@ out_kfree_skb: } -static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, +static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; -- cgit v1.2.3 From 948fa2d115c553ae32aced66e0f00f89245dc05e Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 5 Mar 2015 10:23:48 +0100 Subject: tipc: increase size of tipc discovery messages The payload area following the TIPC discovery message header is an opaque area defined by the media. INT_H_SIZE was enough for Ethernet/IB/IPv4 but needs to be expanded to carry IPv6 addressing information. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/discover.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index feef3753615d..5967506833ce 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -86,7 +86,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, msg = buf_msg(buf); tipc_msg_init(tn->own_addr, msg, LINK_CONFIG, type, - INT_H_SIZE, dest_domain); + MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); msg_set_dest_domain(msg, dest_domain); @@ -249,7 +249,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rbuf = tipc_buf_acquire(INT_H_SIZE); + rbuf = tipc_buf_acquire(MAX_H_SIZE); if (rbuf) { tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer); tipc_bearer_send(net, bearer->identity, rbuf, &maddr); @@ -359,8 +359,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - - req->buf = tipc_buf_acquire(INT_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE); if (!req->buf) { kfree(req); return -ENOMEM; -- cgit v1.2.3 From d0f91938bede204a343473792529e0db7d599836 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 5 Mar 2015 10:23:49 +0100 Subject: tipc: add ip/udp media type The ip/udp bearer can be configured in a point-to-point mode by specifying both local and remote ip/hostname, or it can be enabled in multicast mode, where links are established to all tipc nodes that have joined the same multicast group. The multicast IP address is generated based on the TIPC network ID, but can be overridden by using another multicast address as remote ip. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Reviewed-by: Ying Xue Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 9 + net/tipc/Kconfig | 8 + net/tipc/Makefile | 1 + net/tipc/bearer.c | 13 +- net/tipc/bearer.h | 12 +- net/tipc/msg.h | 2 +- net/tipc/udp_media.c | 442 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 479 insertions(+), 8 deletions(-) create mode 100644 net/tipc/udp_media.c (limited to 'net/tipc') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 8d723824ad69..d4c8f142ba63 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -83,11 +83,20 @@ enum { TIPC_NLA_BEARER_NAME, /* string */ TIPC_NLA_BEARER_PROP, /* nest */ TIPC_NLA_BEARER_DOMAIN, /* u32 */ + TIPC_NLA_BEARER_UDP_OPTS, /* nest */ __TIPC_NLA_BEARER_MAX, TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1 }; +enum { + TIPC_NLA_UDP_UNSPEC, + TIPC_NLA_UDP_LOCAL, /* sockaddr_storage */ + TIPC_NLA_UDP_REMOTE, /* sockaddr_storage */ + + __TIPC_NLA_UDP_MAX, + TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1 +}; /* Socket info */ enum { TIPC_NLA_SOCK_UNSPEC, diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 91c8a8e031db..c25a3a149dc4 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -26,3 +26,11 @@ config TIPC_MEDIA_IB help Saying Y here will enable support for running TIPC on IP-over-InfiniBand devices. +config TIPC_MEDIA_UDP + bool "IP/UDP media type support" + depends on TIPC + select NET_UDP_TUNNEL + help + Saying Y here will enable support for running TIPC over IP/UDP + bool + default y diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 599b1a540d2b..57e460be4692 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -10,5 +10,6 @@ tipc-y += addr.o bcast.o bearer.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o +tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index af6deeb397a8..840db89e4283 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -47,6 +47,9 @@ static struct tipc_media * const media_info_array[] = { ð_media_info, #ifdef CONFIG_TIPC_MEDIA_IB &ib_media_info, +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, #endif NULL }; @@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * tipc_enable_bearer - enable bearer with the given name */ static int tipc_enable_bearer(struct net *net, const char *name, - u32 disc_domain, u32 priority) + u32 disc_domain, u32 priority, + struct nlattr *attr[]) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; @@ -304,7 +308,7 @@ restart: strcpy(b_ptr->name, name); b_ptr->media = m_ptr; - res = m_ptr->enable_media(net, b_ptr); + res = m_ptr->enable_media(net, b_ptr, attr); if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); @@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, kfree_rcu(b_ptr, rcu); } -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) { struct net_device *dev; char *driver_name = strchr((const char *)b->name, ':') + 1; @@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); - err = tipc_enable_bearer(net, bearer, domain, prio); + err = tipc_enable_bearer(net, bearer, domain, prio, attrs); if (err) { rtnl_unlock(); return err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 097aff08ad5b..5cad243ee8fc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -41,7 +41,7 @@ #include #define MAX_BEARERS 2 -#define MAX_MEDIA 2 +#define MAX_MEDIA 3 #define MAX_NODES 4096 #define WSIZE 32 @@ -59,6 +59,7 @@ */ #define TIPC_MEDIA_TYPE_ETH 1 #define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 /** * struct tipc_node_map - set of node identifiers @@ -104,7 +105,8 @@ struct tipc_media { int (*send_msg)(struct net *net, struct sk_buff *buf, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest); - int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr); + int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr, + struct nlattr *attr[]); void (*disable_media)(struct tipc_bearer *b_ptr); int (*addr2str)(struct tipc_media_addr *addr, char *strbuf, @@ -183,6 +185,9 @@ extern struct tipc_media eth_media_info; #ifdef CONFIG_TIPC_MEDIA_IB extern struct tipc_media ib_media_info; #endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; +#endif int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); @@ -197,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); -int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c1cc8d7a5d52..fa167846d1ab 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -87,7 +87,7 @@ struct plist; * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields * are word aligned for quicker access */ -#define BUF_HEADROOM LL_MAX_HEADER +#define BUF_HEADROOM (LL_MAX_HEADER + 48) struct tipc_skb_cb { void *handle; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c new file mode 100644 index 000000000000..0d10001db40d --- /dev/null +++ b/net/tipc/udp_media.c @@ -0,0 +1,442 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "core.h" +#include "bearer.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + */ +struct udp_media_addr { + __be16 proto; + __be16 udp_port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * @bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; +}; + +/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + if (ntohs(ua->proto) == ETH_P_IP) { + if (ipv4_is_multicast(ua->ipv4.s_addr)) + addr->broadcast = 1; + } else if (ntohs(ua->proto) == ETH_P_IPV6) { + if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) + addr->broadcast = 1; + } else { + pr_err("Invalid UDP media address\n"); + } +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + else + pr_err("Invalid UDP media address\n"); + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dest) +{ + int ttl, err = 0; + struct udp_bearer *ub; + struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct sk_buff *clone; + struct rtable *rt; + + clone = skb_clone(skb, GFP_ATOMIC); + skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto tx_error; + } + if (htons(dst->proto) == ETH_P_IP) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = clone->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + ttl = ip4_dst_hoplimit(&rt->dst); + err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, + src->udp_port, dst->udp_port, + false, true); + if (err < 0) { + ip_rt_put(rt); + goto tx_error; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct dst_entry *ndst; + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + err = ip6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + if (err) + goto tx_error; + ttl = ip6_dst_hoplimit(ndst); + err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6, + &dst->ipv6, 0, ttl, src->udp_port, + dst->udp_port, false); +#endif + } + return err; + +tx_error: + kfree_skb(clone); + return err; +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + + ub = rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + kfree_skb(skb); + return 0; + } + + skb_pull(skb, sizeof(struct udphdr)); + rcu_read_lock(); + b = rcu_dereference_rtnl(ub->bearer); + + if (b) { + tipc_rcv(sock_net(sk), skb, b); + rcu_read_unlock(); + return 0; + } + rcu_read_unlock(); + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + if (!ipv4_is_multicast(remote->ipv4.s_addr)) + return 0; + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = __ip_mc_join_group(sk, &mreqn); + } else { + if (!ipv6_addr_is_multicast(&remote->ipv6)) + return 0; + err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + } + return err; +} + +/** + * parse_options - build local/remote addresses from configuration + * @attrs: netlink config data + * @ub: UDP bearer instance + * @local: local bearer IP address/port + * @remote: peer or multicast IP/port + */ +static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, + struct udp_media_addr *local, + struct udp_media_addr *remote) +{ + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct sockaddr_storage *sa_local, *sa_remote; + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { + sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); + sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); + } else { +err: + pr_err("Invalid UDP bearer configuration"); + return -EINVAL; + } + if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { + struct sockaddr_in *ip4; + + ip4 = (struct sockaddr_in *)sa_local; + local->proto = htons(ETH_P_IP); + local->udp_port = ip4->sin_port; + local->ipv4.s_addr = ip4->sin_addr.s_addr; + + ip4 = (struct sockaddr_in *)sa_remote; + remote->proto = htons(ETH_P_IP); + remote->udp_port = ip4->sin_port; + remote->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { + struct sockaddr_in6 *ip6; + + ip6 = (struct sockaddr_in6 *)sa_local; + local->proto = htons(ETH_P_IPV6); + local->udp_port = ip6->sin6_port; + local->ipv6 = ip6->sin6_addr; + ub->ifindex = ip6->sin6_scope_id; + + ip6 = (struct sockaddr_in6 *)sa_remote; + remote->proto = htons(ETH_P_IPV6); + remote->udp_port = ip6->sin6_port; + remote->ipv6 = ip6->sin6_addr; + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr *remote; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {0}; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + remote = (struct udp_media_addr *)&b->bcast_addr.value; + memset(remote, 0, sizeof(struct udp_media_addr)); + err = parse_options(attrs, ub, &local, remote); + if (err) + goto err; + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = 1; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (htons(local.proto) == ETH_P_IP) { + struct net_device *dev; + + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + b->mtu = dev->mtu - sizeof(struct iphdr) + - sizeof(struct udphdr); +#if IS_ENABLED(CONFIG_IPV6) + } else if (htons(local.proto) == ETH_P_IPV6) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + udp_conf.local_ip6 = in6addr_any; + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.udp_port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + if (enable_mcast(ub, remote)) + goto err; + return 0; +err: + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + synchronize_net(); + kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + if (ub->ubsock) + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(b->media_ptr, NULL); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; -- cgit v1.2.3 From 4fee6be8134a69545caf88d8b439936a326d6d77 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 9 Mar 2015 10:19:31 +0100 Subject: tipc: sparse: fix htons conversion warnings Commit d0f91938bede ("tipc: add ip/udp media type") introduced some new sparse warnings. Clean them up. Signed-off-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 0d10001db40d..fc2fb11a354d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -162,7 +162,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, err = -ENODEV; goto tx_error; } - if (htons(dst->proto) == ETH_P_IP) { + if (dst->proto == htons(ETH_P_IP)) { struct flowi4 fl = { .daddr = dst->ipv4.s_addr, .saddr = src->ipv4.s_addr, @@ -334,7 +334,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct udp_media_addr *remote; struct udp_media_addr local = {0}; struct udp_port_cfg udp_conf = {0}; - struct udp_tunnel_sock_cfg tuncfg = {0}; + struct udp_tunnel_sock_cfg tuncfg = {NULL}; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); if (!ub) @@ -351,7 +351,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, rcu_assign_pointer(b->media_ptr, ub); rcu_assign_pointer(ub->bearer, b); tipc_udp_media_addr_set(&b->addr, &local); - if (htons(local.proto) == ETH_P_IP) { + if (local.proto == htons(ETH_P_IP)) { struct net_device *dev; dev = __ip_dev_find(net, local.ipv4.s_addr, false); @@ -366,7 +366,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, b->mtu = dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr); #if IS_ENABLED(CONFIG_IPV6) - } else if (htons(local.proto) == ETH_P_IPV6) { + } else if (local.proto == htons(ETH_P_IPV6)) { udp_conf.family = AF_INET6; udp_conf.use_udp6_tx_checksums = true; udp_conf.use_udp6_rx_checksums = true; -- cgit v1.2.3 From 143fe22f50a8be855bba77b5b2dc9dd1a5982b1c Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 9 Mar 2015 10:43:42 +0100 Subject: tipc: fix inconsistent signal handling regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9bbb4ecc6819 ("tipc: standardize recvmsg routine") changed the sleep/wakeup behaviour for sockets entering recv() or accept(). In this process the order of reporting -EAGAIN/-EINTR was reversed. This caused problems with wrong errno being reported back if the timeout expires. The same problem happens if the socket is nonblocking and recv()/accept() is called when the process have pending signals. If there is no pending data read or connections to accept, -EINTR will be returned instead of -EAGAIN. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Reported-by László Benedek Signed-off-by: David S. Miller --- net/tipc/socket.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 95c514a1d7d9..934947f038b6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1318,12 +1318,12 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; err = -EAGAIN; if (!timeo) break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; } finish_wait(sk_sleep(sk), &wait); *timeop = timeo; @@ -2026,12 +2026,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = -EINVAL; if (sock->state != SS_LISTENING) break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; err = -EAGAIN; if (!timeo) break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; } finish_wait(sk_sleep(sk), &wait); return err; -- cgit v1.2.3 From 169bf9121b19dd6029e0a354d33513f61bfbe3d3 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 10 Mar 2015 12:23:34 -0400 Subject: tipc: ensure that idle links are deleted when a bearer is disabled commit afaa3f65f65fda2e7b190aac7e2a75d9a2a77cb6 (tipc: purge links when bearer is disabled) was an attempt to resolve a problem that turned out to have a more profound reason. When we disable a bearer, we delete all its pertaining links if there is no other bearer to perform failover to, or if the module is shutting down. In case there are dual bearers, we wait with deleting links until the failover procedure is finished. However, this misses the case when a link on the removed bearer was already down, so that there will be no failover procedure to finish the link delete. This causes confusion if a new bearer is added to replace the removed one, and also entails a small memory leak. This commit takes the current state of the link into account when deciding when to delete it, and also reverses the above-mentioned commit. Reviewed-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- net/tipc/link.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 840db89e4283..3613e72e858e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -747,7 +747,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - bearer_disable(net, bearer, true); + bearer_disable(net, bearer, false); rtnl_unlock(); return 0; diff --git a/net/tipc/link.c b/net/tipc/link.c index 14f09b3cb87c..98609fdfb06a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -344,6 +344,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *link; struct tipc_node *node; + bool del_link; rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { @@ -353,12 +354,13 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, tipc_node_unlock(node); continue; } + del_link = !tipc_link_is_up(link) && !link->exp_msg_count; tipc_link_reset(link); if (del_timer(&link->timer)) tipc_link_put(link); link->flags |= LINK_STOPPED; /* Delete link now, or when failover is finished: */ - if (shutting_down || !tipc_node_is_up(node)) + if (shutting_down || !tipc_node_is_up(node) || del_link) tipc_link_delete(link); tipc_node_unlock(node); } -- cgit v1.2.3 From 7764d6e83d2c3b50d9282f12144ebb10418c056e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:05 -0400 Subject: tipc: add framework for node capabilities exchange The TIPC protocol spec has defined a 13 bit capability bitmap in the neighbor discovery header, as a means to maintain compatibility between different code and protocol generations. Until now this field has been unused. We now introduce the basic framework for exchanging capabilities between nodes at first contact. After exchange, a peer node's capabilities are stored as a 16 bit bitmap in struct tipc_node. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/discover.c | 3 +++ net/tipc/msg.h | 11 ++++++++++- net/tipc/node.h | 4 +++- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 5967506833ce..169f3dd038b9 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -89,6 +89,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); + msg_set_node_capabilities(msg, 0); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); @@ -133,6 +134,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, u32 net_id = msg_bc_netid(msg); u32 mtyp = msg_type(msg); u32 signature = msg_node_sig(msg); + u16 caps = msg_node_capabilities(msg); bool addr_match = false; bool sign_match = false; bool link_up = false; @@ -167,6 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, if (!node) return; tipc_node_lock(node); + node->capabilities = caps; link = node->links[bearer->identity]; /* Prepare to validate requesting node's signature and media address */ diff --git a/net/tipc/msg.h b/net/tipc/msg.h index fa167846d1ab..7cece647394c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -510,7 +510,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 - /* * Word 1 */ @@ -534,6 +533,16 @@ static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 0, 0xffff, n); } +static inline u32 msg_node_capabilities(struct tipc_msg *m) +{ + return msg_bits(m, 1, 15, 0x1fff); +} + +static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 15, 0x1fff, n); +} + /* * Word 2 diff --git a/net/tipc/node.h b/net/tipc/node.h index 3d18c66b7f78..f78be64e105b 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -106,6 +106,7 @@ struct tipc_node_bclink { * @list: links to adjacent nodes in sorted list of cluster's nodes * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities * @signature: node instance identifier * @link_id: local and remote bearer ids of changing link, if any * @publ_list: list of publications @@ -125,7 +126,8 @@ struct tipc_node { struct tipc_node_bclink bclink; struct list_head list; int link_cnt; - int working_links; + u16 working_links; + u16 capabilities; u32 signature; u32 link_id; struct list_head publ_list; -- cgit v1.2.3 From cf2157f88a5abf1a64b8c51a737a35e918dc67e5 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:06 -0400 Subject: tipc: move message validation function to msg.c The function link_buf_validate() is in reality re-entrant and context independent, and will in later commits be called from several locations. Therefore, we move it to msg.c, make it outline and rename the it to tipc_msg_validate(). We also redesign the function to make proper use of pskb_may_pull() Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 58 +-------------------------------------------------------- net/tipc/msg.c | 44 ++++++++++++++++++++++++++++++++++++++++++- net/tipc/msg.h | 5 +++-- 3 files changed, 47 insertions(+), 60 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 98609fdfb06a..944c8c663a2d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1047,61 +1047,6 @@ static void link_retrieve_defq(struct tipc_link *link, skb_queue_splice_tail_init(&link->deferred_queue, list); } -/** - * link_recv_buf_validate - validate basic format of received message - * - * This routine ensures a TIPC message has an acceptable header, and at least - * as much data as the header indicates it should. The routine also ensures - * that the entire message header is stored in the main fragment of the message - * buffer, to simplify future access to message header fields. - * - * Note: Having extra info present in the message header or data areas is OK. - * TIPC will ignore the excess, under the assumption that it is optional info - * introduced by a later release of the protocol. - */ -static int link_recv_buf_validate(struct sk_buff *buf) -{ - static u32 min_data_hdr_size[8] = { - SHORT_H_SIZE, MCAST_H_SIZE, NAMED_H_SIZE, BASIC_H_SIZE, - MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE - }; - - struct tipc_msg *msg; - u32 tipc_hdr[2]; - u32 size; - u32 hdr_size; - u32 min_hdr_size; - - /* If this packet comes from the defer queue, the skb has already - * been validated - */ - if (unlikely(TIPC_SKB_CB(buf)->deferred)) - return 1; - - if (unlikely(buf->len < MIN_H_SIZE)) - return 0; - - msg = skb_header_pointer(buf, 0, sizeof(tipc_hdr), tipc_hdr); - if (msg == NULL) - return 0; - - if (unlikely(msg_version(msg) != TIPC_VERSION)) - return 0; - - size = msg_size(msg); - hdr_size = msg_hdr_sz(msg); - min_hdr_size = msg_isdata(msg) ? - min_data_hdr_size[msg_type(msg)] : INT_H_SIZE; - - if (unlikely((hdr_size < min_hdr_size) || - (size < hdr_size) || - (buf->len < size) || - (size - hdr_size > TIPC_MAX_USER_MSG_SIZE))) - return 0; - - return pskb_may_pull(buf, hdr_size); -} - /** * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace @@ -1127,7 +1072,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) while ((skb = __skb_dequeue(&head))) { /* Ensure message is well-formed */ - if (unlikely(!link_recv_buf_validate(skb))) + if (unlikely(!tipc_msg_validate(skb))) goto discard; /* Ensure message data is a single contiguous unit */ @@ -1398,7 +1343,6 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, if (tipc_link_defer_pkt(&l_ptr->deferred_queue, buf)) { l_ptr->stats.deferred_recv++; - TIPC_SKB_CB(buf)->deferred = true; if ((skb_queue_len(&l_ptr->deferred_queue) % 16) == 1) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } else { diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b6eb90cd3ef7..4a64caf6fa20 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -1,7 +1,7 @@ /* * net/tipc/msg.c: TIPC message header routines * - * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -181,6 +181,48 @@ err: return 0; } +/* tipc_msg_validate - validate basic format of received message + * + * This routine ensures a TIPC message has an acceptable header, and at least + * as much data as the header indicates it should. The routine also ensures + * that the entire message header is stored in the main fragment of the message + * buffer, to simplify future access to message header fields. + * + * Note: Having extra info present in the message header or data areas is OK. + * TIPC will ignore the excess, under the assumption that it is optional info + * introduced by a later release of the protocol. + */ +bool tipc_msg_validate(struct sk_buff *skb) +{ + struct tipc_msg *msg; + int msz, hsz; + + if (unlikely(TIPC_SKB_CB(skb)->validated)) + return true; + if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) + return false; + + hsz = msg_hdr_sz(buf_msg(skb)); + if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE)) + return false; + if (unlikely(!pskb_may_pull(skb, hsz))) + return false; + + msg = buf_msg(skb); + if (unlikely(msg_version(msg) != TIPC_VERSION)) + return false; + + msz = msg_size(msg); + if (unlikely(msz < hsz)) + return false; + if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) + return false; + if (unlikely(skb->len < msz)) + return false; + + TIPC_SKB_CB(skb)->validated = true; + return true; +} /** * tipc_msg_build - create buffer chain containing specified header and data diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7cece647394c..62306b8d2410 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, 2014, Ericsson AB + * Copyright (c) 2000-2007, 2014-2015 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -92,7 +92,7 @@ struct plist; struct tipc_skb_cb { void *handle; struct sk_buff *tail; - bool deferred; + bool validated; bool wakeup_pending; bool bundling; u16 chain_sz; @@ -758,6 +758,7 @@ static inline u32 msg_tot_origport(struct tipc_msg *m) } struct sk_buff *tipc_buf_acquire(u32 size); +bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, -- cgit v1.2.3 From 1149557d64c97dc9adf3103347a1c0e8c06d3b89 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:07 -0400 Subject: tipc: eliminate unnecessary linearization of incoming buffers Currently, TIPC linearizes all incoming buffers directly at reception before passing them upwards in the stack. This is clearly a waste of CPU resources, and must be avoided. In this commit, we eliminate this unnecessary linearization. We still ensure that at least the message header is linear, and that the buffer is linearized where this is still needed, i.e. when unbundling and when reversing messages. In addition, we ensure that fragmented messages are validated after reassembly before delivering them upwards in the stack. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 5 ----- net/tipc/msg.c | 14 ++++++++++---- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 944c8c663a2d..8c6639d107fc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1075,13 +1075,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) if (unlikely(!tipc_msg_validate(skb))) goto discard; - /* Ensure message data is a single contiguous unit */ - if (unlikely(skb_linearize(skb))) - goto discard; - /* Handle arrival of a non-unicast link message */ msg = buf_msg(skb); - if (unlikely(msg_non_seq(msg))) { if (msg_user(msg) == LINK_CONFIG) tipc_disc_rcv(net, skb, b_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4a64caf6fa20..ff8c64cd1cd9 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -165,6 +165,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } if (fragid == LAST_FRAGMENT) { + TIPC_SKB_CB(head)->validated = false; + if (unlikely(!tipc_msg_validate(head))) + goto err; *buf = head; TIPC_SKB_CB(head)->tail = NULL; *headbuf = NULL; @@ -172,7 +175,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) } *buf = NULL; return 0; - err: pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); @@ -378,10 +380,14 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { - struct tipc_msg *msg = buf_msg(skb); + struct tipc_msg *msg; int imsz; - struct tipc_msg *imsg = (struct tipc_msg *)(msg_data(msg) + *pos); + struct tipc_msg *imsg; + if (unlikely(skb_linearize(skb))) + return false; + msg = buf_msg(skb); + imsg = (struct tipc_msg *)(msg_data(msg) + *pos); /* Is there space left for shortest possible message? */ if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) goto none; @@ -463,11 +469,11 @@ bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, if (skb_linearize(buf)) goto exit; + msg = buf_msg(buf); if (msg_dest_droppable(msg)) goto exit; if (msg_errcode(msg)) goto exit; - memcpy(&ohdr, msg, msg_hdr_sz(msg)); imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); if (msg_isdata(msg)) -- cgit v1.2.3 From c1336ee472f83a90ede01fdae095ed5d0a2934c9 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:08 -0400 Subject: tipc: extract bundled buffers by cloning instead of copying When we currently extract a bundled buffer from a message bundle in the function tipc_msg_extract(), we allocate a new buffer and explicitly copy the linear data area. This is unnecessary, since we can just clone the buffer and do skb_pull() on the clone to move the data pointer to the correct position. This is what we do in this commit. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 45 ++++++++++++--------------------------------- net/tipc/msg.c | 30 ++++++++++++++++-------------- 2 files changed, 28 insertions(+), 47 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 8c6639d107fc..56c39b1a53a9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1,7 +1,7 @@ /* * net/tipc/link.c: TIPC link code * - * Copyright (c) 1996-2007, 2012-2014, Ericsson AB + * Copyright (c) 1996-2007, 2012-2015, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * @@ -1117,7 +1117,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) ackd = msg_ack(msg); /* Release acked messages */ - if (n_ptr->bclink.recv_permitted) + if (likely(n_ptr->bclink.recv_permitted)) tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); released = 0; @@ -1712,45 +1712,24 @@ void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, } } -/** - * buf_extract - extracts embedded TIPC message from another message - * @skb: encapsulating message buffer - * @from_pos: offset to extract from - * - * Returns a new message buffer containing an embedded message. The - * encapsulating buffer is left unchanged. - */ -static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) -{ - struct tipc_msg *msg = (struct tipc_msg *)(skb->data + from_pos); - u32 size = msg_size(msg); - struct sk_buff *eb; - - eb = tipc_buf_acquire(size); - if (eb) - skb_copy_to_linear_data(eb, msg, size); - return eb; -} - /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. * Owner node is locked. */ -static void tipc_link_dup_rcv(struct tipc_link *l_ptr, - struct sk_buff *t_buf) +static void tipc_link_dup_rcv(struct tipc_link *link, + struct sk_buff *skb) { - struct sk_buff *buf; + struct sk_buff *iskb; + int pos = 0; - if (!tipc_link_is_up(l_ptr)) + if (!tipc_link_is_up(link)) return; - buf = buf_extract(t_buf, INT_H_SIZE); - if (buf == NULL) { + if (!tipc_msg_extract(skb, &iskb, &pos)) { pr_warn("%sfailed to extract inner dup pkt\n", link_co_err); return; } - - /* Add buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(l_ptr, buf); + /* Append buffer to deferred queue, if applicable: */ + link_handle_out_of_seq_msg(link, iskb); } /* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet @@ -1762,6 +1741,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, struct tipc_msg *t_msg = buf_msg(t_buf); struct sk_buff *buf = NULL; struct tipc_msg *msg; + int pos = 0; if (tipc_link_is_up(l_ptr)) tipc_link_reset(l_ptr); @@ -1773,8 +1753,7 @@ static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, /* Should there be an inner packet? */ if (l_ptr->exp_msg_count) { l_ptr->exp_msg_count--; - buf = buf_extract(t_buf, INT_H_SIZE); - if (buf == NULL) { + if (!tipc_msg_extract(t_buf, &buf, &pos)) { pr_warn("%sno inner failover pkt\n", link_co_err); goto exit; } diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ff8c64cd1cd9..333d2ae1cf76 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -372,38 +372,40 @@ bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) /** * tipc_msg_extract(): extract bundled inner packet from buffer - * @skb: linear outer buffer, to be extracted from. + * @skb: buffer to be extracted from. * @iskb: extracted inner buffer, to be returned - * @pos: position of msg to be extracted. Returns with pointer of next msg + * @pos: position in outer message of msg to be extracted. + * Returns position of next msg * Consumes outer buffer when last packet extracted * Returns true when when there is an extracted buffer, otherwise false */ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) { struct tipc_msg *msg; - int imsz; - struct tipc_msg *imsg; + int imsz, offset; + *iskb = NULL; if (unlikely(skb_linearize(skb))) - return false; + goto none; + msg = buf_msg(skb); - imsg = (struct tipc_msg *)(msg_data(msg) + *pos); - /* Is there space left for shortest possible message? */ - if (*pos > (msg_data_sz(msg) - SHORT_H_SIZE)) + offset = msg_hdr_sz(msg) + *pos; + if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE))) goto none; - imsz = msg_size(imsg); - /* Is there space left for current message ? */ - if ((*pos + imsz) > msg_data_sz(msg)) + *iskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!*iskb)) goto none; - *iskb = tipc_buf_acquire(imsz); - if (!*iskb) + skb_pull(*iskb, offset); + imsz = msg_size(buf_msg(*iskb)); + skb_trim(*iskb, imsz); + if (unlikely(!tipc_msg_validate(*iskb))) goto none; - skb_copy_to_linear_data(*iskb, imsg, imsz); *pos += align(imsz); return true; none: kfree_skb(skb); + kfree_skb(*iskb); *iskb = NULL; return false; } -- cgit v1.2.3 From 2cdf3918e47e98c8f34f7a64455ea9fd433756e7 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:09 -0400 Subject: tipc: eliminate unnecessary call to broadcast ack function The unicast packet header contains a broadcast acknowledge sequence number, that may need to be conveyed to the broadcast link for proper treatment. Currently, the function tipc_rcv(), which is on the most critical data path, calls the function tipc_bclink_acknowledge() to have this done. This call is made for each received packet, and results in the unconditional grabbing of the broadcast link spinlock. This is unnecessary, since we can see directly from tipc_rcv() if the acknowledged number differs from what has been previously acked from the node in question. In the vast majority of cases the numbers won't differ, and there is nothing to update. We now make the call to tipc_bclink_acknowledge() conditional to that the two ack values differ. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 4 ++++ net/tipc/link.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3e41704832de..5ee5076a8b27 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -215,7 +215,11 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct net *net = n_ptr->net; struct tipc_net *tn = net_generic(net, tipc_net_id); + if (unlikely(!n_ptr->bclink.recv_permitted)) + return; + tipc_bclink_lock(net); + /* Bail out if tx queue is empty (no clean up is required) */ skb = skb_peek(&tn->bcl->outqueue); if (!skb) diff --git a/net/tipc/link.c b/net/tipc/link.c index 56c39b1a53a9..2652c3286e2f 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1117,7 +1117,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) ackd = msg_ack(msg); /* Release acked messages */ - if (likely(n_ptr->bclink.recv_permitted)) + if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg))) tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); released = 0; -- cgit v1.2.3 From 05dcc5aa4dcced4f59f925625cea669e82b75519 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:10 -0400 Subject: tipc: split link outqueue struct tipc_link contains one single queue for outgoing packets, where both transmitted and waiting packets are queued. This infrastructure is hard to maintain, because we need to keep a number of fields to keep track of which packets are sent or unsent, and the number of packets in each category. A lot of code becomes simpler if we split this queue into a transmission queue, where sent/unacknowledged packets are kept, and a backlog queue, where we keep the not yet sent packets. In this commit we do this separation. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 48 ++++++------- net/tipc/link.c | 208 ++++++++++++++++++++++++++----------------------------- net/tipc/link.h | 17 ++--- net/tipc/msg.c | 32 +++++---- net/tipc/msg.h | 6 +- net/tipc/node.c | 4 +- net/tipc/node.h | 2 +- 7 files changed, 150 insertions(+), 167 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 5ee5076a8b27..17cb0ff5f344 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -135,9 +135,10 @@ static void bclink_set_last_sent(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *bcl = tn->bcl; + struct sk_buff *skb = skb_peek(&bcl->backlogq); - if (bcl->next_out) - bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); + if (skb) + bcl->fsm_msg_cnt = mod(buf_seqno(skb) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } @@ -180,7 +181,7 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) struct sk_buff *skb; struct tipc_link *bcl = tn->bcl; - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&bcl->transmq, skb) { if (more(buf_seqno(skb), after)) { tipc_link_retransmit(bcl, skb, mod(to - after)); break; @@ -210,7 +211,6 @@ void tipc_bclink_wakeup_users(struct net *net) void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) { struct sk_buff *skb, *tmp; - struct sk_buff *next; unsigned int released = 0; struct net *net = n_ptr->net; struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -221,7 +221,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) tipc_bclink_lock(net); /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&tn->bcl->outqueue); + skb = skb_peek(&tn->bcl->transmq); if (!skb) goto exit; @@ -248,27 +248,19 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&tn->bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->transmq, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - - next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); - if (skb != tn->bcl->next_out) { - bcbuf_decr_acks(skb); - } else { - bcbuf_set_acks(skb, 0); - tn->bcl->next_out = next; - bclink_set_last_sent(net); - } - + bcbuf_decr_acks(skb); + bclink_set_last_sent(net); if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &tn->bcl->outqueue); + __skb_unlink(skb, &tn->bcl->transmq); kfree_skb(skb); released = 1; } @@ -276,7 +268,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(tn->bcl->next_out)) { + if (unlikely(skb_peek(&tn->bcl->backlogq))) { tipc_link_push_packets(tn->bcl); bclink_set_last_sent(net); } @@ -323,7 +315,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); + struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, @@ -398,7 +390,7 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) if (likely(bclink->bcast_nodes.count)) { rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { - u32 len = skb_queue_len(&bcl->outqueue); + u32 len = skb_queue_len(&bcl->transmq); bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; @@ -563,25 +555,25 @@ receive: if (node->bclink.last_in == node->bclink.last_sent) goto unlock; - if (skb_queue_empty(&node->bclink.deferred_queue)) { + if (skb_queue_empty(&node->bclink.deferdq)) { node->bclink.oos_state = 1; goto unlock; } - msg = buf_msg(skb_peek(&node->bclink.deferred_queue)); + msg = buf_msg(skb_peek(&node->bclink.deferdq)); seqno = msg_seqno(msg); next_in = mod(next_in + 1); if (seqno != next_in) goto unlock; /* Take in-sequence message from deferred queue & deliver it */ - buf = __skb_dequeue(&node->bclink.deferred_queue); + buf = __skb_dequeue(&node->bclink.deferdq); goto receive; } /* Handle out-of-sequence broadcast message */ if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferred_queue, + deferred = tipc_link_defer_pkt(&node->bclink.deferdq, buf); bclink_update_last_sent(node, seqno); buf = NULL; @@ -638,7 +630,6 @@ static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); tn->bcl->stats.sent_info++; - if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); return 0; @@ -917,8 +908,9 @@ int tipc_bclink_init(struct net *net) sprintf(bcbearer->media.name, "tipc-broadcast"); spin_lock_init(&bclink->lock); - __skb_queue_head_init(&bcl->outqueue); - __skb_queue_head_init(&bcl->deferred_queue); + __skb_queue_head_init(&bcl->transmq); + __skb_queue_head_init(&bcl->backlogq); + __skb_queue_head_init(&bcl->deferdq); skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); diff --git a/net/tipc/link.c b/net/tipc/link.c index 2652c3286e2f..7e0036f5a364 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -194,10 +194,10 @@ static void link_timeout(unsigned long data) tipc_node_lock(l_ptr->owner); /* update counters used in statistical profiling of send traffic */ - l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->outqueue); + l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq); l_ptr->stats.queue_sz_counts++; - skb = skb_peek(&l_ptr->outqueue); + skb = skb_peek(&l_ptr->transmq); if (skb) { struct tipc_msg *msg = buf_msg(skb); u32 length = msg_size(msg); @@ -229,7 +229,7 @@ static void link_timeout(unsigned long data) /* do all other link processing performed on a periodic basis */ link_state_event(l_ptr, TIMEOUT_EVT); - if (l_ptr->next_out) + if (skb_queue_len(&l_ptr->backlogq)) tipc_link_push_packets(l_ptr); tipc_node_unlock(l_ptr->owner); @@ -313,8 +313,9 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, link_init_max_pkt(l_ptr); l_ptr->next_out_no = 1; - __skb_queue_head_init(&l_ptr->outqueue); - __skb_queue_head_init(&l_ptr->deferred_queue); + __skb_queue_head_init(&l_ptr->transmq); + __skb_queue_head_init(&l_ptr->backlogq); + __skb_queue_head_init(&l_ptr->deferdq); skb_queue_head_init(&l_ptr->wakeupq); skb_queue_head_init(&l_ptr->inputq); skb_queue_head_init(&l_ptr->namedq); @@ -400,7 +401,7 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, */ void link_prepare_wakeup(struct tipc_link *link) { - uint pend_qsz = skb_queue_len(&link->outqueue); + uint pend_qsz = skb_queue_len(&link->backlogq); struct sk_buff *skb, *tmp; skb_queue_walk_safe(&link->wakeupq, skb, tmp) { @@ -430,8 +431,9 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) */ void tipc_link_purge_queues(struct tipc_link *l_ptr) { - __skb_queue_purge(&l_ptr->deferred_queue); - __skb_queue_purge(&l_ptr->outqueue); + __skb_queue_purge(&l_ptr->deferdq); + __skb_queue_purge(&l_ptr->transmq); + __skb_queue_purge(&l_ptr->backlogq); tipc_link_reset_fragments(l_ptr); } @@ -464,15 +466,15 @@ void tipc_link_reset(struct tipc_link *l_ptr) } /* Clean up all queues, except inputq: */ - __skb_queue_purge(&l_ptr->outqueue); - __skb_queue_purge(&l_ptr->deferred_queue); + __skb_queue_purge(&l_ptr->transmq); + __skb_queue_purge(&l_ptr->backlogq); + __skb_queue_purge(&l_ptr->deferdq); if (!owner->inputq) owner->inputq = &l_ptr->inputq; skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; - l_ptr->next_out = NULL; - l_ptr->unacked_window = 0; + l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; l_ptr->fsm_msg_cnt = 0; @@ -742,54 +744,51 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list) { struct tipc_msg *msg = buf_msg(skb_peek(list)); - uint psz = msg_size(msg); - uint sndlim = link->queue_limit[0]; + unsigned int maxwin = link->window; uint imp = tipc_msg_tot_importance(msg); uint mtu = link->max_pkt; uint ack = mod(link->next_in_no - 1); uint seqno = link->next_out_no; uint bc_last_in = link->owner->bclink.last_in; struct tipc_media_addr *addr = &link->media_addr; - struct sk_buff_head *outqueue = &link->outqueue; + struct sk_buff_head *transmq = &link->transmq; + struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *tmp; /* Match queue limits against msg importance: */ - if (unlikely(skb_queue_len(outqueue) >= link->queue_limit[imp])) + if (unlikely(skb_queue_len(backlogq) >= link->queue_limit[imp])) return tipc_link_cong(link, list); /* Has valid packet limit been used ? */ - if (unlikely(psz > mtu)) { + if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); return -EMSGSIZE; } - /* Prepare each packet for sending, and add to outqueue: */ + /* Prepare each packet for sending, and add to relevant queue: */ skb_queue_walk_safe(list, skb, tmp) { __skb_unlink(skb, list); msg = buf_msg(skb); - msg_set_word(msg, 2, ((ack << 16) | mod(seqno))); + msg_set_seqno(msg, seqno); + msg_set_ack(msg, ack); msg_set_bcast_ack(msg, bc_last_in); - if (skb_queue_len(outqueue) < sndlim) { - __skb_queue_tail(outqueue, skb); - tipc_bearer_send(net, link->bearer_id, - skb, addr); - link->next_out = NULL; - link->unacked_window = 0; - } else if (tipc_msg_bundle(outqueue, skb, mtu)) { + if (likely(skb_queue_len(transmq) < maxwin)) { + __skb_queue_tail(transmq, skb); + tipc_bearer_send(net, link->bearer_id, skb, addr); + link->rcv_unacked = 0; + seqno++; + continue; + } + if (tipc_msg_bundle(skb_peek_tail(backlogq), skb, mtu)) { link->stats.sent_bundled++; continue; - } else if (tipc_msg_make_bundle(outqueue, skb, mtu, - link->addr)) { + } + if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; - if (!link->next_out) - link->next_out = skb_peek_tail(outqueue); - } else { - __skb_queue_tail(outqueue, skb); - if (!link->next_out) - link->next_out = skb; } + __skb_queue_tail(backlogq, skb); seqno++; } link->next_out_no = seqno; @@ -895,14 +894,6 @@ static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) kfree_skb(buf); } -struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, - const struct sk_buff *skb) -{ - if (skb_queue_is_last(list, skb)) - return NULL; - return skb->next; -} - /* * tipc_link_push_packets - push unsent packets to bearer * @@ -911,30 +902,23 @@ struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, * * Called with node locked */ -void tipc_link_push_packets(struct tipc_link *l_ptr) +void tipc_link_push_packets(struct tipc_link *link) { - struct sk_buff_head *outqueue = &l_ptr->outqueue; - struct sk_buff *skb = l_ptr->next_out; + struct sk_buff *skb; struct tipc_msg *msg; - u32 next, first; + unsigned int ack = mod(link->next_in_no - 1); - skb_queue_walk_from(outqueue, skb) { - msg = buf_msg(skb); - next = msg_seqno(msg); - first = buf_seqno(skb_peek(outqueue)); - - if (mod(next - first) < l_ptr->queue_limit[0]) { - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - if (msg_user(msg) == MSG_BUNDLER) - TIPC_SKB_CB(skb)->bundling = false; - tipc_bearer_send(l_ptr->owner->net, - l_ptr->bearer_id, skb, - &l_ptr->media_addr); - l_ptr->next_out = tipc_skb_queue_next(outqueue, skb); - } else { + while (skb_queue_len(&link->transmq) < link->window) { + skb = __skb_dequeue(&link->backlogq); + if (!skb) break; - } + msg = buf_msg(skb); + msg_set_ack(msg, ack); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + link->rcv_unacked = 0; + __skb_queue_tail(&link->transmq, skb); + tipc_bearer_send(link->owner->net, link->bearer_id, + skb, &link->media_addr); } } @@ -1021,8 +1005,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, l_ptr->stale_count = 1; } - skb_queue_walk_from(&l_ptr->outqueue, skb) { - if (!retransmits || skb == l_ptr->next_out) + skb_queue_walk_from(&l_ptr->transmq, skb) { + if (!retransmits) break; msg = buf_msg(skb); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); @@ -1039,12 +1023,12 @@ static void link_retrieve_defq(struct tipc_link *link, { u32 seq_no; - if (skb_queue_empty(&link->deferred_queue)) + if (skb_queue_empty(&link->deferdq)) return; - seq_no = buf_seqno(skb_peek(&link->deferred_queue)); + seq_no = buf_seqno(skb_peek(&link->deferdq)); if (seq_no == mod(link->next_in_no)) - skb_queue_splice_tail_init(&link->deferred_queue, list); + skb_queue_splice_tail_init(&link->deferdq, list); } /** @@ -1121,17 +1105,16 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); released = 0; - skb_queue_walk_safe(&l_ptr->outqueue, skb1, tmp) { - if (skb1 == l_ptr->next_out || - more(buf_seqno(skb1), ackd)) + skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) { + if (more(buf_seqno(skb1), ackd)) break; - __skb_unlink(skb1, &l_ptr->outqueue); + __skb_unlink(skb1, &l_ptr->transmq); kfree_skb(skb1); released = 1; } /* Try sending any messages link endpoint has pending */ - if (unlikely(l_ptr->next_out)) + if (unlikely(skb_queue_len(&l_ptr->backlogq))) tipc_link_push_packets(l_ptr); if (released && !skb_queue_empty(&l_ptr->wakeupq)) @@ -1166,10 +1149,9 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) goto unlock; } l_ptr->next_in_no++; - if (unlikely(!skb_queue_empty(&l_ptr->deferred_queue))) + if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) link_retrieve_defq(l_ptr, &head); - - if (unlikely(++l_ptr->unacked_window >= TIPC_MIN_LINK_WIN)) { + if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } @@ -1336,9 +1318,9 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, return; } - if (tipc_link_defer_pkt(&l_ptr->deferred_queue, buf)) { + if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { l_ptr->stats.deferred_recv++; - if ((skb_queue_len(&l_ptr->deferred_queue) % 16) == 1) + if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); } else { l_ptr->stats.duplicates++; @@ -1375,11 +1357,11 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, if (!tipc_link_is_up(l_ptr)) return; - if (l_ptr->next_out) - next_sent = buf_seqno(l_ptr->next_out); + if (skb_queue_len(&l_ptr->backlogq)) + next_sent = buf_seqno(skb_peek(&l_ptr->backlogq)); msg_set_next_sent(msg, next_sent); - if (!skb_queue_empty(&l_ptr->deferred_queue)) { - u32 rec = buf_seqno(skb_peek(&l_ptr->deferred_queue)); + if (!skb_queue_empty(&l_ptr->deferdq)) { + u32 rec = buf_seqno(skb_peek(&l_ptr->deferdq)); gap = mod(rec - mod(l_ptr->next_in_no)); } msg_set_seq_gap(msg, gap); @@ -1431,10 +1413,9 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); buf->priority = TC_PRIO_CONTROL; - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf, &l_ptr->media_addr); - l_ptr->unacked_window = 0; + l_ptr->rcv_unacked = 0; kfree_skb(buf); } @@ -1569,7 +1550,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, } if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; - tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->outqueue), + tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq), msg_seq_gap(msg)); } break; @@ -1616,7 +1597,7 @@ static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr, */ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) { - u32 msgcount = skb_queue_len(&l_ptr->outqueue); + int msgcount; struct tipc_link *tunnel = l_ptr->owner->active_links[0]; struct tipc_msg tunnel_hdr; struct sk_buff *skb; @@ -1627,10 +1608,12 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); + msgcount = skb_queue_len(&l_ptr->transmq); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); - if (skb_queue_empty(&l_ptr->outqueue)) { + if (skb_queue_empty(&l_ptr->transmq)) { skb = tipc_buf_acquire(INT_H_SIZE); if (skb) { skb_copy_to_linear_data(skb, &tunnel_hdr, INT_H_SIZE); @@ -1646,7 +1629,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) split_bundles = (l_ptr->owner->active_links[0] != l_ptr->owner->active_links[1]); - skb_queue_walk(&l_ptr->outqueue, skb) { + skb_queue_walk(&l_ptr->transmq, skb) { struct tipc_msg *msg = buf_msg(skb); if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) { @@ -1677,39 +1660,46 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) * and sequence order is preserved per sender/receiver socket pair. * Owner node is locked. */ -void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, - struct tipc_link *tunnel) +void tipc_link_dup_queue_xmit(struct tipc_link *link, + struct tipc_link *tnl) { struct sk_buff *skb; - struct tipc_msg tunnel_hdr; - - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); - msg_set_msgcnt(&tunnel_hdr, skb_queue_len(&l_ptr->outqueue)); - msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); - skb_queue_walk(&l_ptr->outqueue, skb) { + struct tipc_msg tnl_hdr; + struct sk_buff_head *queue = &link->transmq; + int mcnt; + + tipc_msg_init(link_own_addr(link), &tnl_hdr, CHANGEOVER_PROTOCOL, + DUPLICATE_MSG, INT_H_SIZE, link->addr); + mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); + msg_set_msgcnt(&tnl_hdr, mcnt); + msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); + +tunnel_queue: + skb_queue_walk(queue, skb) { struct sk_buff *outskb; struct tipc_msg *msg = buf_msg(skb); - u32 length = msg_size(msg); + u32 len = msg_size(msg); - if (msg_user(msg) == MSG_BUNDLER) - msg_set_type(msg, CLOSED_MSG); - msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - msg_set_size(&tunnel_hdr, length + INT_H_SIZE); - outskb = tipc_buf_acquire(length + INT_H_SIZE); + msg_set_ack(msg, mod(link->next_in_no - 1)); + msg_set_bcast_ack(msg, link->owner->bclink.last_in); + msg_set_size(&tnl_hdr, len + INT_H_SIZE); + outskb = tipc_buf_acquire(len + INT_H_SIZE); if (outskb == NULL) { pr_warn("%sunable to send duplicate msg\n", link_co_err); return; } - skb_copy_to_linear_data(outskb, &tunnel_hdr, INT_H_SIZE); - skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, skb->data, - length); - __tipc_link_xmit_skb(tunnel, outskb); - if (!tipc_link_is_up(l_ptr)) + skb_copy_to_linear_data(outskb, &tnl_hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(outskb, INT_H_SIZE, + skb->data, len); + __tipc_link_xmit_skb(tnl, outskb); + if (!tipc_link_is_up(link)) return; } + if (queue == &link->backlogq) + return; + queue = &link->backlogq; + goto tunnel_queue; } /* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. @@ -1823,6 +1813,8 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) { + l_ptr->window = window; + /* Data messages from this node, inclusive FIRST_FRAGM */ l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4; diff --git a/net/tipc/link.h b/net/tipc/link.h index 7aeb52092bf3..eec3ecf2d450 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -124,7 +124,8 @@ struct tipc_stats { * @max_pkt: current maximum packet size for this link * @max_pkt_target: desired maximum packet size for this link * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target) - * @outqueue: outbound message queue + * @transmitq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent * @next_out_no: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer @@ -177,20 +178,21 @@ struct tipc_link { u32 max_pkt_probes; /* Sending */ - struct sk_buff_head outqueue; + struct sk_buff_head transmq; + struct sk_buff_head backlogq; u32 next_out_no; + u32 window; u32 last_retransmitted; u32 stale_count; /* Reception */ u32 next_in_no; - struct sk_buff_head deferred_queue; - u32 unacked_window; + u32 rcv_unacked; + struct sk_buff_head deferdq; struct sk_buff_head inputq; struct sk_buff_head namedq; /* Congestion handling */ - struct sk_buff *next_out; struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ @@ -302,9 +304,4 @@ static inline int link_reset_reset(struct tipc_link *l_ptr) return l_ptr->state == RESET_RESET; } -static inline int link_congested(struct tipc_link *l_ptr) -{ - return skb_queue_len(&l_ptr->outqueue) >= l_ptr->queue_limit[0]; -} - #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 333d2ae1cf76..47c8fd8e2fb2 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -330,33 +330,36 @@ error: /** * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one - * @list: the buffer chain of the existing buffer ("bundle") + * @bskb: the buffer to append to ("bundle") * @skb: buffer to be appended * @mtu: max allowable size for the bundle buffer * Consumes buffer if successful * Returns true if bundling could be performed, otherwise false */ -bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu) +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) { - struct sk_buff *bskb = skb_peek_tail(list); - struct tipc_msg *bmsg = buf_msg(bskb); + struct tipc_msg *bmsg; struct tipc_msg *msg = buf_msg(skb); - unsigned int bsz = msg_size(bmsg); + unsigned int bsz; unsigned int msz = msg_size(msg); - u32 start = align(bsz); + u32 start, pad; u32 max = mtu - INT_H_SIZE; - u32 pad = start - bsz; if (likely(msg_user(msg) == MSG_FRAGMENTER)) return false; + if (!bskb) + return false; + bmsg = buf_msg(bskb); + bsz = msg_size(bmsg); + start = align(bsz); + pad = start - bsz; + if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) return false; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) return false; if (likely(msg_user(bmsg) != MSG_BUNDLER)) return false; - if (likely(!TIPC_SKB_CB(bskb)->bundling)) - return false; if (unlikely(skb_tailroom(bskb) < (pad + msz))) return false; if (unlikely(max < (start + msz))) @@ -419,12 +422,11 @@ none: * Replaces buffer if successful * Returns true if success, otherwise false */ -bool tipc_msg_make_bundle(struct sk_buff_head *list, - struct sk_buff *skb, u32 mtu, u32 dnode) +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) { struct sk_buff *bskb; struct tipc_msg *bmsg; - struct tipc_msg *msg = buf_msg(skb); + struct tipc_msg *msg = buf_msg(*skb); u32 msz = msg_size(msg); u32 max = mtu - INT_H_SIZE; @@ -448,9 +450,9 @@ bool tipc_msg_make_bundle(struct sk_buff_head *list, msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); - TIPC_SKB_CB(bskb)->bundling = true; - __skb_queue_tail(list, bskb); - return tipc_msg_bundle(list, skb, mtu); + tipc_msg_bundle(bskb, *skb, mtu); + *skb = bskb; + return true; } /** diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 62306b8d2410..e5fc5fdb2ea7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -767,9 +767,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); -bool tipc_msg_bundle(struct sk_buff_head *list, struct sk_buff *skb, u32 mtu); -bool tipc_msg_make_bundle(struct sk_buff_head *list, - struct sk_buff *skb, u32 mtu, u32 dnode); +bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu); + +bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); diff --git a/net/tipc/node.c b/net/tipc/node.c index 86152de8248d..26d1de1bf34d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,7 +111,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - __skb_queue_head_init(&n_ptr->bclink.deferred_queue); + __skb_queue_head_init(&n_ptr->bclink.deferdq); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -354,7 +354,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferred_queue); + __skb_queue_purge(&n_ptr->bclink.deferdq); if (n_ptr->bclink.reasm_buf) { kfree_skb(n_ptr->bclink.reasm_buf); diff --git a/net/tipc/node.h b/net/tipc/node.h index f78be64e105b..e89ac04ec2c3 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -84,7 +84,7 @@ struct tipc_node_bclink { u32 last_sent; u32 oos_state; u32 deferred_size; - struct sk_buff_head deferred_queue; + struct sk_buff_head deferdq; struct sk_buff *reasm_buf; int inputq_map; bool recv_permitted; -- cgit v1.2.3 From e3eea1eb47ac616ee09cf0ae5d1e7790ef8461ea Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:11 -0400 Subject: tipc: clean up handling of message priorities Messages transferred by TIPC are assigned an "importance priority", -an integer value indicating how to treat the message when there is link or destination socket congestion. There is no separate header field for this value. Instead, the message user values have been chosen in ascending order according to perceived importance, so that the message user field can be used for this. This is not a good solution. First, we have many more users than the needed priority levels, so we end up with treating more priority levels than necessary. Second, the user field cannot always accurately reflect the priority of the message. E.g., a message fragment packet should really have the priority of the enveloped user data message, and not the priority of the MSG_FRAGMENTER user. Until now, we have been working around this problem in different ways, but it is now time to implement a consistent way of handling such priorities, although still within the constraint that we cannot allocate any more bits in the regular data message header for this. In this commit, we define a new priority level, TIPC_SYSTEM_IMPORTANCE, that will be the only one used apart from the four (lower) user data levels. All non-data messages map down to this priority. Furthermore, we take some free bits from the MSG_FRAGMENTER header and allocate them to store the priority of the enveloped message. We then adjust the functions msg_importance()/msg_set_importance() so that they read/set the correct header fields depending on user type. This small protocol change is fully compatible, because the code at the receiving end of a link currently reads the importance level only from user data messages, where there is no change. Reviewed-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 1 - net/tipc/link.c | 40 +++++++++++++--------------------- net/tipc/msg.c | 5 +---- net/tipc/msg.h | 65 +++++++++++++++++++++++++++++--------------------------- 4 files changed, 50 insertions(+), 61 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 17cb0ff5f344..5aff0844d4d3 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -383,7 +383,6 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) __skb_queue_purge(list); return -EHOSTUNREACH; } - /* Broadcast to all nodes */ if (likely(bclink)) { tipc_bclink_lock(net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 7e0036f5a364..bc49120bfb44 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -35,6 +35,7 @@ */ #include "core.h" +#include "subscr.h" #include "link.h" #include "bcast.h" #include "socket.h" @@ -305,12 +306,10 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, msg_set_session(msg, (tn->random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); - - l_ptr->priority = b_ptr->priority; - tipc_link_set_queue_limits(l_ptr, b_ptr->window); - l_ptr->net_plane = b_ptr->net_plane; link_init_max_pkt(l_ptr); + l_ptr->priority = b_ptr->priority; + tipc_link_set_queue_limits(l_ptr, b_ptr->window); l_ptr->next_out_no = 1; __skb_queue_head_init(&l_ptr->transmq); @@ -708,7 +707,7 @@ static int tipc_link_cong(struct tipc_link *link, struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); struct tipc_msg *msg = buf_msg(skb); - uint imp = tipc_msg_tot_importance(msg); + int imp = msg_importance(msg); u32 oport = msg_tot_origport(msg); if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { @@ -745,7 +744,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, { struct tipc_msg *msg = buf_msg(skb_peek(list)); unsigned int maxwin = link->window; - uint imp = tipc_msg_tot_importance(msg); + unsigned int imp = msg_importance(msg); uint mtu = link->max_pkt; uint ack = mod(link->next_in_no - 1); uint seqno = link->next_out_no; @@ -755,7 +754,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *tmp; - /* Match queue limits against msg importance: */ + /* Match queue limit against msg importance: */ if (unlikely(skb_queue_len(backlogq) >= link->queue_limit[imp])) return tipc_link_cong(link, list); @@ -1811,25 +1810,16 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->cont_intv) / 4); } -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window) +void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { - l_ptr->window = window; - - /* Data messages from this node, inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6; - /* Transiting data messages,inclusive FIRST_FRAGM */ - l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300; - l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600; - l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900; - l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200; - l_ptr->queue_limit[CONN_MANAGER] = 1200; - l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; - l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000; - /* FRAGMENT and LAST_FRAGMENT packets */ - l_ptr->queue_limit[MSG_FRAGMENTER] = 4000; + int max_bulk = TIPC_MAX_PUBLICATIONS / (l->max_pkt / ITEM_SIZE); + + l->window = win; + l->queue_limit[TIPC_LOW_IMPORTANCE] = win / 2; + l->queue_limit[TIPC_MEDIUM_IMPORTANCE] = win; + l->queue_limit[TIPC_HIGH_IMPORTANCE] = win / 2 * 3; + l->queue_limit[TIPC_CRITICAL_IMPORTANCE] = win * 2; + l->queue_limit[TIPC_SYSTEM_IMPORTANCE] = max_bulk; } /* tipc_link_find_owner - locate owner node of link by link's name diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 47c8fd8e2fb2..0c6dad8180a0 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -272,6 +272,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); msg_set_size(&pkthdr, pktmax); msg_set_fragm_no(&pkthdr, pktno); + msg_set_importance(&pkthdr, msg_importance(mhdr)); /* Prepare first fragment */ skb = tipc_buf_acquire(pktmax); @@ -467,7 +468,6 @@ bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, int err) { struct tipc_msg *msg = buf_msg(buf); - uint imp = msg_importance(msg); struct tipc_msg ohdr; uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE); @@ -479,9 +479,6 @@ bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, if (msg_errcode(msg)) goto exit; memcpy(&ohdr, msg, msg_hdr_sz(msg)); - imp = min_t(uint, imp + 1, TIPC_CRITICAL_IMPORTANCE); - if (msg_isdata(msg)) - msg_set_importance(msg, imp); msg_set_errcode(msg, err); msg_set_origport(msg, msg_destport(&ohdr)); msg_set_destport(msg, msg_origport(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index e5fc5fdb2ea7..bd3969a80dd4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -54,6 +54,8 @@ struct plist; * - TIPC_HIGH_IMPORTANCE * - TIPC_CRITICAL_IMPORTANCE */ +#define TIPC_SYSTEM_IMPORTANCE 4 + /* * Payload message types @@ -63,6 +65,19 @@ struct plist; #define TIPC_NAMED_MSG 2 #define TIPC_DIRECT_MSG 3 +/* + * Internal message users + */ +#define BCAST_PROTOCOL 5 +#define MSG_BUNDLER 6 +#define LINK_PROTOCOL 7 +#define CONN_MANAGER 8 +#define CHANGEOVER_PROTOCOL 10 +#define NAME_DISTRIBUTOR 11 +#define MSG_FRAGMENTER 12 +#define LINK_CONFIG 13 +#define SOCK_WAKEUP 14 /* pseudo user */ + /* * Message header sizes */ @@ -170,16 +185,6 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } -static inline u32 msg_importance(struct tipc_msg *m) -{ - return msg_bits(m, 0, 25, 0xf); -} - -static inline void msg_set_importance(struct tipc_msg *m, u32 i) -{ - msg_set_user(m, i); -} - static inline u32 msg_hdr_sz(struct tipc_msg *m) { return msg_bits(m, 0, 21, 0xf) << 2; @@ -336,6 +341,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) /* * Words 3-10 */ +static inline u32 msg_importance(struct tipc_msg *m) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + return msg_bits(m, 5, 13, 0x7); + if (likely(msg_isdata(m) && !msg_errcode(m))) + return msg_user(m); + return TIPC_SYSTEM_IMPORTANCE; +} + +static inline void msg_set_importance(struct tipc_msg *m, u32 i) +{ + if (unlikely(msg_user(m) == MSG_FRAGMENTER)) + msg_set_bits(m, 5, 13, 0x7, i); + else if (likely(i < TIPC_SYSTEM_IMPORTANCE)) + msg_set_user(m, i); + else + pr_warn("Trying to set illegal importance in message\n"); +} + static inline u32 msg_prevnode(struct tipc_msg *m) { return msg_word(m, 3); @@ -457,20 +481,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) * Constants and routines used to read and write TIPC internal message headers */ -/* - * Internal message users - */ -#define BCAST_PROTOCOL 5 -#define MSG_BUNDLER 6 -#define LINK_PROTOCOL 7 -#define CONN_MANAGER 8 -#define ROUTE_DISTRIBUTOR 9 /* obsoleted */ -#define CHANGEOVER_PROTOCOL 10 -#define NAME_DISTRIBUTOR 11 -#define MSG_FRAGMENTER 12 -#define LINK_CONFIG 13 -#define SOCK_WAKEUP 14 /* pseudo user */ - /* * Connection management protocol message types */ @@ -743,13 +753,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline u32 tipc_msg_tot_importance(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_importance(msg_get_wrapped(m)); - return msg_importance(m); -} - static inline u32 msg_tot_origport(struct tipc_msg *m) { if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) -- cgit v1.2.3 From 76100a8a64bc2ae898bc49d51dd28c1f4f5ed37b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 18 Mar 2015 09:32:57 +0800 Subject: tipc: fix netns refcnt leak When the TIPC module is loaded, we launch a topology server in kernel space, which in its turn is creating TIPC sockets for communication with topology server users. Because both the socket's creator and provider reside in the same module, it is necessary that the TIPC module's reference count remains zero after the server is started and the socket created; otherwise it becomes impossible to perform "rmmod" even on an idle module. Currently, we achieve this by defining a separate "tipc_proto_kern" protocol struct, that is used only for kernel space socket allocations. This structure has the "owner" field set to NULL, which restricts the module reference count from being be bumped when sk_alloc() for local sockets is called. Furthermore, we have defined three kernel-specific functions, tipc_sock_create_local(), tipc_sock_release_local() and tipc_sock_accept_local(), to avoid the module counter being modified when module local sockets are created or deleted. This has worked well until we introduced name space support. However, after name space support was introduced, we have observed that a reference count leak occurs, because the netns counter is not decremented in tipc_sock_delete_local(). This commit remedies this problem. But instead of just modifying tipc_sock_delete_local(), we eliminate the whole parallel socket handling infrastructure, and start using the regular sk_create_kern(), kernel_accept() and sk_release_kernel() calls. Since those functions manipulate the module counter, we must now compensate for that by explicitly decrementing the counter after module local sockets are created, and increment it just before calling sk_release_kernel(). Fixes: a62fbccecd62 ("tipc: make subscriber server support net namespace") Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Reviewed-by: Erik Hugne Reported-by: Cong Wang Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/server.c | 44 +++++++++++++++++++++++++---- net/tipc/socket.c | 83 +------------------------------------------------------ net/tipc/socket.h | 4 --- 3 files changed, 39 insertions(+), 92 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/server.c b/net/tipc/server.c index eadd4ed45905..a57c8407cbf3 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -37,11 +37,13 @@ #include "core.h" #include "socket.h" #include +#include /* Number of messages to send before rescheduling */ #define MAX_SEND_MSG_COUNT 25 #define MAX_RECV_MSG_COUNT 25 #define CF_CONNECTED 1 +#define CF_SERVER 2 #define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) @@ -88,9 +90,16 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct socket *sock = con->sock; + struct sock *sk; - if (con->sock) { - tipc_sock_release_local(con->sock); + if (sock) { + sk = sock->sk; + if (test_bit(CF_SERVER, &con->flags)) { + __module_get(sock->ops->owner); + __module_get(sk->sk_prot_creator->owner); + } + sk_release_kernel(sk); con->sock = NULL; } @@ -281,7 +290,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) struct tipc_conn *newcon; int ret; - ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + ret = kernel_accept(sock, &newsock, O_NONBLOCK); if (ret < 0) return ret; @@ -309,9 +318,12 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = tipc_sock_create_local(s->net, s->type, &sock); + ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); if (ret < 0) return NULL; + + sk_change_net(sock->sk, s->net); + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, (char *)&s->imp, sizeof(s->imp)); if (ret < 0) @@ -337,11 +349,31 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) pr_err("Unknown socket type %d\n", s->type); goto create_err; } + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(sock->ops->owner); + module_put(sock->sk->sk_prot_creator->owner); + set_bit(CF_SERVER, &con->flags); + return sock; create_err: - sock_release(sock); - con->sock = NULL; + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sock->sk); return NULL; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 934947f038b6..813847d25a49 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -121,9 +121,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; - static struct proto tipc_proto; -static struct proto tipc_proto_kern; static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -341,11 +339,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } /* Allocate socket's protocol area */ - if (!kern) - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); - else - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); - + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); if (sk == NULL) return -ENOMEM; @@ -383,75 +377,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, return 0; } -/** - * tipc_sock_create_local - create TIPC socket from inside TIPC module - * @type: socket type - SOCK_RDM or SOCK_SEQPACKET - * - * We cannot use sock_creat_kern here because it bumps module user count. - * Since socket owner and creator is the same module we must make sure - * that module count remains zero for module local sockets, otherwise - * we cannot do rmmod. - * - * Returns 0 on success, errno otherwise - */ -int tipc_sock_create_local(struct net *net, int type, struct socket **res) -{ - int rc; - - rc = sock_create_lite(AF_TIPC, type, 0, res); - if (rc < 0) { - pr_err("Failed to create kernel socket\n"); - return rc; - } - tipc_sk_create(net, *res, 0, 1); - - return 0; -} - -/** - * tipc_sock_release_local - release socket created by tipc_sock_create_local - * @sock: the socket to be released. - * - * Module reference count is not incremented when such sockets are created, - * so we must keep it from being decremented when they are released. - */ -void tipc_sock_release_local(struct socket *sock) -{ - tipc_release(sock); - sock->ops = NULL; - sock_release(sock); -} - -/** - * tipc_sock_accept_local - accept a connection on a socket created - * with tipc_sock_create_local. Use this function to avoid that - * module reference count is inadvertently incremented. - * - * @sock: the accepting socket - * @newsock: reference to the new socket to be created - * @flags: socket flags - */ - -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags) -{ - struct sock *sk = sock->sk; - int ret; - - ret = sock_create_lite(sk->sk_family, sk->sk_type, - sk->sk_protocol, newsock); - if (ret < 0) - return ret; - - ret = tipc_accept(sock, *newsock, flags); - if (ret < 0) { - sock_release(*newsock); - return ret; - } - (*newsock)->ops = sock->ops; - return ret; -} - static void tipc_sk_callback(struct rcu_head *head) { struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); @@ -2608,12 +2533,6 @@ static struct proto tipc_proto = { .sysctl_rmem = sysctl_tipc_rmem }; -static struct proto tipc_proto_kern = { - .name = "TIPC", - .obj_size = sizeof(struct tipc_sock), - .sysctl_rmem = sysctl_tipc_rmem -}; - /** * tipc_socket_init - initialize TIPC socket interface * diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 238f1b7bd9bd..bf6551389522 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -44,10 +44,6 @@ SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) int tipc_socket_init(void); void tipc_socket_stop(void); -int tipc_sock_create_local(struct net *net, int type, struct socket **res); -void tipc_sock_release_local(struct socket *sock); -int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, - int flags); int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq); -- cgit v1.2.3 From 8460504bdd9aa5996dfc5dd69cd61582a25139ec Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 18 Mar 2015 09:32:58 +0800 Subject: tipc: fix a potential deadlock when nametable is purged [ 28.531768] ============================================= [ 28.532322] [ INFO: possible recursive locking detected ] [ 28.532322] 3.19.0+ #194 Not tainted [ 28.532322] --------------------------------------------- [ 28.532322] insmod/583 is trying to acquire lock: [ 28.532322] (&(&nseq->lock)->rlock){+.....}, at: [] tipc_nametbl_remove_publ+0x49/0x2e0 [tipc] [ 28.532322] [ 28.532322] but task is already holding lock: [ 28.532322] (&(&nseq->lock)->rlock){+.....}, at: [] tipc_nametbl_stop+0xfc/0x1f0 [tipc] [ 28.532322] [ 28.532322] other info that might help us debug this: [ 28.532322] Possible unsafe locking scenario: [ 28.532322] [ 28.532322] CPU0 [ 28.532322] ---- [ 28.532322] lock(&(&nseq->lock)->rlock); [ 28.532322] lock(&(&nseq->lock)->rlock); [ 28.532322] [ 28.532322] *** DEADLOCK *** [ 28.532322] [ 28.532322] May be due to missing lock nesting notation [ 28.532322] [ 28.532322] 3 locks held by insmod/583: [ 28.532322] #0: (net_mutex){+.+.+.}, at: [] register_pernet_subsys+0x1f/0x50 [ 28.532322] #1: (&(&tn->nametbl_lock)->rlock){+.....}, at: [] tipc_nametbl_stop+0xb1/0x1f0 [tipc] [ 28.532322] #2: (&(&nseq->lock)->rlock){+.....}, at: [] tipc_nametbl_stop+0xfc/0x1f0 [tipc] [ 28.532322] [ 28.532322] stack backtrace: [ 28.532322] CPU: 1 PID: 583 Comm: insmod Not tainted 3.19.0+ #194 [ 28.532322] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 28.532322] ffffffff82394460 ffff8800144cb928 ffffffff81792f3e 0000000000000007 [ 28.532322] ffffffff82394460 ffff8800144cba28 ffffffff810a8080 ffff8800144cb998 [ 28.532322] ffffffff810a4df3 ffff880013e9cb10 ffffffff82b0d330 ffff880013e9cb38 [ 28.532322] Call Trace: [ 28.532322] [] dump_stack+0x4c/0x65 [ 28.532322] [] __lock_acquire+0x740/0x1ca0 [ 28.532322] [] ? __bfs+0x23/0x270 [ 28.532322] [] ? check_irq_usage+0x96/0xe0 [ 28.532322] [] ? __lock_acquire+0x1133/0x1ca0 [ 28.532322] [] ? tipc_nametbl_remove_publ+0x49/0x2e0 [tipc] [ 28.532322] [] lock_acquire+0x9c/0x140 [ 28.532322] [] ? tipc_nametbl_remove_publ+0x49/0x2e0 [tipc] [ 28.532322] [] _raw_spin_lock_bh+0x3f/0x50 [ 28.532322] [] ? tipc_nametbl_remove_publ+0x49/0x2e0 [tipc] [ 28.532322] [] tipc_nametbl_remove_publ+0x49/0x2e0 [tipc] [ 28.532322] [] tipc_nametbl_stop+0x13e/0x1f0 [tipc] [ 28.532322] [] ? tipc_nametbl_stop+0x5/0x1f0 [tipc] [ 28.532322] [] tipc_init_net+0x13b/0x150 [tipc] [ 28.532322] [] ? tipc_init_net+0x5/0x150 [tipc] [ 28.532322] [] ops_init+0x4e/0x150 [ 28.532322] [] ? trace_hardirqs_on+0xd/0x10 [ 28.532322] [] register_pernet_operations+0xf3/0x190 [ 28.532322] [] register_pernet_subsys+0x2e/0x50 [ 28.532322] [] tipc_init+0x6a/0x1000 [tipc] [ 28.532322] [] ? 0xffffffffa0024000 [ 28.532322] [] do_one_initcall+0x89/0x1c0 [ 28.532322] [] ? kmem_cache_alloc_trace+0x50/0x1b0 [ 28.532322] [] ? do_init_module+0x2b/0x200 [ 28.532322] [] do_init_module+0x64/0x200 [ 28.532322] [] load_module+0x12f3/0x18e0 [ 28.532322] [] ? show_initstate+0x50/0x50 [ 28.532322] [] SyS_init_module+0xd9/0x110 [ 28.532322] [] sysenter_dispatch+0x7/0x1f Before tipc_purge_publications() calls tipc_nametbl_remove_publ() to remove a publication with a name sequence, the name sequence's lock is held. However, when tipc_nametbl_remove_publ() calling tipc_nameseq_remove_publ() to remove the publication, it first tries to query name sequence instance with the publication, and then holds the lock of the found name sequence. But as the lock may be already taken in tipc_purge_publications(), deadlock happens like above scenario demonstrated. As tipc_nameseq_remove_publ() doesn't grab name sequence's lock, the deadlock can be avoided if it's directly invoked by tipc_purge_publications(). Fixes: 97ede29e80ee ("tipc: convert name table read-write lock to RCU") Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/name_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 105ba7adf06f..ab0ac62a1287 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -811,8 +811,8 @@ static void tipc_purge_publications(struct net *net, struct name_seq *seq) sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { - tipc_nametbl_remove_publ(net, publ->type, publ->lower, - publ->node, publ->ref, publ->key); + tipc_nameseq_remove_publ(net, seq, publ->lower, publ->node, + publ->ref, publ->key); kfree_rcu(publ, rcu); } hlist_del_init_rcu(&seq->ns_list); -- cgit v1.2.3 From 2b9bb7f338502d9d01543daa9fdf4a7f104bd572 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 18 Mar 2015 09:32:59 +0800 Subject: tipc: withdraw tipc topology server name when namespace is deleted The TIPC topology server is a per namespace service associated with the tipc name {1, 1}. When a namespace is deleted, that name must be withdrawn before we call sk_release_kernel because the kernel socket release is done in init_net and trying to withdraw a TIPC name published in another namespace will fail with an error as: [ 170.093264] Unable to remove local publication [ 170.093264] (type=1, lower=1, ref=2184244004, key=2184244005) We fix this by breaking the association between the topology server name and socket before calling sk_release_kernel. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/server.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/server.c b/net/tipc/server.c index a57c8407cbf3..ab6183cdb121 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -90,6 +90,7 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct sockaddr_tipc *saddr = con->server->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -99,6 +100,8 @@ static void tipc_conn_kref_release(struct kref *kref) __module_get(sock->ops->owner); __module_get(sk->sk_prot_creator->owner); } + saddr->scope = -TIPC_NODE_SCOPE; + kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); sk_release_kernel(sk); con->sock = NULL; } -- cgit v1.2.3 From 446c89ac1f6026df9e3e0ca2614b36909398d431 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 18 Mar 2015 20:01:18 +1100 Subject: tipc: Use rhashtable max/min_size instead of max/min_shift This patch converts tipc to use rhashtable max/min_size instead of the obsolete max/min_shift. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 813847d25a49..d7a6c10202e9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2286,8 +2286,8 @@ int tipc_sk_rht_init(struct net *net) .key_offset = offsetof(struct tipc_sock, portid), .key_len = sizeof(u32), /* portid */ .hashfn = jhash, - .max_shift = 20, /* 1M */ - .min_shift = 8, /* 256 */ + .max_size = 1048576, + .min_size = 256, }; return rhashtable_init(&tn->sk_rht, &rht_params); -- cgit v1.2.3 From 54ff9ef36bdf84d469a098cbf8e2a103fbc77054 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 18 Mar 2015 14:50:43 -0300 Subject: ipv4, ipv6: kill ip_mc_{join, leave}_group and ipv6_sock_mc_{join, drop} in favor of their inner __ ones, which doesn't grab rtnl. As these functions need to operate on a locked socket, we can't be grabbing rtnl by then. It's too late and doing so causes reversed locking. So this patch: - move rtnl handling to callers instead while already fixing some reversed locking situations, like on vxlan and ipvs code. - renames __ ones to not have the __ mark: __ip_mc_{join,leave}_group -> ip_mc_{join,leave}_group __ipv6_sock_mc_{join,drop} -> ipv6_sock_mc_{join,drop} Signed-off-by: Marcelo Ricardo Leitner Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 5 ++++- include/linux/igmp.h | 2 -- include/net/ipv6.h | 4 ---- net/ipv4/devinet.c | 4 ++-- net/ipv4/igmp.c | 41 ++++++++--------------------------------- net/ipv4/ip_sockglue.c | 21 +++++++++++++++------ net/ipv6/addrconf.c | 4 ++-- net/ipv6/ipv6_sockglue.c | 21 +++++++++++++-------- net/ipv6/mcast.c | 30 +++--------------------------- net/netfilter/ipvs/ip_vs_sync.c | 2 ++ net/tipc/udp_media.c | 4 ++-- 11 files changed, 51 insertions(+), 87 deletions(-) (limited to 'net/tipc') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 25d92d4fc625..8b8ca7492d56 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1097,7 +1097,6 @@ EXPORT_SYMBOL_GPL(vxlan_sock_release); /* Callback to update multicast group membership when first VNI on * multicast asddress is brought up - * Done as workqueue because ip_mc_join_group acquires RTNL. */ static void vxlan_igmp_join(struct work_struct *work) { @@ -1107,6 +1106,7 @@ static void vxlan_igmp_join(struct work_struct *work) union vxlan_addr *ip = &vxlan->default_dst.remote_ip; int ifindex = vxlan->default_dst.remote_ifindex; + rtnl_lock(); lock_sock(sk); if (ip->sa.sa_family == AF_INET) { struct ip_mreqn mreq = { @@ -1122,6 +1122,7 @@ static void vxlan_igmp_join(struct work_struct *work) #endif } release_sock(sk); + rtnl_unlock(); vxlan_sock_release(vs); dev_put(vxlan->dev); @@ -1136,6 +1137,7 @@ static void vxlan_igmp_leave(struct work_struct *work) union vxlan_addr *ip = &vxlan->default_dst.remote_ip; int ifindex = vxlan->default_dst.remote_ifindex; + rtnl_lock(); lock_sock(sk); if (ip->sa.sa_family == AF_INET) { struct ip_mreqn mreq = { @@ -1152,6 +1154,7 @@ static void vxlan_igmp_leave(struct work_struct *work) } release_sock(sk); + rtnl_unlock(); vxlan_sock_release(vs); dev_put(vxlan->dev); diff --git a/include/linux/igmp.h b/include/linux/igmp.h index b5a6470e686c..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -111,9 +111,7 @@ struct ip_mc_list { extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); extern int igmp_rcv(struct sk_buff *); -extern int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); -extern int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); extern void ip_mc_drop_socket(struct sock *sk); extern int ip_mc_source(int add, int omode, struct sock *sk, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b7673065c074..e7ba9758a345 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -942,10 +942,6 @@ void ipv6_sysctl_unregister(void); int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); -int __ipv6_sock_mc_join(struct sock *sk, int ifindex, - const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); -int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, - const struct in6_addr *addr); #endif /* _NET_IPV6_H */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 375dc71b9a64..975ee5e30c64 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -560,9 +560,9 @@ static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa) lock_sock(sk); if (join) - ret = __ip_mc_join_group(sk, &mreq); + ret = ip_mc_join_group(sk, &mreq); else - ret = __ip_mc_leave_group(sk, &mreq); + ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); return ret; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5cb1ef4ce292..ad3f866085de 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1850,7 +1850,10 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; } -int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) +/* Join a multicast group + */ + +int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) { __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml, *i; @@ -1898,20 +1901,6 @@ int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) done: return err; } -EXPORT_SYMBOL(__ip_mc_join_group); - -/* Join a multicast group - */ -int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr) -{ - int ret; - - rtnl_lock(); - ret = __ip_mc_join_group(sk, imr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ip_mc_join_group); static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, @@ -1934,7 +1923,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, return err; } -int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) +int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml; @@ -1979,18 +1968,6 @@ int __ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) out: return ret; } -EXPORT_SYMBOL(__ip_mc_leave_group); - -int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) -{ - int ret; - - rtnl_lock(); - ret = __ip_mc_leave_group(sk, imr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ip_mc_leave_group); int ip_mc_source(int add, int omode, struct sock *sk, struct @@ -2010,7 +1987,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; imr.imr_address.s_addr = mreqs->imr_interface; @@ -2124,9 +2101,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 1, &mreqs->imr_sourceaddr, 1); done: - rtnl_unlock(); if (leavegroup) - return ip_mc_leave_group(sk, &imr); + err = ip_mc_leave_group(sk, &imr); return err; } @@ -2148,7 +2124,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) msf->imsf_fmode != MCAST_EXCLUDE) return -EINVAL; - rtnl_lock(); + ASSERT_RTNL(); imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; @@ -2210,7 +2186,6 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) pmc->sfmode = msf->imsf_fmode; err = 0; done: - rtnl_unlock(); if (leavegroup) err = ip_mc_leave_group(sk, &imr); return err; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5171709199f4..f6a0d54b308a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -541,9 +541,18 @@ static bool setsockopt_needs_rtnl(int optname) switch (optname) { case IP_ADD_MEMBERSHIP: case IP_ADD_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_MSFILTER: + case IP_UNBLOCK_SOURCE: + case MCAST_BLOCK_SOURCE: + case MCAST_MSFILTER: case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_UNBLOCK_SOURCE: return true; } return false; @@ -861,9 +870,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, } if (optname == IP_ADD_MEMBERSHIP) - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); else - err = __ip_mc_leave_group(sk, &mreq); + err = ip_mc_leave_group(sk, &mreq); break; } case IP_MSFILTER: @@ -928,7 +937,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr; mreq.imr_address.s_addr = mreqs.imr_interface; mreq.imr_ifindex = 0; - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); if (err && err != -EADDRINUSE) break; omode = MCAST_INCLUDE; @@ -960,9 +969,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, mreq.imr_ifindex = greq.gr_interface; if (optname == MCAST_JOIN_GROUP) - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); else - err = __ip_mc_leave_group(sk, &mreq); + err = ip_mc_leave_group(sk, &mreq); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -1005,7 +1014,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, mreq.imr_multiaddr = psin->sin_addr; mreq.imr_address.s_addr = 0; mreq.imr_ifindex = greqs.gsr_interface; - err = __ip_mc_join_group(sk, &mreq); + err = ip_mc_join_group(sk, &mreq); if (err && err != -EADDRINUSE) break; greqs.gsr_interface = mreq.imr_ifindex; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 88d2cf0cae52..158378e73f0a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2473,9 +2473,9 @@ static int ipv6_mc_config(struct sock *sk, bool join, lock_sock(sk); if (join) - ret = __ipv6_sock_mc_join(sk, ifindex, addr); + ret = ipv6_sock_mc_join(sk, ifindex, addr); else - ret = __ipv6_sock_mc_drop(sk, ifindex, addr); + ret = ipv6_sock_mc_drop(sk, ifindex, addr); release_sock(sk); return ret; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index f2b731df8d77..cc5883791bac 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -124,6 +124,11 @@ static bool setsockopt_needs_rtnl(int optname) case IPV6_DROP_MEMBERSHIP: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: return true; } return false; @@ -597,9 +602,9 @@ done: break; if (optname == IPV6_ADD_MEMBERSHIP) - retv = __ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else - retv = __ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); + retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } case IPV6_JOIN_ANYCAST: @@ -638,11 +643,11 @@ done: } psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) - retv = __ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); + retv = ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); else - retv = __ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); + retv = ipv6_sock_mc_drop(sk, greq.gr_interface, + &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -674,8 +679,8 @@ done: struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; - retv = __ipv6_sock_mc_join(sk, greqs.gsr_interface, - &psin6->sin6_addr); + retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, + &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1dd1fedff9f4..cbb66fd3da6d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -132,7 +132,7 @@ static int unsolicited_report_interval(struct inet6_dev *idev) return iv > 0 ? iv : 1; } -int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) +int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; @@ -199,24 +199,12 @@ int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *add return 0; } -EXPORT_SYMBOL(__ipv6_sock_mc_join); - -int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) -{ - int ret; - - rtnl_lock(); - ret = __ipv6_sock_mc_join(sk, ifindex, addr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ipv6_sock_mc_join); /* * socket leave on multicast group */ -int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) +int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; @@ -255,18 +243,6 @@ int __ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *add return -EADDRNOTAVAIL; } -EXPORT_SYMBOL(__ipv6_sock_mc_drop); - -int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) -{ - int ret; - - rtnl_lock(); - ret = __ipv6_sock_mc_drop(sk, ifindex, addr); - rtnl_unlock(); - - return ret; -} EXPORT_SYMBOL(ipv6_sock_mc_drop); /* called with rcu_read_lock() */ @@ -460,7 +436,7 @@ done: read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) - return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); + err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 08d95559b6f7..19b9cce6c210 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1405,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) mreq.imr_ifindex = dev->ifindex; + rtnl_lock(); lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); + rtnl_unlock(); return ret; } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index fc2fb11a354d..04836dd70c2b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -246,11 +246,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) return 0; mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; - err = __ip_mc_join_group(sk, &mreqn); + err = ip_mc_join_group(sk, &mreqn); } else { if (!ipv6_addr_is_multicast(&remote->ipv6)) return 0; - err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); } return err; } -- cgit v1.2.3 From 18d6c58415fa9f5ec98767a2434acc8197c7f288 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 19 Mar 2015 09:02:17 +0100 Subject: tipc: remove redundant call to tipc_node_remove_conn tipc_node_remove_conn may be called twice if shutdown() is called on a socket that have messages in the receive queue. Calling this function twice does no harm, but is unnecessary and we remove the redundant call. Signed-off-by: Erik Hugne Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d7a6c10202e9..5bceebd81f64 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2078,7 +2078,6 @@ restart: TIPC_CONN_SHUTDOWN)) tipc_link_xmit_skb(net, skb, dnode, tsk->portid); - tipc_node_remove_conn(net, dnode, tsk->portid); } else { dnode = tsk_peer_node(tsk); -- cgit v1.2.3 From 3bd88ee7a2ea19dffe384e12fe452c59d9e53c29 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 19 Mar 2015 09:02:18 +0100 Subject: tipc: do not report -EHOSTUNREACH for failed local delivery Since commit 1186adf7df04 ("tipc: simplify message forwarding and rejection in socket layer") -EHOSTUNREACH is propagated back to the sending process if we fail to deliver the message to another socket local to the node. This is wrong, host unreachable should only be reported when the destination port/name does not exist in the cluster, and that check is always done before sending the message. Also, this introduces inconsistent sendmsg() behavior for local/remote destinations. Errors occurring on the receiving side should not trickle up to the sender. If message delivery fails TIPC should either discard the packet or reject it back to the sender based on the destination droppable option. Signed-off-by: Erik Hugne Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index bc49120bfb44..8c98c4d00ad6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -845,8 +845,10 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) return rc; - if (likely(in_own_node(net, dnode))) - return tipc_sk_rcv(net, list); + if (likely(in_own_node(net, dnode))) { + tipc_sk_rcv(net, list); + return 0; + } __skb_queue_purge(list); return rc; -- cgit v1.2.3 From f2f8036e391eb82ee78764483f869f2feafb5da8 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 19 Mar 2015 09:02:19 +0100 Subject: tipc: add support for connect() on dgram/rdm sockets Following the example of ip4_datagram_connect, we store the address in the socket structure for dgram/rdm sockets and use that as the default destination for subsequent send() calls. It is allowed to connect to any address types, and the behaviour of send() will be the same as a normal sendto() with this address provided. Binding to an AF_UNSPEC address clears the association. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5bceebd81f64..c03a3d33806f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -74,6 +74,7 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @remote: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ @@ -96,6 +97,7 @@ struct tipc_sock { bool link_cong; uint sent_unacked; uint rcv_unacked; + struct sockaddr_tipc remote; struct rhash_head node; struct rcu_head rcu; }; @@ -854,22 +856,23 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) u32 dnode, dport; struct sk_buff_head *pktchain = &sk->sk_write_queue; struct sk_buff *skb; - struct tipc_name_seq *seq = &dest->addr.nameseq; + struct tipc_name_seq *seq; struct iov_iter save; u32 mtu; long timeo; int rc; - if (unlikely(!dest)) - return -EDESTADDRREQ; - - if (unlikely((m->msg_namelen < sizeof(*dest)) || - (dest->family != AF_TIPC))) - return -EINVAL; - if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; - + if (unlikely(!dest)) { + if (tsk->connected && sock->state == SS_READY) + dest = &tsk->remote; + else + return -EDESTADDRREQ; + } else if (unlikely(m->msg_namelen < sizeof(*dest)) || + dest->family != AF_TIPC) { + return -EINVAL; + } if (unlikely(sock->state != SS_READY)) { if (sock->state == SS_LISTENING) return -EPIPE; @@ -882,7 +885,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) tsk->conn_instance = dest->addr.name.name.instance; } } - + seq = &dest->addr.nameseq; timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); if (dest->addrtype == TIPC_ADDR_MCAST) { @@ -1833,17 +1836,24 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int destlen, int flags) { struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; - long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; + long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; socket_state previous; - int res; + int res = 0; lock_sock(sk); - /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */ + /* DGRAM/RDM connect(), just save the destaddr */ if (sock->state == SS_READY) { - res = -EOPNOTSUPP; + if (dst->family == AF_UNSPEC) { + memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); + tsk->connected = 0; + } else { + memcpy(&tsk->remote, dest, destlen); + tsk->connected = 1; + } goto exit; } -- cgit v1.2.3 From 446981e5fcad3d64a27358d6591928da4a55a156 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 19 Mar 2015 16:47:58 -0300 Subject: tipc: fix build issue when building without IPv6 We can't directly call ipv6_sock_mc_join() but should use the stub instead and protect it around IS_ENABLED. Fixes: d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 04836dd70c2b..ac89101e5d1b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include "core.h" #include "bearer.h" @@ -247,10 +248,13 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; err = ip_mc_join_group(sk, &mreqn); +#if IS_ENABLED(CONFIG_IPV6) } else { if (!ipv6_addr_is_multicast(&remote->ipv6)) return 0; - err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); + err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, + &remote->ipv6); +#endif } return err; } -- cgit v1.2.3 From 6cca7289d5cba80d61da711205cd230fc637e2e3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 20 Mar 2015 21:57:05 +1100 Subject: tipc: Use inlined rhashtable interface This patch converts tipc to the inlined rhashtable interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/tipc/socket.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c03a3d33806f..73c2f518a7c0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -133,6 +133,8 @@ static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } }; +static const struct rhashtable_params tsk_rht_params; + /* * Revised TIPC socket locking policy: * @@ -2245,7 +2247,7 @@ static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) struct tipc_sock *tsk; rcu_read_lock(); - tsk = rhashtable_lookup(&tn->sk_rht, &portid); + tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); if (tsk) sock_hold(&tsk->sk); rcu_read_unlock(); @@ -2267,7 +2269,8 @@ static int tipc_sk_insert(struct tipc_sock *tsk) portid = TIPC_MIN_PORT; tsk->portid = portid; sock_hold(&tsk->sk); - if (rhashtable_lookup_insert(&tn->sk_rht, &tsk->node)) + if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, + tsk_rht_params)) return 0; sock_put(&tsk->sk); } @@ -2280,26 +2283,27 @@ static void tipc_sk_remove(struct tipc_sock *tsk) struct sock *sk = &tsk->sk; struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); - if (rhashtable_remove(&tn->sk_rht, &tsk->node)) { + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } } +static const struct rhashtable_params tsk_rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .hashfn = jhash, + .max_size = 1048576, + .min_size = 256, +}; + int tipc_sk_rht_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct rhashtable_params rht_params = { - .nelem_hint = 192, - .head_offset = offsetof(struct tipc_sock, node), - .key_offset = offsetof(struct tipc_sock, portid), - .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, - .max_size = 1048576, - .min_size = 256, - }; - - return rhashtable_init(&tn->sk_rht, &rht_params); + + return rhashtable_init(&tn->sk_rht, &tsk_rht_params); } void tipc_sk_rht_destroy(struct net *net) -- cgit v1.2.3 From 6d022949810b1ea82d46a576d6166035720bbb32 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 24 Mar 2015 00:50:24 +1100 Subject: tipc: Use default rhashtable hashfn This patch removes the explicit jhash value for the hashfn parameter of rhashtable. The default is now jhash so removing the setting makes no difference apart from making one less copy of jhash in the kernel. Signed-off-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 73c2f518a7c0..6dd5bd95236a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,7 +35,6 @@ */ #include -#include #include "core.h" #include "name_table.h" #include "node.h" @@ -2294,7 +2293,6 @@ static const struct rhashtable_params tsk_rht_params = { .head_offset = offsetof(struct tipc_sock, node), .key_offset = offsetof(struct tipc_sock, portid), .key_len = sizeof(u32), /* portid */ - .hashfn = jhash, .max_size = 1048576, .min_size = 256, }; -- cgit v1.2.3 From 610600c8c5e25d551a010b64412cf731c084b1e1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 23 Mar 2015 15:30:00 -0400 Subject: tipc: validate length of sockaddr in connect() for dgram/rdm Commit f2f8036 ("tipc: add support for connect() on dgram/rdm sockets") hasn't validated user input length for the sockaddr structure which allows a user to overwrite kernel memory with arbitrary input. Fixes: f2f8036 ("tipc: add support for connect() on dgram/rdm sockets") Signed-off-by: Sasha Levin Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6dd5bd95236a..094710519477 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1851,6 +1851,8 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (dst->family == AF_UNSPEC) { memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); tsk->connected = 0; + } else if (destlen != sizeof(struct sockaddr_tipc)) { + res = -EINVAL; } else { memcpy(&tsk->remote, dest, destlen); tsk->connected = 1; -- cgit v1.2.3 From ed3e852aa5039fda5a9f53c716c7466913403288 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 24 Mar 2015 16:59:21 +0800 Subject: tipc: fix compile error when IPV6=m and TIPC=y When IPV6=m and TIPC=y, below error will appear during building kernel image: net/tipc/udp_media.c:196: undefined reference to `ip6_dst_lookup' make: *** [vmlinux] Error 1 As ip6_dst_lookup() is implemented in IPV6 and IPV6 is compiled as module, ip6_dst_lookup() is not built-in core kernel image. As a result, compiler cannot find 'ip6_dst_lookup' reference while compiling TIPC code into core kernel image. But with the method introduced by commit 5f81bd2e5d80 ("ipv6: export a stub for IPv6 symbols used by vxlan"), we can avoid the compile error through "ipv6_stub" pointer to access ip6_dst_lookup(). Fixes: d0f91938bede ("tipc: add ip/udp media type") Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ac89101e5d1b..ef3d7aa2854a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -193,7 +193,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, .saddr = src->ipv6, .flowi6_proto = IPPROTO_UDP }; - err = ip6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); + err = ipv6_stub->ipv6_dst_lookup(ub->ubsock->sk, &ndst, &fl6); if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); -- cgit v1.2.3 From b5e2c150ac914f28a28833b57397bec0b0a2bd5f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 24 Mar 2015 20:42:19 +0000 Subject: rhashtable: Disable automatic shrinking by default Introduce a new bool automatic_shrinking to require the user to explicitly opt-in to automatic shrinking of tables. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 7 +++++-- lib/rhashtable.c | 2 +- net/netfilter/nft_hash.c | 1 + net/netlink/af_netlink.c | 1 + net/tipc/socket.c | 1 + 5 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net/tipc') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 0e1f975ad101..ae26c494e230 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -2,7 +2,7 @@ * Resizable, Scalable, Concurrent Hash Table * * Copyright (c) 2015 Herbert Xu - * Copyright (c) 2014 Thomas Graf + * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * * Code partially derived from nft_hash @@ -104,6 +104,7 @@ struct rhashtable; * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker * @insecure_elasticity: Set to true to disable chain length checks + * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object @@ -118,6 +119,7 @@ struct rhashtable_params { unsigned int min_size; u32 nulls_base; bool insecure_elasticity; + bool automatic_shrinking; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -784,7 +786,8 @@ static inline int rhashtable_remove_fast( goto out; atomic_dec(&ht->nelems); - if (rht_shrink_below_30(ht, tbl)) + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) schedule_work(&ht->run_work); out: diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 50abe4fec4b8..50374d181148 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -367,7 +367,7 @@ static void rht_deferred_worker(struct work_struct *work) if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); - else if (rht_shrink_below_30(ht, tbl)) + else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) rhashtable_shrink(ht); err = rhashtable_rehash_table(ht); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index ad3966976cf5..8577a37af18b 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -172,6 +172,7 @@ static const struct rhashtable_params nft_hash_params = { .head_offset = offsetof(struct nft_hash_elem, node), .key_offset = offsetof(struct nft_hash_elem, key), .hashfn = jhash, + .automatic_shrinking = true, }; static int nft_hash_init(const struct nft_set *set, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e2f7f28148e0..4caa809dbbe0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3142,6 +3142,7 @@ static const struct rhashtable_params netlink_rhashtable_params = { .obj_hashfn = netlink_hash, .obj_cmpfn = netlink_compare, .max_size = 65536, + .automatic_shrinking = true, }; static int __init netlink_proto_init(void) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 094710519477..ee90d74d7516 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2297,6 +2297,7 @@ static const struct rhashtable_params tsk_rht_params = { .key_len = sizeof(u32), /* portid */ .max_size = 1048576, .min_size = 256, + .automatic_shrinking = true, }; int tipc_sk_rht_init(struct net *net) -- cgit v1.2.3 From bc14b8d6a98eb0747126cd517b468148b9e1c7ac Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 25 Mar 2015 18:09:40 +0800 Subject: tipc: fix a link reset issue due to retransmission failures When a node joins a cluster while we are transmitting a fragment stream over the broadcast link, it's missing the preceding fragments needed to build a meaningful message. As a result, the node has to drop it. However, as the fragment message is not acknowledged to its sender before it's dropped, it accidentally causes link reset of retransmission failure on the node. Reported-by: Erik Hugne Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Tested-by: Erik Hugne Signed-off-by: David S. Miller --- net/tipc/bcast.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 5aff0844d4d3..52307397e0b1 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -523,11 +523,13 @@ receive: tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) - goto unlock; tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); + tipc_buf_append(&node->bclink.reasm_buf, &buf); + if (unlikely(!buf && !node->bclink.reasm_buf)) { + tipc_bclink_unlock(net); + goto unlock; + } bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; -- cgit v1.2.3 From 1f66d161ab3d8b518903fa6c3f9c1f48d6919e74 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Mar 2015 12:07:24 -0400 Subject: tipc: introduce starvation free send algorithm Currently, we only use a single counter; the length of the backlog queue, to determine whether a message should be accepted to the queue or not. Each time a message is being sent, the queue length is compared to a threshold value for the message's importance priority. If the queue length is beyond this threshold, the message is rejected. This algorithm implies a risk of starvation of low importance senders during very high load, because it may take a long time before the backlog queue has decreased enough to accept a lower level message. We now eliminate this risk by introducing a counter for each importance priority. When a message is sent, we check only the queue level for that particular message's priority. If that is ok, the message can be added to the backlog, irrespective of the queue level for other priorities. This way, each level is guaranteed a certain portion of the total bandwidth, and any risk of starvation is eliminated. Reviewed-by: Ying Xue Reviewed-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 2 +- net/tipc/link.c | 58 +++++++++++++++++++++++++++++++++++--------------------- net/tipc/link.h | 7 +++++-- 3 files changed, 42 insertions(+), 25 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 52307397e0b1..79355531c3e2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -831,7 +831,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->queue_limit[0])) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; nla_nest_end(msg->skb, prop); diff --git a/net/tipc/link.c b/net/tipc/link.c index 8c98c4d00ad6..b9325a1bddaa 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -310,7 +310,6 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, link_init_max_pkt(l_ptr); l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); - l_ptr->next_out_no = 1; __skb_queue_head_init(&l_ptr->transmq); __skb_queue_head_init(&l_ptr->backlogq); @@ -398,19 +397,22 @@ static bool link_schedule_user(struct tipc_link *link, u32 oport, * Move a number of waiting users, as permitted by available space in * the send queue, from link wait queue to node wait queue for wakeup */ -void link_prepare_wakeup(struct tipc_link *link) +void link_prepare_wakeup(struct tipc_link *l) { - uint pend_qsz = skb_queue_len(&link->backlogq); + int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; + int imp, lim; struct sk_buff *skb, *tmp; - skb_queue_walk_safe(&link->wakeupq, skb, tmp) { - if (pend_qsz >= link->queue_limit[TIPC_SKB_CB(skb)->chain_imp]) + skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + lim = l->window + l->backlog[imp].limit; + pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; + if ((pnd[imp] + l->backlog[imp].len) >= lim) break; - pend_qsz += TIPC_SKB_CB(skb)->chain_sz; - skb_unlink(skb, &link->wakeupq); - skb_queue_tail(&link->inputq, skb); - link->owner->inputq = &link->inputq; - link->owner->action_flags |= TIPC_MSG_EVT; + skb_unlink(skb, &l->wakeupq); + skb_queue_tail(&l->inputq, skb); + l->owner->inputq = &l->inputq; + l->owner->action_flags |= TIPC_MSG_EVT; } } @@ -424,6 +426,16 @@ void tipc_link_reset_fragments(struct tipc_link *l_ptr) l_ptr->reasm_buf = NULL; } +static void tipc_link_purge_backlog(struct tipc_link *l) +{ + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; +} + /** * tipc_link_purge_queues - purge all pkt queues associated with link * @l_ptr: pointer to link @@ -432,7 +444,7 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) { __skb_queue_purge(&l_ptr->deferdq); __skb_queue_purge(&l_ptr->transmq); - __skb_queue_purge(&l_ptr->backlogq); + tipc_link_purge_backlog(l_ptr); tipc_link_reset_fragments(l_ptr); } @@ -466,13 +478,13 @@ void tipc_link_reset(struct tipc_link *l_ptr) /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->transmq); - __skb_queue_purge(&l_ptr->backlogq); __skb_queue_purge(&l_ptr->deferdq); if (!owner->inputq) owner->inputq = &l_ptr->inputq; skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; + tipc_link_purge_backlog(l_ptr); l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; @@ -754,16 +766,14 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *backlogq = &link->backlogq; struct sk_buff *skb, *tmp; - /* Match queue limit against msg importance: */ - if (unlikely(skb_queue_len(backlogq) >= link->queue_limit[imp])) + /* Match backlog limit against msg importance: */ + if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) return tipc_link_cong(link, list); - /* Has valid packet limit been used ? */ if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); return -EMSGSIZE; } - /* Prepare each packet for sending, and add to relevant queue: */ skb_queue_walk_safe(list, skb, tmp) { __skb_unlink(skb, list); @@ -786,8 +796,10 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, if (tipc_msg_make_bundle(&skb, mtu, link->addr)) { link->stats.sent_bundled++; link->stats.sent_bundles++; + imp = msg_importance(buf_msg(skb)); } __skb_queue_tail(backlogq, skb); + link->backlog[imp].len++; seqno++; } link->next_out_no = seqno; @@ -914,6 +926,7 @@ void tipc_link_push_packets(struct tipc_link *link) if (!skb) break; msg = buf_msg(skb); + link->backlog[msg_importance(msg)].len--; msg_set_ack(msg, ack); msg_set_bcast_ack(msg, link->owner->bclink.last_in); link->rcv_unacked = 0; @@ -1610,6 +1623,7 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); + tipc_link_purge_backlog(l_ptr); msgcount = skb_queue_len(&l_ptr->transmq); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); @@ -1817,11 +1831,11 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) int max_bulk = TIPC_MAX_PUBLICATIONS / (l->max_pkt / ITEM_SIZE); l->window = win; - l->queue_limit[TIPC_LOW_IMPORTANCE] = win / 2; - l->queue_limit[TIPC_MEDIUM_IMPORTANCE] = win; - l->queue_limit[TIPC_HIGH_IMPORTANCE] = win / 2 * 3; - l->queue_limit[TIPC_CRITICAL_IMPORTANCE] = win * 2; - l->queue_limit[TIPC_SYSTEM_IMPORTANCE] = max_bulk; + l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; + l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /* tipc_link_find_owner - locate owner node of link by link's name @@ -2120,7 +2134,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, - link->queue_limit[TIPC_LOW_IMPORTANCE])) + link->window)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index eec3ecf2d450..99543a46095a 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -118,7 +118,7 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') - * @queue_limit: outbound message queue congestion thresholds (indexed by user) + * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset * @max_pkt: current maximum packet size for this link @@ -166,7 +166,6 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; - u32 queue_limit[15]; /* queue_limit[0]==window limit */ /* Changeover */ u32 exp_msg_count; @@ -180,6 +179,10 @@ struct tipc_link { /* Sending */ struct sk_buff_head transmq; struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + } backlog[5]; u32 next_out_no; u32 window; u32 last_retransmitted; -- cgit v1.2.3 From 3127a0200d4a46cf279bb388cc0f71827cd60699 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Mar 2015 12:07:25 -0400 Subject: tipc: clean up handling of link congestion After the recent changes in message importance handling it becomes possible to simplify handling of messages and sockets when we encounter link congestion. We merge the function tipc_link_cong() into link_schedule_user(), and simplify the code of the latter. The code should now be easier to follow, especially regarding return codes and handling of the message that caused the situation. In case the scheduling function is unable to pre-allocate a wakeup message buffer, it now returns -ENOBUFS, which is a more correct code than the previously used -EHOSTUNREACH. Reviewed-by: Ying Xue Reviewed-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 104 ++++++++++++++++++++++++++------------------------------ net/tipc/msg.h | 28 ++++++--------- 2 files changed, 60 insertions(+), 72 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index b9325a1bddaa..58e2460682da 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -367,28 +367,43 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, } /** - * link_schedule_user - schedule user for wakeup after congestion + * link_schedule_user - schedule a message sender for wakeup after congestion * @link: congested link - * @oport: sending port - * @chain_sz: size of buffer chain that was attempted sent - * @imp: importance of message attempted sent + * @list: message that was attempted sent * Create pseudo msg to send back to user when congestion abates + * Only consumes message if there is an error */ -static bool link_schedule_user(struct tipc_link *link, u32 oport, - uint chain_sz, uint imp) +static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) { - struct sk_buff *buf; + struct tipc_msg *msg = buf_msg(skb_peek(list)); + int imp = msg_importance(msg); + u32 oport = msg_origport(msg); + u32 addr = link_own_addr(link); + struct sk_buff *skb; - buf = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, - link_own_addr(link), link_own_addr(link), - oport, 0, 0); - if (!buf) - return false; - TIPC_SKB_CB(buf)->chain_sz = chain_sz; - TIPC_SKB_CB(buf)->chain_imp = imp; - skb_queue_tail(&link->wakeupq, buf); + /* This really cannot happen... */ + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { + pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + tipc_link_reset(link); + goto err; + } + /* Non-blocking sender: */ + if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) + return -ELINKCONG; + + /* Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + addr, addr, oport, 0, 0); + if (!skb) + goto err; + TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); + TIPC_SKB_CB(skb)->chain_imp = imp; + skb_queue_tail(&link->wakeupq, skb); link->stats.link_congs++; - return true; + return -ELINKCONG; +err: + __skb_queue_purge(list); + return -ENOBUFS; } /** @@ -708,48 +723,15 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } } -/* tipc_link_cong: determine return value and how to treat the - * sent buffer during link congestion. - * - For plain, errorless user data messages we keep the buffer and - * return -ELINKONG. - * - For all other messages we discard the buffer and return -EHOSTUNREACH - * - For TIPC internal messages we also reset the link - */ -static int tipc_link_cong(struct tipc_link *link, struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek(list); - struct tipc_msg *msg = buf_msg(skb); - int imp = msg_importance(msg); - u32 oport = msg_tot_origport(msg); - - if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { - pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); - tipc_link_reset(link); - goto drop; - } - if (unlikely(msg_errcode(msg))) - goto drop; - if (unlikely(msg_reroute_cnt(msg))) - goto drop; - if (TIPC_SKB_CB(skb)->wakeup_pending) - return -ELINKCONG; - if (link_schedule_user(link, oport, skb_queue_len(list), imp)) - return -ELINKCONG; -drop: - __skb_queue_purge(list); - return -EHOSTUNREACH; -} - /** * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked * @link: link to use * @list: chain of buffers containing message * - * Consumes the buffer chain, except when returning -ELINKCONG - * Returns 0 if success, otherwise errno: -ELINKCONG, -EMSGSIZE (plain socket - * user data messages) or -EHOSTUNREACH (all other messages/senders) - * Only the socket functions tipc_send_stream() and tipc_send_packet() need - * to act on the return value, since they may need to do more send attempts. + * Consumes the buffer chain, except when returning -ELINKCONG, + * since the caller then may want to make more send attempts. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted */ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list) @@ -768,7 +750,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, /* Match backlog limit against msg importance: */ if (unlikely(link->backlog[imp].len >= link->backlog[imp].limit)) - return tipc_link_cong(link, list); + return link_schedule_user(link, list); if (unlikely(msg_size(msg) > mtu)) { __skb_queue_purge(list); @@ -820,13 +802,25 @@ static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb) return __tipc_link_xmit(link->owner->net, link, &head); } +/* tipc_link_xmit_skb(): send single buffer to destination + * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE + * messages, which will not be rejected + * The only exception is datagram messages rerouted after secondary + * lookup, which are rare and safe to dispose of anyway. + * TODO: Return real return value, and let callers use + * tipc_wait_for_sendpkt() where applicable + */ int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) { struct sk_buff_head head; + int rc; skb2list(skb, &head); - return tipc_link_xmit(net, &head, dnode, selector); + rc = tipc_link_xmit(net, &head, dnode, selector); + if (rc == -ELINKCONG) + kfree_skb(skb); + return 0; } /** diff --git a/net/tipc/msg.h b/net/tipc/msg.h index bd3969a80dd4..6445db09c0c4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -240,6 +240,15 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} /* * Word 1 @@ -372,6 +381,8 @@ static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) static inline u32 msg_origport(struct tipc_msg *m) { + if (msg_user(m) == MSG_FRAGMENTER) + m = msg_get_wrapped(m); return msg_word(m, 4); } @@ -467,16 +478,6 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) msg_set_word(m, 10, n); } -static inline unchar *msg_data(struct tipc_msg *m) -{ - return ((unchar *)m) + msg_hdr_sz(m); -} - -static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) -{ - return (struct tipc_msg *)msg_data(m); -} - /* * Constants and routines used to read and write TIPC internal message headers */ @@ -753,13 +754,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } -static inline u32 msg_tot_origport(struct tipc_msg *m) -{ - if ((msg_user(m) == MSG_FRAGMENTER) && (msg_type(m) == FIRST_FRAGMENT)) - return msg_origport(msg_get_wrapped(m)); - return msg_origport(m); -} - struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode, -- cgit v1.2.3 From 8b4ed8634f8b3f9aacfc42b4a872d30c36b9e255 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 25 Mar 2015 12:07:26 -0400 Subject: tipc: eliminate race condition at dual link establishment Despite recent improvements, the establishment of dual parallel links still has a small glitch where messages can bypass each other. When the second link in a dual-link configuration is established, part of the first link's traffic will be steered over to the new link. Although we do have a mechanism to ensure that packets sent before and after the establishment of the new link arrive in sequence to the destination node, this is not enough. The arriving messages will still be delivered upwards in different threads, something entailing a risk of message disordering during the transition phase. To fix this, we introduce a synchronization mechanism between the two parallel links, so that traffic arriving on the new link cannot be added to its input queue until we are guaranteed that all pre-establishment messages have been delivered on the old, parallel link. This problem seems to always have been around, but its occurrence is so rare that it has not been noticed until recent intensive testing. Reviewed-by: Ying Xue Reviewed-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/tipc/link.h | 2 ++ net/tipc/msg.h | 8 ++++++++ 3 files changed, 55 insertions(+) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 58e2460682da..1287161e9424 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -139,6 +139,13 @@ static void tipc_link_put(struct tipc_link *l_ptr) kref_put(&l_ptr->ref, tipc_link_release); } +static struct tipc_link *tipc_parallel_link(struct tipc_link *l) +{ + if (l->owner->active_links[0] != l) + return l->owner->active_links[0]; + return l->owner->active_links[1]; +} + static void link_init_max_pkt(struct tipc_link *l_ptr) { struct tipc_node *node = l_ptr->owner; @@ -1026,6 +1033,32 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, } } +/* link_synch(): check if all packets arrived before the synch + * point have been consumed + * Returns true if the parallel links are synched, otherwise false + */ +static bool link_synch(struct tipc_link *l) +{ + unsigned int post_synch; + struct tipc_link *pl; + + pl = tipc_parallel_link(l); + if (pl == l) + goto synched; + + /* Was last pre-synch packet added to input queue ? */ + if (less_eq(pl->next_in_no, l->synch_point)) + return false; + + /* Is it still in the input queue ? */ + post_synch = mod(pl->next_in_no - l->synch_point) - 1; + if (skb_queue_len(&pl->inputq) > post_synch) + return false; +synched: + l->flags &= ~LINK_SYNCHING; + return true; +} + static void link_retrieve_defq(struct tipc_link *link, struct sk_buff_head *list) { @@ -1156,6 +1189,14 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; goto unlock; } + /* Synchronize with parallel link if applicable */ + if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) { + link_handle_out_of_seq_msg(l_ptr, skb); + if (link_synch(l_ptr)) + link_retrieve_defq(l_ptr, &head); + skb = NULL; + goto unlock; + } l_ptr->next_in_no++; if (unlikely(!skb_queue_empty(&l_ptr->deferdq))) link_retrieve_defq(l_ptr, &head); @@ -1231,6 +1272,10 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) switch (msg_user(msg)) { case CHANGEOVER_PROTOCOL: + if (msg_dup(msg)) { + link->flags |= LINK_SYNCHING; + link->synch_point = msg_seqno(msg_get_wrapped(msg)); + } if (!tipc_link_tunnel_rcv(node, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { diff --git a/net/tipc/link.h b/net/tipc/link.h index 99543a46095a..d2b5663643da 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -60,6 +60,7 @@ */ #define LINK_STARTED 0x0001 #define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -170,6 +171,7 @@ struct tipc_link { /* Changeover */ u32 exp_msg_count; u32 reset_checkpoint; + u32 synch_point; /* Max packet negotiation */ u32 max_pkt; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 6445db09c0c4..d273207ede28 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -554,6 +554,14 @@ static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 15, 0x1fff, n); } +static inline bool msg_dup(struct tipc_msg *m) +{ + if (likely(msg_user(m) != CHANGEOVER_PROTOCOL)) + return false; + if (msg_type(m) != DUPLICATE_MSG) + return false; + return true; +} /* * Word 2 -- cgit v1.2.3 From b952b2befb6f6b009e91f087285b9a0a6beb1cc8 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 26 Mar 2015 18:10:23 +0800 Subject: tipc: fix potential deadlock when all links are reset [ 60.988363] ====================================================== [ 60.988754] [ INFO: possible circular locking dependency detected ] [ 60.989152] 3.19.0+ #194 Not tainted [ 60.989377] ------------------------------------------------------- [ 60.989781] swapper/3/0 is trying to acquire lock: [ 60.990079] (&(&n_ptr->lock)->rlock){+.-...}, at: [] tipc_link_retransmit+0x1aa/0x240 [tipc] [ 60.990743] [ 60.990743] but task is already holding lock: [ 60.991106] (&(&bclink->lock)->rlock){+.-...}, at: [] tipc_bclink_lock+0x8e/0xa0 [tipc] [ 60.991738] [ 60.991738] which lock already depends on the new lock. [ 60.991738] [ 60.992174] [ 60.992174] the existing dependency chain (in reverse order) is: [ 60.992174] -> #1 (&(&bclink->lock)->rlock){+.-...}: [ 60.992174] [] lock_acquire+0x9c/0x140 [ 60.992174] [] _raw_spin_lock_bh+0x3f/0x50 [ 60.992174] [] tipc_bclink_lock+0x8e/0xa0 [tipc] [ 60.992174] [] tipc_bclink_add_node+0x97/0xf0 [tipc] [ 60.992174] [] tipc_node_link_up+0xf5/0x110 [tipc] [ 60.992174] [] link_state_event+0x2b3/0x4f0 [tipc] [ 60.992174] [] tipc_link_proto_rcv+0x24c/0x418 [tipc] [ 60.992174] [] tipc_rcv+0x827/0xac0 [tipc] [ 60.992174] [] tipc_l2_rcv_msg+0x73/0xd0 [tipc] [ 60.992174] [] __netif_receive_skb_core+0x746/0x980 [ 60.992174] [] __netif_receive_skb+0x21/0x70 [ 60.992174] [] netif_receive_skb_internal+0x35/0x130 [ 60.992174] [] napi_gro_receive+0x158/0x1d0 [ 60.992174] [] e1000_clean_rx_irq+0x155/0x490 [ 60.992174] [] e1000_clean+0x267/0x990 [ 60.992174] [] net_rx_action+0x150/0x360 [ 60.992174] [] __do_softirq+0x123/0x360 [ 60.992174] [] irq_exit+0x8e/0xb0 [ 60.992174] [] do_IRQ+0x65/0x110 [ 60.992174] [] ret_from_intr+0x0/0x13 [ 60.992174] [] arch_cpu_idle+0xf/0x20 [ 60.992174] [] cpu_startup_entry+0x2f6/0x3f0 [ 60.992174] [] start_secondary+0x13a/0x150 [ 60.992174] -> #0 (&(&n_ptr->lock)->rlock){+.-...}: [ 60.992174] [] __lock_acquire+0x163d/0x1ca0 [ 60.992174] [] lock_acquire+0x9c/0x140 [ 60.992174] [] _raw_spin_lock_bh+0x3f/0x50 [ 60.992174] [] tipc_link_retransmit+0x1aa/0x240 [tipc] [ 60.992174] [] tipc_bclink_rcv+0x611/0x640 [tipc] [ 60.992174] [] tipc_rcv+0x616/0xac0 [tipc] [ 60.992174] [] tipc_l2_rcv_msg+0x73/0xd0 [tipc] [ 60.992174] [] __netif_receive_skb_core+0x746/0x980 [ 60.992174] [] __netif_receive_skb+0x21/0x70 [ 60.992174] [] netif_receive_skb_internal+0x35/0x130 [ 60.992174] [] napi_gro_receive+0x158/0x1d0 [ 60.992174] [] e1000_clean_rx_irq+0x155/0x490 [ 60.992174] [] e1000_clean+0x267/0x990 [ 60.992174] [] net_rx_action+0x150/0x360 [ 60.992174] [] __do_softirq+0x123/0x360 [ 60.992174] [] irq_exit+0x8e/0xb0 [ 60.992174] [] do_IRQ+0x65/0x110 [ 60.992174] [] ret_from_intr+0x0/0x13 [ 60.992174] [] arch_cpu_idle+0xf/0x20 [ 60.992174] [] cpu_startup_entry+0x2f6/0x3f0 [ 60.992174] [] start_secondary+0x13a/0x150 [ 60.992174] [ 60.992174] other info that might help us debug this: [ 60.992174] [ 60.992174] Possible unsafe locking scenario: [ 60.992174] [ 60.992174] CPU0 CPU1 [ 60.992174] ---- ---- [ 60.992174] lock(&(&bclink->lock)->rlock); [ 60.992174] lock(&(&n_ptr->lock)->rlock); [ 60.992174] lock(&(&bclink->lock)->rlock); [ 60.992174] lock(&(&n_ptr->lock)->rlock); [ 60.992174] [ 60.992174] *** DEADLOCK *** [ 60.992174] [ 60.992174] 3 locks held by swapper/3/0: [ 60.992174] #0: (rcu_read_lock){......}, at: [] __netif_receive_skb_core+0x71/0x980 [ 60.992174] #1: (rcu_read_lock){......}, at: [] tipc_l2_rcv_msg+0x5/0xd0 [tipc] [ 60.992174] #2: (&(&bclink->lock)->rlock){+.-...}, at: [] tipc_bclink_lock+0x8e/0xa0 [tipc] [ 60.992174] The correct the sequence of grabbing n_ptr->lock and bclink->lock should be that the former is first held and the latter is then taken, which exactly happened on CPU1. But especially when the retransmission of broadcast link is failed, bclink->lock is first held in tipc_bclink_rcv(), and n_ptr->lock is taken in link_retransmit_failure() called by tipc_link_retransmit() subsequently, which is demonstrated on CPU0. As a result, deadlock occurs. If the order of holding the two locks happening on CPU0 is reversed, the deadlock risk will be relieved. Therefore, the node lock taken in link_retransmit_failure() originally is moved to tipc_bclink_rcv() so that it's obtained before bclink lock. But the precondition of the adjustment of node lock is that responding to bclink reset event must be moved from tipc_bclink_unlock() to tipc_node_unlock(). Reviewed-by: Erik Hugne Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 23 +---------------------- net/tipc/bcast.h | 4 ---- net/tipc/link.c | 5 +---- net/tipc/node.c | 5 ++++- net/tipc/node.h | 3 ++- 5 files changed, 8 insertions(+), 32 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 79355531c3e2..4289dd62f589 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net) static void tipc_bclink_unlock(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node = NULL; - if (likely(!tn->bclink->flags)) { - spin_unlock_bh(&tn->bclink->lock); - return; - } - - if (tn->bclink->flags & TIPC_BCLINK_RESET) { - tn->bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(net); - } spin_unlock_bh(&tn->bclink->lock); - - if (node) - tipc_link_reset_all(node); } void tipc_bclink_input(struct net *net) @@ -91,13 +78,6 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(struct net *net, unsigned int flags) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tn->bclink->flags |= flags; -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -156,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) seqno : node->bclink.last_sent; } - /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * @@ -476,13 +455,13 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) goto unlock; if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); tipc_bclink_lock(net); bcl->stats.recv_nacks++; tn->bclink->retransmit_to = node; bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); tipc_bclink_unlock(net); + tipc_node_unlock(node); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 43f397fbac55..4bdc12277d33 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,6 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; -#define TIPC_BCLINK_RESET 1 #define BCBEARER MAX_BEARERS /** @@ -86,7 +85,6 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,7 +94,6 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; - unsigned int flags; struct sk_buff_head arrvq; struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; @@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(struct net *tn, unsigned int flags); void tipc_bclink_add_node(struct net *net, u32 addr); void tipc_bclink_remove_node(struct net *net, u32 addr); struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1287161e9424..f5e086c5f724 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -980,7 +980,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = tipc_bclink_retransmit_to(net); - tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); pr_info("Broadcast link info for %s\n", addr_string); @@ -992,9 +991,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_node_unlock(n_ptr); - - tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); + n_ptr->action_flags |= TIPC_BCAST_RESET; l_ptr->stale_count = 0; } } diff --git a/net/tipc/node.c b/net/tipc/node.c index 26d1de1bf34d..5cc43d34ad0a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -459,7 +459,7 @@ void tipc_node_unlock(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT); + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -488,6 +488,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); } /* Caller should hold node lock for the passed node */ diff --git a/net/tipc/node.h b/net/tipc/node.h index e89ac04ec2c3..9629ecd2bdd8 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,8 @@ enum { TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), TIPC_NAMED_MSG_EVT = (1 << 8), - TIPC_BCAST_MSG_EVT = (1 << 9) + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) }; /** -- cgit v1.2.3 From 8a0f6ebe8494c5c6ccfe12264385b64c280e3241 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 26 Mar 2015 18:10:24 +0800 Subject: tipc: involve reference counter for node structure TIPC node hash node table is protected with rcu lock on read side. tipc_node_find() is used to look for a node object with node address through iterating the hash node table. As the entire process of what tipc_node_find() traverses the table is guarded with rcu read lock, it's safe for us. However, when callers use the node object returned by tipc_node_find(), there is no rcu read lock applied. Therefore, this is absolutely unsafe for callers of tipc_node_find(). Now we introduce a reference counter for node structure. Before tipc_node_find() returns node object to its caller, it first increases the reference counter. Accordingly, after its caller used it up, it decreases the counter again. This can prevent a node being used by one thread from being freed by another thread. Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 5 +-- net/tipc/discover.c | 1 + net/tipc/link.c | 7 +++-- net/tipc/name_distr.c | 2 ++ net/tipc/node.c | 85 ++++++++++++++++++++++++++++++++++++--------------- net/tipc/node.h | 9 ++++-- 6 files changed, 79 insertions(+), 30 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4289dd62f589..ae558dd7f8ee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -329,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) return; tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && (n_ptr->bclink.last_in == msg_bcgap_after(msg))) n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); } /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster @@ -466,6 +465,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) tipc_node_unlock(node); bclink_peek_nack(net, msg); } + tipc_node_put(node); goto exit; } @@ -570,6 +570,7 @@ receive: unlock: tipc_node_unlock(node); + tipc_node_put(node); exit: kfree_skb(buf); } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 169f3dd038b9..967e292f53c8 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -260,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, } } tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/link.c b/net/tipc/link.c index f5e086c5f724..514466efc25c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -854,6 +854,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); + tipc_node_put(node); } if (link) return rc; @@ -1116,8 +1117,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; - tipc_node_lock(n_ptr); + tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) @@ -1205,6 +1206,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; unlock: tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); discard: if (unlikely(skb)) kfree_skb(skb); @@ -2236,7 +2238,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { @@ -2249,6 +2250,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 1; goto out; } + tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { @@ -2256,6 +2258,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); + tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 506aaa565da7..41e7b7e4dda0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_add_tail(&publ->nodesub_list, &node->publ_list); tipc_node_unlock(node); + tipc_node_put(node); } static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, @@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_del_init(&publ->nodesub_list); tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 5cc43d34ad0a..3e4f04897c03 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); +static void tipc_node_delete(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr) return addr & (NODE_HTABLE_SIZE - 1); } +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *node = container_of(kref, struct tipc_node, kref); + + tipc_node_delete(node); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +static void tipc_node_get(struct tipc_node *node) +{ + kref_get(&node->kref); +} + /* * tipc_node_find - locate specified node object, if it exists */ @@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) { + tipc_node_get(node); rcu_read_unlock(); return node; } @@ -106,6 +125,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); @@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *node) { - list_del_rcu(&n_ptr->list); - hlist_del_rcu(&n_ptr->hash); - kfree_rcu(n_ptr, rcu); + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + kfree_rcu(node, rcu); } void tipc_node_stop(struct net *net) @@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net) spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) - tipc_node_delete(tn, node); + tipc_node_put(node); spin_unlock_bh(&tn->node_list_lock); } @@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; + int err = 0; if (in_own_node(net, dnode)) return 0; @@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) - return -EHOSTUNREACH; + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; @@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) tipc_node_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_unlock(node); - return 0; +exit: + tipc_node_put(node); + return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) @@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) kfree(conn); } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -417,19 +444,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; + int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); - if ((bearer_id >= MAX_BEARERS) || !node) - return -EINVAL; + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + tipc_node_lock(node); link = node->links[bearer_id]; if (link) { strncpy(linkname, link->name, len); - tipc_node_unlock(node); - return 0; + err = 0; } +exit: tipc_node_unlock(node); - return -EINVAL; + tipc_node_put(node); + return err; } void tipc_node_unlock(struct tipc_node *node) @@ -545,17 +578,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - - if (last_addr && !tipc_node_find(net, last_addr)) { - rcu_read_unlock(); - /* We never set seq or call nl_dump_check_consistent() this - * means that setting prev_seq here will cause the consistence - * check to fail in the netlink callback handler. Resulting in - * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if - * the node state changed while we released the lock. - */ - cb->prev_seq = 1; - return -EPIPE; + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. + */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 9629ecd2bdd8..02d5c20dc551 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -94,6 +94,7 @@ struct tipc_node_bclink { /** * struct tipc_node - TIPC node structure * @addr: network address of node + * @ref: reference counter to node object * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -115,6 +116,7 @@ struct tipc_node_bclink { */ struct tipc_node { u32 addr; + struct kref kref; spinlock_t lock; struct net *net; struct hlist_node hash; @@ -137,6 +139,7 @@ struct tipc_node { }; struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); @@ -171,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) node = tipc_node_find(net, addr); - if (likely(node)) + if (likely(node)) { mtu = node->act_mtus[selector & 1]; - else + tipc_node_put(node); + } else { mtu = MAX_MSG_SIZE; + } return mtu; } -- cgit v1.2.3 From d482994fca82380912b3a80201b74d5118ff0487 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 27 Mar 2015 10:19:19 -0400 Subject: tipc: fix two bugs in secondary destination lookup A message sent to a node after a successful name table lookup may still find that the destination socket has disappeared, because distribution of name table updates is non-atomic. If so, the message will be rejected back to the sender with error code TIPC_ERR_NO_PORT. If the source socket of the message has disappeared in the meantime, the message should be dropped. However, in the currrent code, the message will instead be subject to an unwanted tertiary lookup, because the function tipc_msg_lookup_dest() doesn't check if there is an error code present in the message before performing the lookup. In the worst case, the message may now find the old destination again, and be redirected once more, instead of being dropped directly as it should be. A second bug in this function is that the "prev_node" field in the message is not updated after successful lookup, something that may have unpredictable consequences. The problems arising from those bugs occur very infrequently. The third change in this function; the test on msg_reroute_msg_cnt() is purely cosmetic, reflecting that the returned value never can be negative. This commit corrects the two bugs described above. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/addr.c | 7 +++++++ net/tipc/addr.h | 1 + net/tipc/msg.c | 7 ++++++- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'net/tipc') diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 48fd3b5a73fb..ba7daa864d44 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -38,6 +38,13 @@ #include "addr.h" #include "core.h" +u32 tipc_own_addr(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->own_addr; +} + /** * in_own_cluster - test for cluster inclusion; <0.0.0> always matches */ diff --git a/net/tipc/addr.h b/net/tipc/addr.h index c700c2d28e09..7ba6d5c8ae40 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -55,6 +55,7 @@ static inline u32 tipc_cluster_mask(u32 addr) return addr & TIPC_CLUSTER_MASK; } +u32 tipc_own_addr(struct net *net); int in_own_cluster(struct net *net, u32 addr); int in_own_cluster_exact(struct net *net, u32 addr); int in_own_node(struct net *net, u32 addr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0c6dad8180a0..3bb499c61918 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -511,15 +511,18 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, { struct tipc_msg *msg = buf_msg(skb); u32 dport; + u32 own_addr = tipc_own_addr(net); if (!msg_isdata(msg)) return false; if (!msg_named(msg)) return false; + if (msg_errcode(msg)) + return false; *err = -TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; - if (msg_reroute_cnt(msg) > 0) + if (msg_reroute_cnt(msg)) return false; *dnode = addr_domain(net, msg_lookup_scope(msg)); dport = tipc_nametbl_translate(net, msg_nametype(msg), @@ -527,6 +530,8 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, if (!dport) return false; msg_incr_reroute_cnt(msg); + if (*dnode != own_addr) + msg_set_prevnode(msg, own_addr); msg_set_destnode(msg, *dnode); msg_set_destport(msg, dport); *err = TIPC_OK; -- cgit v1.2.3 From 2da7142516527a5213588f47ed302e79a5d9527a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 2 Apr 2015 09:33:00 -0400 Subject: tipc: drop tunneled packet duplicates at reception In commit 8b4ed8634f8b3f9aacfc42b4a872d30c36b9e255 ("tipc: eliminate race condition at dual link establishment") we introduced a parallel link synchronization mechanism that guarentees sequential delivery even for users switching from an old to a newly established link. The new mechanism makes it unnecessary to deliver the tunneled duplicate packets back to the old link, as we are currently doing. It is now sufficient to use the last tunneled packet's inner sequence number as synchronization point between the two parallel links, whereafter it can be dropped. In this commit, we drop the duplicate packets arriving on the new link, after updating the synchronization point at each new arrival. Although it would now have been sufficient for the other endpoint to only tunnel the last packet in its send queue, and not the entire queue, we must still do this to maintain compatibility with older nodes. This commit makes it possible to get rid if some complex interaction between the two parallel links. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 132 ++++++++++++++++++++------------------------------------ 1 file changed, 47 insertions(+), 85 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index 514466efc25c..c697cf69da91 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -105,8 +105,6 @@ static void link_handle_out_of_seq_msg(struct tipc_link *link, struct sk_buff *skb); static void tipc_link_proto_rcv(struct tipc_link *link, struct sk_buff *skb); -static int tipc_link_tunnel_rcv(struct tipc_node *node, - struct sk_buff **skb); static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol); static void link_state_event(struct tipc_link *l_ptr, u32 event); static void link_reset_statistics(struct tipc_link *l_ptr); @@ -115,7 +113,8 @@ static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); - +static bool tipc_link_failover_rcv(struct tipc_node *node, + struct sk_buff **skb); /* * Simple link routines */ @@ -1274,8 +1273,10 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) if (msg_dup(msg)) { link->flags |= LINK_SYNCHING; link->synch_point = msg_seqno(msg_get_wrapped(msg)); + kfree_skb(skb); + break; } - if (!tipc_link_tunnel_rcv(node, &skb)) + if (!tipc_link_failover_rcv(node, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { tipc_data_input(link, skb); @@ -1755,101 +1756,62 @@ tunnel_queue: goto tunnel_queue; } -/* tipc_link_dup_rcv(): Receive a tunnelled DUPLICATE_MSG packet. - * Owner node is locked. - */ -static void tipc_link_dup_rcv(struct tipc_link *link, - struct sk_buff *skb) -{ - struct sk_buff *iskb; - int pos = 0; - - if (!tipc_link_is_up(link)) - return; - - if (!tipc_msg_extract(skb, &iskb, &pos)) { - pr_warn("%sfailed to extract inner dup pkt\n", link_co_err); - return; - } - /* Append buffer to deferred queue, if applicable: */ - link_handle_out_of_seq_msg(link, iskb); -} - /* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet * Owner node is locked. */ -static struct sk_buff *tipc_link_failover_rcv(struct tipc_link *l_ptr, - struct sk_buff *t_buf) +static bool tipc_link_failover_rcv(struct tipc_node *node, + struct sk_buff **skb) { - struct tipc_msg *t_msg = buf_msg(t_buf); - struct sk_buff *buf = NULL; - struct tipc_msg *msg; + struct tipc_msg *msg = buf_msg(*skb); + struct sk_buff *iskb = NULL; + struct tipc_link *link = NULL; + int bearer_id = msg_bearer_id(msg); int pos = 0; - if (tipc_link_is_up(l_ptr)) - tipc_link_reset(l_ptr); - - /* First failover packet? */ - if (l_ptr->exp_msg_count == START_CHANGEOVER) - l_ptr->exp_msg_count = msg_msgcnt(t_msg); - - /* Should there be an inner packet? */ - if (l_ptr->exp_msg_count) { - l_ptr->exp_msg_count--; - if (!tipc_msg_extract(t_buf, &buf, &pos)) { - pr_warn("%sno inner failover pkt\n", link_co_err); - goto exit; - } - msg = buf_msg(buf); - - if (less(msg_seqno(msg), l_ptr->reset_checkpoint)) { - kfree_skb(buf); - buf = NULL; - goto exit; - } - if (msg_user(msg) == MSG_FRAGMENTER) { - l_ptr->stats.recv_fragments++; - tipc_buf_append(&l_ptr->reasm_buf, &buf); - } + if (msg_type(msg) != ORIGINAL_MSG) { + pr_warn("%sunknown tunnel pkt received\n", link_co_err); + goto exit; } -exit: - if ((!l_ptr->exp_msg_count) && (l_ptr->flags & LINK_STOPPED)) - tipc_link_delete(l_ptr); - return buf; -} - -/* tipc_link_tunnel_rcv(): Receive a tunnelled packet, sent - * via other link as result of a failover (ORIGINAL_MSG) or - * a new active link (DUPLICATE_MSG). Failover packets are - * returned to the active link for delivery upwards. - * Owner node is locked. - */ -static int tipc_link_tunnel_rcv(struct tipc_node *n_ptr, - struct sk_buff **buf) -{ - struct sk_buff *t_buf = *buf; - struct tipc_link *l_ptr; - struct tipc_msg *t_msg = buf_msg(t_buf); - u32 bearer_id = msg_bearer_id(t_msg); + if (bearer_id >= MAX_BEARERS) + goto exit; + link = node->links[bearer_id]; + if (!link) + goto exit; + if (tipc_link_is_up(link)) + tipc_link_reset(link); - *buf = NULL; + /* First failover packet? */ + if (link->exp_msg_count == START_CHANGEOVER) + link->exp_msg_count = msg_msgcnt(msg); - if (bearer_id >= MAX_BEARERS) + /* Should we expect an inner packet? */ + if (!link->exp_msg_count) goto exit; - l_ptr = n_ptr->links[bearer_id]; - if (!l_ptr) + if (!tipc_msg_extract(*skb, &iskb, &pos)) { + pr_warn("%sno inner failover pkt\n", link_co_err); + *skb = NULL; goto exit; + } + link->exp_msg_count--; + *skb = NULL; - if (msg_type(t_msg) == DUPLICATE_MSG) - tipc_link_dup_rcv(l_ptr, t_buf); - else if (msg_type(t_msg) == ORIGINAL_MSG) - *buf = tipc_link_failover_rcv(l_ptr, t_buf); - else - pr_warn("%sunknown tunnel pkt received\n", link_co_err); + /* Was packet already delivered? */ + if (less(buf_seqno(iskb), link->reset_checkpoint)) { + kfree_skb(iskb); + iskb = NULL; + goto exit; + } + if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { + link->stats.recv_fragments++; + tipc_buf_append(&link->reasm_buf, &iskb); + } exit: - kfree_skb(t_buf); - return *buf != NULL; + if (link && (!link->exp_msg_count) && (link->flags & LINK_STOPPED)) + tipc_link_delete(link); + kfree_skb(*skb); + *skb = iskb; + return *skb; } static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) -- cgit v1.2.3 From dff29b1a88524fe6afe296d6c477c491d1e02af0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 2 Apr 2015 09:33:01 -0400 Subject: tipc: eliminate delayed link deletion at link failover When a bearer is disabled manually, all its links have to be reset and deleted. However, if there is a remaining, parallel link ready to take over a deleted link's traffic, we currently delay the delete of the removed link until the failover procedure is finished. This is because the remaining link needs to access state from the reset link, such as the last received packet number, and any partially reassembled buffer, in order to perform a successful failover. In this commit, we do instead move the state data over to the new link, so that it can fulfill the procedure autonomously, without accessing any data on the old link. This means that we can now proceed and delete all pertaining links immediately when a bearer is disabled. This saves us from some unnecessary complexity in such situations. We also choose to change the confusing definitions CHANGEOVER_PROTOCOL, ORIGINAL_MSG and DUPLICATE_MSG to the more descriptive TUNNEL_PROTOCOL, FAILOVER_MSG and SYNCH_MSG respectively. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 124 +++++++++++++++++++++++++------------------------------- net/tipc/link.h | 17 ++++---- net/tipc/msg.c | 4 +- net/tipc/msg.h | 10 ++--- net/tipc/node.c | 13 +++--- 5 files changed, 78 insertions(+), 90 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/link.c b/net/tipc/link.c index c697cf69da91..b1e17953eeea 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -89,17 +89,9 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { #define TIMEOUT_EVT 560817u /* link timer expired */ /* - * The following two 'message types' is really just implementation - * data conveniently stored in the message header. - * They must not be considered part of the protocol + * State value stored in 'failover_pkts' */ -#define OPEN_MSG 0 -#define CLOSED_MSG 1 - -/* - * State value stored in 'exp_msg_count' - */ -#define START_CHANGEOVER 100000u +#define FIRST_FAILOVER 0xffffu static void link_handle_out_of_seq_msg(struct tipc_link *link, struct sk_buff *skb); @@ -113,8 +105,7 @@ static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); -static bool tipc_link_failover_rcv(struct tipc_node *node, - struct sk_buff **skb); +static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); /* * Simple link routines */ @@ -332,15 +323,19 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, } /** - * link_delete - Conditional deletion of link. - * If timer still running, real delete is done when it expires - * @link: link to be deleted + * tipc_link_delete - Delete a link + * @l: link to be deleted */ -void tipc_link_delete(struct tipc_link *link) +void tipc_link_delete(struct tipc_link *l) { - tipc_link_reset_fragments(link); - tipc_node_detach_link(link->owner, link); - tipc_link_put(link); + tipc_link_reset(l); + if (del_timer(&l->timer)) + tipc_link_put(l); + l->flags |= LINK_STOPPED; + /* Delete link now, or when timer is finished: */ + tipc_link_reset_fragments(l); + tipc_node_detach_link(l->owner, l); + tipc_link_put(l); } void tipc_link_delete_list(struct net *net, unsigned int bearer_id, @@ -349,23 +344,12 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *link; struct tipc_node *node; - bool del_link; rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); link = node->links[bearer_id]; - if (!link) { - tipc_node_unlock(node); - continue; - } - del_link = !tipc_link_is_up(link) && !link->exp_msg_count; - tipc_link_reset(link); - if (del_timer(&link->timer)) - tipc_link_put(link); - link->flags |= LINK_STOPPED; - /* Delete link now, or when failover is finished: */ - if (shutting_down || !tipc_node_is_up(node) || del_link) + if (link) tipc_link_delete(link); tipc_node_unlock(node); } @@ -472,9 +456,9 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) void tipc_link_reset(struct tipc_link *l_ptr) { u32 prev_state = l_ptr->state; - u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); struct tipc_node *owner = l_ptr->owner; + struct tipc_link *pl = tipc_parallel_link(l_ptr); msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); @@ -492,11 +476,15 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); - if (was_active_link && tipc_node_active_links(l_ptr->owner)) { - l_ptr->reset_checkpoint = checkpoint; - l_ptr->exp_msg_count = START_CHANGEOVER; + if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { + l_ptr->flags |= LINK_FAILINGOVER; + l_ptr->failover_checkpt = l_ptr->next_in_no; + pl->failover_pkts = FIRST_FAILOVER; + pl->failover_checkpt = l_ptr->next_in_no; + pl->failover_skb = l_ptr->reasm_buf; + } else { + kfree_skb(l_ptr->reasm_buf); } - /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->transmq); __skb_queue_purge(&l_ptr->deferdq); @@ -506,6 +494,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; tipc_link_purge_backlog(l_ptr); + l_ptr->reasm_buf = NULL; l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; @@ -557,8 +546,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not yet. */ - /* Check whether changeover is going on */ - if (l_ptr->exp_msg_count) { + if (l_ptr->flags & LINK_FAILINGOVER) { if (event == TIMEOUT_EVT) link_set_timer(l_ptr, cont_intv); return; @@ -1242,7 +1230,7 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) node->action_flags |= TIPC_NAMED_MSG_EVT; return true; case MSG_BUNDLER: - case CHANGEOVER_PROTOCOL: + case TUNNEL_PROTOCOL: case MSG_FRAGMENTER: case BCAST_PROTOCOL: return false; @@ -1269,14 +1257,14 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) return; switch (msg_user(msg)) { - case CHANGEOVER_PROTOCOL: + case TUNNEL_PROTOCOL: if (msg_dup(msg)) { link->flags |= LINK_SYNCHING; link->synch_point = msg_seqno(msg_get_wrapped(msg)); kfree_skb(skb); break; } - if (!tipc_link_failover_rcv(node, &skb)) + if (!tipc_link_failover_rcv(link, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { tipc_data_input(link, skb); @@ -1391,8 +1379,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, u32 msg_size = sizeof(l_ptr->proto_msg); int r_flag; - /* Don't send protocol message during link changeover */ - if (l_ptr->exp_msg_count) + /* Don't send protocol message during link failover */ + if (l_ptr->flags & LINK_FAILINGOVER) return; /* Abort non-RESET send if communication with node is prohibited */ @@ -1444,7 +1432,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, } l_ptr->stats.sent_states++; } else { /* RESET_MSG or ACTIVATE_MSG */ - msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); + msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); msg_set_seq_gap(msg, 0); msg_set_next_sent(msg, 1); msg_set_probe(msg, 0); @@ -1486,8 +1474,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - /* Discard protocol message during link changeover */ - if (l_ptr->exp_msg_count) + if (l_ptr->flags & LINK_FAILINGOVER) goto exit; if (l_ptr->net_plane != msg_net_plane(msg)) @@ -1659,8 +1646,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, + FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); tipc_link_purge_backlog(l_ptr); msgcount = skb_queue_len(&l_ptr->transmq); @@ -1722,8 +1709,8 @@ void tipc_link_dup_queue_xmit(struct tipc_link *link, struct sk_buff_head *queue = &link->transmq; int mcnt; - tipc_msg_init(link_own_addr(link), &tnl_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, link->addr); + tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, + SYNCH_MSG, INT_H_SIZE, link->addr); mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); msg_set_msgcnt(&tnl_hdr, mcnt); msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); @@ -1756,36 +1743,37 @@ tunnel_queue: goto tunnel_queue; } -/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet +/* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet * Owner node is locked. */ -static bool tipc_link_failover_rcv(struct tipc_node *node, +static bool tipc_link_failover_rcv(struct tipc_link *link, struct sk_buff **skb) { struct tipc_msg *msg = buf_msg(*skb); struct sk_buff *iskb = NULL; - struct tipc_link *link = NULL; + struct tipc_link *pl = NULL; int bearer_id = msg_bearer_id(msg); int pos = 0; - if (msg_type(msg) != ORIGINAL_MSG) { + if (msg_type(msg) != FAILOVER_MSG) { pr_warn("%sunknown tunnel pkt received\n", link_co_err); goto exit; } if (bearer_id >= MAX_BEARERS) goto exit; - link = node->links[bearer_id]; - if (!link) + + if (bearer_id == link->bearer_id) goto exit; - if (tipc_link_is_up(link)) - tipc_link_reset(link); - /* First failover packet? */ - if (link->exp_msg_count == START_CHANGEOVER) - link->exp_msg_count = msg_msgcnt(msg); + pl = link->owner->links[bearer_id]; + if (pl && tipc_link_is_up(pl)) + tipc_link_reset(pl); + + if (link->failover_pkts == FIRST_FAILOVER) + link->failover_pkts = msg_msgcnt(msg); /* Should we expect an inner packet? */ - if (!link->exp_msg_count) + if (!link->failover_pkts) goto exit; if (!tipc_msg_extract(*skb, &iskb, &pos)) { @@ -1793,22 +1781,22 @@ static bool tipc_link_failover_rcv(struct tipc_node *node, *skb = NULL; goto exit; } - link->exp_msg_count--; + link->failover_pkts--; *skb = NULL; - /* Was packet already delivered? */ - if (less(buf_seqno(iskb), link->reset_checkpoint)) { + /* Was this packet already delivered? */ + if (less(buf_seqno(iskb), link->failover_checkpt)) { kfree_skb(iskb); iskb = NULL; goto exit; } if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { link->stats.recv_fragments++; - tipc_buf_append(&link->reasm_buf, &iskb); + tipc_buf_append(&link->failover_skb, &iskb); } exit: - if (link && (!link->exp_msg_count) && (link->flags & LINK_STOPPED)) - tipc_link_delete(link); + if (!link->failover_pkts && pl) + pl->flags &= ~LINK_FAILINGOVER; kfree_skb(*skb); *skb = iskb; return *skb; diff --git a/net/tipc/link.h b/net/tipc/link.h index d2b5663643da..6e28f03c7905 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -58,9 +58,10 @@ /* Link endpoint execution states */ -#define LINK_STARTED 0x0001 -#define LINK_STOPPED 0x0002 -#define LINK_SYNCHING 0x0004 +#define LINK_STARTED 0x0001 +#define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 +#define LINK_FAILINGOVER 0x0008 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -167,11 +168,12 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; + u16 synch_point; - /* Changeover */ - u32 exp_msg_count; - u32 reset_checkpoint; - u32 synch_point; + /* Failover */ + u16 failover_pkts; + u16 failover_checkpt; + struct sk_buff *failover_skb; /* Max packet negotiation */ u32 max_pkt; @@ -201,7 +203,6 @@ struct tipc_link { struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ - u32 long_msg_seq_no; struct sk_buff *reasm_buf; /* Statistics */ diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 3bb499c61918..c3e96e815418 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -355,7 +355,7 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) start = align(bsz); pad = start - bsz; - if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL)) return false; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) return false; @@ -433,7 +433,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) if (msg_user(msg) == MSG_FRAGMENTER) return false; - if (msg_user(msg) == CHANGEOVER_PROTOCOL) + if (msg_user(msg) == TUNNEL_PROTOCOL) return false; if (msg_user(msg) == BCAST_PROTOCOL) return false; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d273207ede28..e1d3595e2ee9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -72,7 +72,7 @@ struct plist; #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 -#define CHANGEOVER_PROTOCOL 10 +#define TUNNEL_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 @@ -512,8 +512,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) /* * Changeover tunnel message types */ -#define DUPLICATE_MSG 0 -#define ORIGINAL_MSG 1 +#define SYNCH_MSG 0 +#define FAILOVER_MSG 1 /* * Config protocol message types @@ -556,9 +556,9 @@ static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) static inline bool msg_dup(struct tipc_msg *m) { - if (likely(msg_user(m) != CHANGEOVER_PROTOCOL)) + if (likely(msg_user(m) != TUNNEL_PROTOCOL)) return false; - if (msg_type(m) != DUPLICATE_MSG) + if (msg_type(m) != SYNCH_MSG) return false; return true; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 3e4f04897c03..f3d522c2881a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -394,18 +394,17 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.recv_permitted = false; } - /* Abort link changeover */ + /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { struct tipc_link *l_ptr = n_ptr->links[i]; if (!l_ptr) continue; - l_ptr->reset_checkpoint = l_ptr->next_in_no; - l_ptr->exp_msg_count = 0; + l_ptr->flags &= ~LINK_FAILINGOVER; + l_ptr->failover_checkpt = 0; + l_ptr->failover_pkts = 0; + kfree_skb(l_ptr->failover_skb); + l_ptr->failover_skb = NULL; tipc_link_reset_fragments(l_ptr); - - /* Link marked for deletion after failover? => do it now */ - if (l_ptr->flags & LINK_STOPPED) - tipc_link_delete(l_ptr); } n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; -- cgit v1.2.3 From ed193ece2649c194a87a9d8470195760d367c075 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 2 Apr 2015 09:33:02 -0400 Subject: tipc: simplify link mtu negotiation When a link is being established, the two endpoints advertise their respective interface MTU in the transmitted RESET and ACTIVATE messages. If there is any difference, the lower of the two MTUs will be selected for use by both endpoints. However, as a remnant of earlier attempts to introduce TIPC level routing. there also exists an MTU discovery mechanism. If an intermediate node has a lower MTU than the two endpoints, they will discover this through a bisectional approach, and finally adopt this MTU for common use. Since there is no TIPC level routing, and probably never will be, this mechanism doesn't make any sense, and only serves to make the link level protocol unecessarily complex. In this commit, we eliminate the MTU discovery algorithm,and fall back to the simple MTU advertising approach. This change is fully backwards compatible. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 4 +- net/tipc/link.c | 129 ++++++++++++++----------------------------------------- net/tipc/link.h | 12 +++--- net/tipc/node.c | 9 ++-- 4 files changed, 43 insertions(+), 111 deletions(-) (limited to 'net/tipc') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ae558dd7f8ee..c5cbdcb1f0b5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -413,7 +413,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) */ if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], - STATE_MSG, 0, 0, 0, 0, 0); + STATE_MSG, 0, 0, 0, 0); tn->bcl->stats.sent_acks++; } } @@ -899,7 +899,7 @@ int tipc_bclink_init(struct net *net) skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; bcl->owner->net = net; - bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; + bcl->mtu = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); diff --git a/net/tipc/link.c b/net/tipc/link.c index b1e17953eeea..a6b30df6ec02 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -136,34 +136,6 @@ static struct tipc_link *tipc_parallel_link(struct tipc_link *l) return l->owner->active_links[1]; } -static void link_init_max_pkt(struct tipc_link *l_ptr) -{ - struct tipc_node *node = l_ptr->owner; - struct tipc_net *tn = net_generic(node->net, tipc_net_id); - struct tipc_bearer *b_ptr; - u32 max_pkt; - - rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); - if (!b_ptr) { - rcu_read_unlock(); - return; - } - max_pkt = (b_ptr->mtu & ~3); - rcu_read_unlock(); - - if (max_pkt > MAX_MSG_SIZE) - max_pkt = MAX_MSG_SIZE; - - l_ptr->max_pkt_target = max_pkt; - if (l_ptr->max_pkt_target < MAX_PKT_DEFAULT) - l_ptr->max_pkt = l_ptr->max_pkt_target; - else - l_ptr->max_pkt = MAX_PKT_DEFAULT; - - l_ptr->max_pkt_probes = 0; -} - /* * Simple non-static link routines (i.e. referenced outside this file) */ @@ -304,7 +276,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); l_ptr->net_plane = b_ptr->net_plane; - link_init_max_pkt(l_ptr); + l_ptr->advertised_mtu = b_ptr->mtu; + l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); l_ptr->next_out_no = 1; @@ -465,8 +438,8 @@ void tipc_link_reset(struct tipc_link *l_ptr) /* Link is down, accept any session */ l_ptr->peer_session = INVALID_SESSION; - /* Prepare for max packet size negotiation */ - link_init_max_pkt(l_ptr); + /* Prepare for renewed mtu size negotiation */ + l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->state = RESET_UNKNOWN; @@ -563,11 +536,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } else if (l_ptr->max_pkt < l_ptr->max_pkt_target) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); @@ -575,7 +544,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } l_ptr->state = WORKING_UNKNOWN; l_ptr->fsm_msg_cnt = 0; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); break; @@ -586,7 +555,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -609,7 +578,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -620,13 +589,13 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ @@ -636,7 +605,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, RESET_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); } @@ -656,7 +625,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); @@ -666,7 +635,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 1, 0, 0, 0, 0); + 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -676,7 +645,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case TIMEOUT_EVT: - tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -694,7 +663,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); @@ -704,7 +673,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case TIMEOUT_EVT: tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -733,7 +702,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct tipc_msg *msg = buf_msg(skb_peek(list)); unsigned int maxwin = link->window; unsigned int imp = msg_importance(msg); - uint mtu = link->max_pkt; + uint mtu = link->mtu; uint ack = mod(link->next_in_no - 1); uint seqno = link->next_out_no; uint bc_last_in = link->owner->bclink.last_in; @@ -1187,7 +1156,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) link_retrieve_defq(l_ptr, &head); if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); } tipc_link_input(l_ptr, skb); skb = NULL; @@ -1362,7 +1331,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { l_ptr->stats.deferred_recv++; if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); } else { l_ptr->stats.duplicates++; } @@ -1372,7 +1341,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. */ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority, u32 ack_mtu) + u32 gap, u32 tolerance, u32 priority) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -1410,26 +1379,11 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, l_ptr->stats.sent_nacks++; msg_set_link_tolerance(msg, tolerance); msg_set_linkprio(msg, priority); - msg_set_max_pkt(msg, ack_mtu); + msg_set_max_pkt(msg, l_ptr->mtu); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_probe(msg, probe_msg != 0); - if (probe_msg) { - u32 mtu = l_ptr->max_pkt; - - if ((mtu < l_ptr->max_pkt_target) && - link_working_working(l_ptr) && - l_ptr->fsm_msg_cnt) { - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - if (l_ptr->max_pkt_probes == 10) { - l_ptr->max_pkt_target = (msg_size - 4); - l_ptr->max_pkt_probes = 0; - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - } - l_ptr->max_pkt_probes++; - } - + if (probe_msg) l_ptr->stats.sent_probes++; - } l_ptr->stats.sent_states++; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); @@ -1438,7 +1392,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_probe(msg, 0); msg_set_link_tolerance(msg, l_ptr->tolerance); msg_set_linkprio(msg, l_ptr->priority); - msg_set_max_pkt(msg, l_ptr->max_pkt_target); + msg_set_max_pkt(msg, l_ptr->advertised_mtu); } r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); @@ -1469,8 +1423,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 rec_gap = 0; - u32 max_pkt_info; - u32 max_pkt_ack; u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); @@ -1513,15 +1465,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, if (msg_linkprio(msg) > l_ptr->priority) l_ptr->priority = msg_linkprio(msg); - max_pkt_info = msg_max_pkt(msg); - if (max_pkt_info) { - if (max_pkt_info < l_ptr->max_pkt_target) - l_ptr->max_pkt_target = max_pkt_info; - if (l_ptr->max_pkt > l_ptr->max_pkt_target) - l_ptr->max_pkt = l_ptr->max_pkt_target; - } else { - l_ptr->max_pkt = l_ptr->max_pkt_target; - } + if (l_ptr->mtu > msg_max_pkt(msg)) + l_ptr->mtu = msg_max_pkt(msg); /* Synchronize broadcast link info, if not done previously */ if (!tipc_node_is_up(l_ptr->owner)) { @@ -1566,18 +1511,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, mod(l_ptr->next_in_no)); } - max_pkt_ack = msg_max_pkt(msg); - if (max_pkt_ack > l_ptr->max_pkt) { - l_ptr->max_pkt = max_pkt_ack; - l_ptr->max_pkt_probes = 0; - } - - max_pkt_ack = 0; - if (msg_probe(msg)) { + if (msg_probe(msg)) l_ptr->stats.recv_probes++; - if (msg_size(msg) > sizeof(l_ptr->proto_msg)) - max_pkt_ack = msg_size(msg); - } /* Protocol message before retransmits, reduce loss risk */ if (l_ptr->owner->bclink.recv_permitted) @@ -1585,8 +1520,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0, - 0, max_pkt_ack); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, + rec_gap, 0, 0); } if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; @@ -1816,7 +1751,7 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { - int max_bulk = TIPC_MAX_PUBLICATIONS / (l->max_pkt / ITEM_SIZE); + int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); l->window = win; l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; @@ -1988,14 +1923,14 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); link_set_supervision_props(link, tol); - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0, 0); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); link->priority = prio; - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio, 0); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -2100,7 +2035,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) goto attr_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index 6e28f03c7905..b5b4e3554d4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,9 +123,8 @@ struct tipc_stats { * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset - * @max_pkt: current maximum packet size for this link - * @max_pkt_target: desired maximum packet size for this link - * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target) + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent * @next_out_no: next sequence number to use for outbound messages @@ -176,9 +175,8 @@ struct tipc_link { struct sk_buff *failover_skb; /* Max packet negotiation */ - u32 max_pkt; - u32 max_pkt_target; - u32 max_pkt_probes; + u16 mtu; + u16 advertised_mtu; /* Sending */ struct sk_buff_head transmq; @@ -233,7 +231,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority, u32 acked_mtu); + u32 gap, u32 tolerance, u32 priority); void tipc_link_push_packets(struct tipc_link *l_ptr); u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); diff --git a/net/tipc/node.c b/net/tipc/node.c index f3d522c2881a..22c059ad2999 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -254,8 +254,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ - n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; } /** @@ -319,11 +319,10 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) /* Leave room for changeover header when returning 'mtu' to users: */ if (active[0]) { - n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; return; } - /* Loopback link went down? No fragmentation needed from now on. */ if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; -- cgit v1.2.3 From 79b16aadea32cce077acbe9e229fcb58a7801687 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 5 Apr 2015 22:19:09 -0400 Subject: udp_tunnel: Pass UDP socket down through udp_tunnel{, 6}_xmit_skb(). That was we can make sure the output path of ipv4/ipv6 operate on the UDP socket rather than whatever random thing happens to be in skb->sk. Based upon a patch by Jiri Pirko. Signed-off-by: David S. Miller Acked-by: Hannes Frederic Sowa --- drivers/net/vxlan.c | 14 ++++++++------ include/net/ip6_tunnel.h | 5 +++-- include/net/ipv6.h | 1 + include/net/udp_tunnel.h | 5 +++-- include/net/vxlan.h | 2 +- net/ipv4/geneve.c | 2 +- net/ipv4/ip_tunnel.c | 2 +- net/ipv4/udp_tunnel.c | 4 ++-- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6_udp_tunnel.c | 5 +++-- net/ipv6/output_core.c | 21 ++++++++++++++++----- net/openvswitch/vport-vxlan.c | 5 +++-- net/tipc/udp_media.c | 6 ++++-- 14 files changed, 48 insertions(+), 28 deletions(-) (limited to 'net/tipc') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b5fecb49a0c6..51baac725a48 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1672,7 +1672,8 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, } #if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port, @@ -1748,7 +1749,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, + udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, ttl, src_port, dst_port, !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); return 0; @@ -1758,7 +1759,7 @@ err: } #endif -int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, bool xnet, u32 vxflags) @@ -1827,7 +1828,7 @@ int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, + return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, !(vxflags & VXLAN_F_UDP_CSUM)); } @@ -1882,6 +1883,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { struct vxlan_dev *vxlan = netdev_priv(dev); + struct sock *sk = vxlan->vn_sock->sock->sk; struct rtable *rt = NULL; const struct iphdr *old_iph; struct flowi4 fl4; @@ -1961,7 +1963,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan_xmit_skb(rt, skb, fl4.saddr, + err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev)), @@ -2021,7 +2023,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr, + err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr, 0, ttl, src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev)), vxlan->flags); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 1668be5937e6..b8529aa1dae7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -73,13 +73,14 @@ __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); -static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, + struct net_device *dev) { struct net_device_stats *stats = &dev->stats; int pkt_len, err; pkt_len = skb->len; - err = ip6_local_out(skb); + err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index b6ae959824ff..27470cd1d5f8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -827,6 +827,7 @@ int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); int __ip6_local_out(struct sk_buff *skb); +int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct sk_buff *skb); /* diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 1a20d33d56bc..c491c1221606 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -77,13 +77,14 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 756e4636bad8..0082b5d33d7d 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -145,7 +145,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, void vxlan_sock_release(struct vxlan_sock *vs); -int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, bool xnet, u32 vxflags); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index e64f8e9785d1..b77f5e84c623 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -136,7 +136,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(rt, skb, src, dst, + return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, !csum); } diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6d364ab8e14e..4c2c3ba4ba65 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -782,7 +782,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index c83b35485056..6bb98cc193c9 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -75,7 +75,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck) @@ -92,7 +92,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f724329d7436..b5e6cc1d4a73 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -760,7 +760,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_set_inner_protocol(skb, protocol); - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); return 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b6a211a150b2..5cafd92c2312 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1100,7 +1100,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 32d9b268e7d8..bba8903e871f 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,7 +62,8 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, @@ -97,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(sk, skb, dev); return 0; } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 7d1131dc29fe..85892af57364 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -136,7 +136,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) EXPORT_SYMBOL(ip6_dst_hoplimit); #endif -int __ip6_local_out(struct sk_buff *skb) +static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int len; @@ -146,19 +146,30 @@ int __ip6_local_out(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb->sk, skb, + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } + +int __ip6_local_out(struct sk_buff *skb) +{ + return __ip6_local_out_sk(skb->sk, skb); +} EXPORT_SYMBOL_GPL(__ip6_local_out); -int ip6_local_out(struct sk_buff *skb) +int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; - err = __ip6_local_out(skb); + err = __ip6_local_out_sk(sk, skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } +EXPORT_SYMBOL_GPL(ip6_local_out_sk); + +int ip6_local_out(struct sk_buff *skb) +{ + return ip6_local_out_sk(skb->sk, skb); +} EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 3277a7520e31..6d39766e7828 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -222,7 +222,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + struct sock *sk = vxlan_port->vs->sock->sk; + __be16 dst_port = inet_sk(sk)->inet_sport; const struct ovs_key_ipv4_tunnel *tun_key; struct vxlan_metadata md = {0}; struct rtable *rt; @@ -255,7 +256,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) vxflags = vxlan_port->exts | (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, &md, false, vxflags); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ef3d7aa2854a..66deebc66aa1 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -176,7 +176,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr, + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, dst->udp_port, false, true); @@ -197,7 +198,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6, + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + ndst->dev, &src->ipv6, &dst->ipv6, 0, ttl, src->udp_port, dst->udp_port, false); #endif -- cgit v1.2.3