From 05dcc5aa4dcced4f59f925625cea669e82b75519 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 13 Mar 2015 16:08:10 -0400 Subject: tipc: split link outqueue struct tipc_link contains one single queue for outgoing packets, where both transmitted and waiting packets are queued. This infrastructure is hard to maintain, because we need to keep a number of fields to keep track of which packets are sent or unsent, and the number of packets in each category. A lot of code becomes simpler if we split this queue into a transmission queue, where sent/unacknowledged packets are kept, and a backlog queue, where we keep the not yet sent packets. In this commit we do this separation. Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/node.c b/net/tipc/node.c index 86152de8248d..26d1de1bf34d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,7 +111,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - __skb_queue_head_init(&n_ptr->bclink.deferred_queue); + __skb_queue_head_init(&n_ptr->bclink.deferdq); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -354,7 +354,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) /* Flush broadcast link info associated with lost node */ if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferred_queue); + __skb_queue_purge(&n_ptr->bclink.deferdq); if (n_ptr->bclink.reasm_buf) { kfree_skb(n_ptr->bclink.reasm_buf); -- cgit v1.2.3 From b952b2befb6f6b009e91f087285b9a0a6beb1cc8 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 26 Mar 2015 18:10:23 +0800 Subject: tipc: fix potential deadlock when all 
links are reset [ 60.988363] ====================================================== [ 60.988754] [ INFO: possible circular locking dependency detected ] [ 60.989152] 3.19.0+ #194 Not tainted [ 60.989377] ------------------------------------------------------- [ 60.989781] swapper/3/0 is trying to acquire lock: [ 60.990079] (&(&n_ptr->lock)->rlock){+.-...}, at: [] tipc_link_retransmit+0x1aa/0x240 [tipc] [ 60.990743] [ 60.990743] but task is already holding lock: [ 60.991106] (&(&bclink->lock)->rlock){+.-...}, at: [] tipc_bclink_lock+0x8e/0xa0 [tipc] [ 60.991738] [ 60.991738] which lock already depends on the new lock. [ 60.991738] [ 60.992174] [ 60.992174] the existing dependency chain (in reverse order) is: [ 60.992174] -> #1 (&(&bclink->lock)->rlock){+.-...}: [ 60.992174] [] lock_acquire+0x9c/0x140 [ 60.992174] [] _raw_spin_lock_bh+0x3f/0x50 [ 60.992174] [] tipc_bclink_lock+0x8e/0xa0 [tipc] [ 60.992174] [] tipc_bclink_add_node+0x97/0xf0 [tipc] [ 60.992174] [] tipc_node_link_up+0xf5/0x110 [tipc] [ 60.992174] [] link_state_event+0x2b3/0x4f0 [tipc] [ 60.992174] [] tipc_link_proto_rcv+0x24c/0x418 [tipc] [ 60.992174] [] tipc_rcv+0x827/0xac0 [tipc] [ 60.992174] [] tipc_l2_rcv_msg+0x73/0xd0 [tipc] [ 60.992174] [] __netif_receive_skb_core+0x746/0x980 [ 60.992174] [] __netif_receive_skb+0x21/0x70 [ 60.992174] [] netif_receive_skb_internal+0x35/0x130 [ 60.992174] [] napi_gro_receive+0x158/0x1d0 [ 60.992174] [] e1000_clean_rx_irq+0x155/0x490 [ 60.992174] [] e1000_clean+0x267/0x990 [ 60.992174] [] net_rx_action+0x150/0x360 [ 60.992174] [] __do_softirq+0x123/0x360 [ 60.992174] [] irq_exit+0x8e/0xb0 [ 60.992174] [] do_IRQ+0x65/0x110 [ 60.992174] [] ret_from_intr+0x0/0x13 [ 60.992174] [] arch_cpu_idle+0xf/0x20 [ 60.992174] [] cpu_startup_entry+0x2f6/0x3f0 [ 60.992174] [] start_secondary+0x13a/0x150 [ 60.992174] -> #0 (&(&n_ptr->lock)->rlock){+.-...}: [ 60.992174] [] __lock_acquire+0x163d/0x1ca0 [ 60.992174] [] lock_acquire+0x9c/0x140 [ 60.992174] [] _raw_spin_lock_bh+0x3f/0x50 [ 
60.992174] [] tipc_link_retransmit+0x1aa/0x240 [tipc] [ 60.992174] [] tipc_bclink_rcv+0x611/0x640 [tipc] [ 60.992174] [] tipc_rcv+0x616/0xac0 [tipc] [ 60.992174] [] tipc_l2_rcv_msg+0x73/0xd0 [tipc] [ 60.992174] [] __netif_receive_skb_core+0x746/0x980 [ 60.992174] [] __netif_receive_skb+0x21/0x70 [ 60.992174] [] netif_receive_skb_internal+0x35/0x130 [ 60.992174] [] napi_gro_receive+0x158/0x1d0 [ 60.992174] [] e1000_clean_rx_irq+0x155/0x490 [ 60.992174] [] e1000_clean+0x267/0x990 [ 60.992174] [] net_rx_action+0x150/0x360 [ 60.992174] [] __do_softirq+0x123/0x360 [ 60.992174] [] irq_exit+0x8e/0xb0 [ 60.992174] [] do_IRQ+0x65/0x110 [ 60.992174] [] ret_from_intr+0x0/0x13 [ 60.992174] [] arch_cpu_idle+0xf/0x20 [ 60.992174] [] cpu_startup_entry+0x2f6/0x3f0 [ 60.992174] [] start_secondary+0x13a/0x150 [ 60.992174] [ 60.992174] other info that might help us debug this: [ 60.992174] [ 60.992174] Possible unsafe locking scenario: [ 60.992174] [ 60.992174] CPU0 CPU1 [ 60.992174] ---- ---- [ 60.992174] lock(&(&bclink->lock)->rlock); [ 60.992174] lock(&(&n_ptr->lock)->rlock); [ 60.992174] lock(&(&bclink->lock)->rlock); [ 60.992174] lock(&(&n_ptr->lock)->rlock); [ 60.992174] [ 60.992174] *** DEADLOCK *** [ 60.992174] [ 60.992174] 3 locks held by swapper/3/0: [ 60.992174] #0: (rcu_read_lock){......}, at: [] __netif_receive_skb_core+0x71/0x980 [ 60.992174] #1: (rcu_read_lock){......}, at: [] tipc_l2_rcv_msg+0x5/0xd0 [tipc] [ 60.992174] #2: (&(&bclink->lock)->rlock){+.-...}, at: [] tipc_bclink_lock+0x8e/0xa0 [tipc] [ 60.992174] The correct sequence for grabbing n_ptr->lock and bclink->lock is that the former is held first and the latter is taken afterwards, which is exactly what happens on CPU1. But when retransmission on the broadcast link fails, bclink->lock is first held in tipc_bclink_rcv(), and n_ptr->lock is subsequently taken in link_retransmit_failure(), called by tipc_link_retransmit(), as demonstrated on CPU0. As a result, a deadlock can occur. 
If the order of holding the two locks happening on CPU0 is reversed, the deadlock risk will be relieved. Therefore, the node lock taken in link_retransmit_failure() originally is moved to tipc_bclink_rcv() so that it's obtained before bclink lock. But the precondition of the adjustment of node lock is that responding to bclink reset event must be moved from tipc_bclink_unlock() to tipc_node_unlock(). Reviewed-by: Erik Hugne Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 23 +---------------------- net/tipc/bcast.h | 4 ---- net/tipc/link.c | 5 +---- net/tipc/node.c | 5 ++++- net/tipc/node.h | 3 ++- 5 files changed, 8 insertions(+), 32 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 79355531c3e2..4289dd62f589 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -62,21 +62,8 @@ static void tipc_bclink_lock(struct net *net) static void tipc_bclink_unlock(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_node *node = NULL; - if (likely(!tn->bclink->flags)) { - spin_unlock_bh(&tn->bclink->lock); - return; - } - - if (tn->bclink->flags & TIPC_BCLINK_RESET) { - tn->bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(net); - } spin_unlock_bh(&tn->bclink->lock); - - if (node) - tipc_link_reset_all(node); } void tipc_bclink_input(struct net *net) @@ -91,13 +78,6 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(struct net *net, unsigned int flags) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tn->bclink->flags |= flags; -} - static u32 bcbuf_acks(struct sk_buff *buf) { return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; @@ -156,7 +136,6 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) seqno : node->bclink.last_sent; } - /** * tipc_bclink_retransmit_to - get most recent node to request retransmission * @@ -476,13 +455,13 @@ void tipc_bclink_rcv(struct net 
*net, struct sk_buff *buf) goto unlock; if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_node_unlock(node); tipc_bclink_lock(net); bcl->stats.recv_nacks++; tn->bclink->retransmit_to = node; bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); tipc_bclink_unlock(net); + tipc_node_unlock(node); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 43f397fbac55..4bdc12277d33 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,6 @@ struct tipc_bcbearer_pair { struct tipc_bearer *secondary; }; -#define TIPC_BCLINK_RESET 1 #define BCBEARER MAX_BEARERS /** @@ -86,7 +85,6 @@ struct tipc_bcbearer { * @lock: spinlock governing access to structure * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states * @bcast_nodes: map of broadcast-capable nodes * @retransmit_to: node that most recently requested a retransmit * @@ -96,7 +94,6 @@ struct tipc_bclink { spinlock_t lock; struct tipc_link link; struct tipc_node node; - unsigned int flags; struct sk_buff_head arrvq; struct sk_buff_head inputq; struct tipc_node_map bcast_nodes; @@ -117,7 +114,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, int tipc_bclink_init(struct net *net); void tipc_bclink_stop(struct net *net); -void tipc_bclink_set_flags(struct net *tn, unsigned int flags); void tipc_bclink_add_node(struct net *net, u32 addr); void tipc_bclink_remove_node(struct net *net, u32 addr); struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); diff --git a/net/tipc/link.c b/net/tipc/link.c index 1287161e9424..f5e086c5f724 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -980,7 +980,6 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, (unsigned long) TIPC_SKB_CB(buf)->handle); n_ptr = tipc_bclink_retransmit_to(net); - tipc_node_lock(n_ptr); 
tipc_addr_string_fill(addr_string, n_ptr->addr); pr_info("Broadcast link info for %s\n", addr_string); @@ -992,9 +991,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr, n_ptr->bclink.oos_state, n_ptr->bclink.last_sent); - tipc_node_unlock(n_ptr); - - tipc_bclink_set_flags(net, TIPC_BCLINK_RESET); + n_ptr->action_flags |= TIPC_BCAST_RESET; l_ptr->stale_count = 0; } } diff --git a/net/tipc/node.c b/net/tipc/node.c index 26d1de1bf34d..5cc43d34ad0a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -459,7 +459,7 @@ void tipc_node_unlock(struct tipc_node *node) TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_NAMED_MSG_EVT); + TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET); spin_unlock_bh(&node->lock); @@ -488,6 +488,9 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_BCAST_MSG_EVT) tipc_bclink_input(net); + + if (flags & TIPC_BCAST_RESET) + tipc_link_reset_all(node); } /* Caller should hold node lock for the passed node */ diff --git a/net/tipc/node.h b/net/tipc/node.h index e89ac04ec2c3..9629ecd2bdd8 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -64,7 +64,8 @@ enum { TIPC_NOTIFY_LINK_UP = (1 << 6), TIPC_NOTIFY_LINK_DOWN = (1 << 7), TIPC_NAMED_MSG_EVT = (1 << 8), - TIPC_BCAST_MSG_EVT = (1 << 9) + TIPC_BCAST_MSG_EVT = (1 << 9), + TIPC_BCAST_RESET = (1 << 10) }; /** -- cgit v1.2.3 From 8a0f6ebe8494c5c6ccfe12264385b64c280e3241 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 26 Mar 2015 18:10:24 +0800 Subject: tipc: involve reference counter for node structure The TIPC node hash table is protected by the RCU lock on the read side. tipc_node_find() looks up a node object by its node address by iterating over the hash table. As the entire traversal performed by tipc_node_find() is guarded by the RCU read lock, the lookup itself is safe. 
However, when callers use the node object returned by tipc_node_find(), there is no rcu read lock applied. Therefore, this is absolutely unsafe for callers of tipc_node_find(). Now we introduce a reference counter for node structure. Before tipc_node_find() returns node object to its caller, it first increases the reference counter. Accordingly, after its caller used it up, it decreases the counter again. This can prevent a node being used by one thread from being freed by another thread. Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bcast.c | 5 +-- net/tipc/discover.c | 1 + net/tipc/link.c | 7 +++-- net/tipc/name_distr.c | 2 ++ net/tipc/node.c | 85 ++++++++++++++++++++++++++++++++++++--------------- net/tipc/node.h | 9 ++++-- 6 files changed, 79 insertions(+), 30 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4289dd62f589..ae558dd7f8ee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -329,13 +329,12 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) return; tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && (n_ptr->bclink.last_in == msg_bcgap_after(msg))) n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); } /* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster @@ -466,6 +465,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) tipc_node_unlock(node); bclink_peek_nack(net, msg); } + tipc_node_put(node); goto exit; } @@ -570,6 +570,7 @@ receive: unlock: tipc_node_unlock(node); + tipc_node_put(node); exit: kfree_skb(buf); } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 169f3dd038b9..967e292f53c8 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -260,6 +260,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf, } } tipc_node_unlock(node); + tipc_node_put(node); } /** diff 
--git a/net/tipc/link.c b/net/tipc/link.c index f5e086c5f724..514466efc25c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -854,6 +854,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, if (link) rc = __tipc_link_xmit(net, link, list); tipc_node_unlock(node); + tipc_node_put(node); } if (link) return rc; @@ -1116,8 +1117,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) n_ptr = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!n_ptr)) goto discard; - tipc_node_lock(n_ptr); + tipc_node_lock(n_ptr); /* Locate unicast link endpoint that should handle message */ l_ptr = n_ptr->links[b_ptr->identity]; if (unlikely(!l_ptr)) @@ -1205,6 +1206,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) skb = NULL; unlock: tipc_node_unlock(n_ptr); + tipc_node_put(n_ptr); discard: if (unlikely(skb)) kfree_skb(skb); @@ -2236,7 +2238,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - if (prev_node) { node = tipc_node_find(net, prev_node); if (!node) { @@ -2249,6 +2250,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 1; goto out; } + tipc_node_put(node); list_for_each_entry_continue_rcu(node, &tn->node_list, list) { @@ -2256,6 +2258,7 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); + tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 506aaa565da7..41e7b7e4dda0 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -244,6 +244,7 @@ static void tipc_publ_subscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_add_tail(&publ->nodesub_list, &node->publ_list); tipc_node_unlock(node); + tipc_node_put(node); } static void tipc_publ_unsubscribe(struct net *net, struct publication 
*publ, @@ -258,6 +259,7 @@ static void tipc_publ_unsubscribe(struct net *net, struct publication *publ, tipc_node_lock(node); list_del_init(&publ->nodesub_list); tipc_node_unlock(node); + tipc_node_put(node); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index 5cc43d34ad0a..3e4f04897c03 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -42,6 +42,7 @@ static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); +static void tipc_node_delete(struct tipc_node *node); struct tipc_sock_conn { u32 port; @@ -67,6 +68,23 @@ static unsigned int tipc_hashfn(u32 addr) return addr & (NODE_HTABLE_SIZE - 1); } +static void tipc_node_kref_release(struct kref *kref) +{ + struct tipc_node *node = container_of(kref, struct tipc_node, kref); + + tipc_node_delete(node); +} + +void tipc_node_put(struct tipc_node *node) +{ + kref_put(&node->kref, tipc_node_kref_release); +} + +static void tipc_node_get(struct tipc_node *node) +{ + kref_get(&node->kref); +} + /* * tipc_node_find - locate specified node object, if it exists */ @@ -82,6 +100,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr) hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)], hash) { if (node->addr == addr) { + tipc_node_get(node); rcu_read_unlock(); return node; } @@ -106,6 +125,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) } n_ptr->addr = addr; n_ptr->net = net; + kref_init(&n_ptr->kref); spin_lock_init(&n_ptr->lock); INIT_HLIST_NODE(&n_ptr->hash); INIT_LIST_HEAD(&n_ptr->list); @@ -120,16 +140,17 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr) list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN; n_ptr->signature = INVALID_NODE_SIG; + tipc_node_get(n_ptr); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; } -static void tipc_node_delete(struct tipc_net *tn, struct tipc_node *n_ptr) +static void tipc_node_delete(struct tipc_node *node) 
{ - list_del_rcu(&n_ptr->list); - hlist_del_rcu(&n_ptr->hash); - kfree_rcu(n_ptr, rcu); + list_del_rcu(&node->list); + hlist_del_rcu(&node->hash); + kfree_rcu(node, rcu); } void tipc_node_stop(struct net *net) @@ -139,7 +160,7 @@ void tipc_node_stop(struct net *net) spin_lock_bh(&tn->node_list_lock); list_for_each_entry_safe(node, t_node, &tn->node_list, list) - tipc_node_delete(tn, node); + tipc_node_put(node); spin_unlock_bh(&tn->node_list_lock); } @@ -147,6 +168,7 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) { struct tipc_node *node; struct tipc_sock_conn *conn; + int err = 0; if (in_own_node(net, dnode)) return 0; @@ -157,8 +179,10 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) return -EHOSTUNREACH; } conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (!conn) - return -EHOSTUNREACH; + if (!conn) { + err = -EHOSTUNREACH; + goto exit; + } conn->peer_node = dnode; conn->port = port; conn->peer_port = peer_port; @@ -166,7 +190,9 @@ int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port) tipc_node_lock(node); list_add_tail(&conn->list, &node->conn_sks); tipc_node_unlock(node); - return 0; +exit: + tipc_node_put(node); + return err; } void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) @@ -189,6 +215,7 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) kfree(conn); } tipc_node_unlock(node); + tipc_node_put(node); } /** @@ -417,19 +444,25 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, char *linkname, size_t len) { struct tipc_link *link; + int err = -EINVAL; struct tipc_node *node = tipc_node_find(net, addr); - if ((bearer_id >= MAX_BEARERS) || !node) - return -EINVAL; + if (!node) + return err; + + if (bearer_id >= MAX_BEARERS) + goto exit; + tipc_node_lock(node); link = node->links[bearer_id]; if (link) { strncpy(linkname, link->name, len); - tipc_node_unlock(node); - return 0; + err = 0; } +exit: tipc_node_unlock(node); - return 
-EINVAL; + tipc_node_put(node); + return err; } void tipc_node_unlock(struct tipc_node *node) @@ -545,17 +578,21 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.seq = cb->nlh->nlmsg_seq; rcu_read_lock(); - - if (last_addr && !tipc_node_find(net, last_addr)) { - rcu_read_unlock(); - /* We never set seq or call nl_dump_check_consistent() this - * means that setting prev_seq here will cause the consistence - * check to fail in the netlink callback handler. Resulting in - * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if - * the node state changed while we released the lock. - */ - cb->prev_seq = 1; - return -EPIPE; + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. 
+ */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); } list_for_each_entry_rcu(node, &tn->node_list, list) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 9629ecd2bdd8..02d5c20dc551 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -94,6 +94,7 @@ struct tipc_node_bclink { /** * struct tipc_node - TIPC node structure * @addr: network address of node + * @ref: reference counter to node object * @lock: spinlock governing access to structure * @net: the applicable net namespace * @hash: links to adjacent nodes in unsorted hash chain @@ -115,6 +116,7 @@ struct tipc_node_bclink { */ struct tipc_node { u32 addr; + struct kref kref; spinlock_t lock; struct net *net; struct hlist_node hash; @@ -137,6 +139,7 @@ struct tipc_node { }; struct tipc_node *tipc_node_find(struct net *net, u32 addr); +void tipc_node_put(struct tipc_node *node); struct tipc_node *tipc_node_create(struct net *net, u32 addr); void tipc_node_stop(struct net *net); void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr); @@ -171,10 +174,12 @@ static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector) node = tipc_node_find(net, addr); - if (likely(node)) + if (likely(node)) { mtu = node->act_mtus[selector & 1]; - else + tipc_node_put(node); + } else { mtu = MAX_MSG_SIZE; + } return mtu; } -- cgit v1.2.3 From dff29b1a88524fe6afe296d6c477c491d1e02af0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 2 Apr 2015 09:33:01 -0400 Subject: tipc: eliminate delayed link deletion at link failover When a bearer is disabled manually, all its links have to be reset and deleted. However, if there is a remaining, parallel link ready to take over a deleted link's traffic, we currently delay the delete of the removed link until the failover procedure is finished. 
This is because the remaining link needs to access state from the reset link, such as the last received packet number, and any partially reassembled buffer, in order to perform a successful failover. In this commit, we do instead move the state data over to the new link, so that it can fulfill the procedure autonomously, without accessing any data on the old link. This means that we can now proceed and delete all pertaining links immediately when a bearer is disabled. This saves us from some unnecessary complexity in such situations. We also choose to change the confusing definitions CHANGEOVER_PROTOCOL, ORIGINAL_MSG and DUPLICATE_MSG to the more descriptive TUNNEL_PROTOCOL, FAILOVER_MSG and SYNCH_MSG respectively. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 124 +++++++++++++++++++++++++------------------------------- net/tipc/link.h | 17 ++++---- net/tipc/msg.c | 4 +- net/tipc/msg.h | 10 ++--- net/tipc/node.c | 13 +++--- 5 files changed, 78 insertions(+), 90 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/link.c b/net/tipc/link.c index c697cf69da91..b1e17953eeea 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -89,17 +89,9 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { #define TIMEOUT_EVT 560817u /* link timer expired */ /* - * The following two 'message types' is really just implementation - * data conveniently stored in the message header. 
- * They must not be considered part of the protocol + * State value stored in 'failover_pkts' */ -#define OPEN_MSG 0 -#define CLOSED_MSG 1 - -/* - * State value stored in 'exp_msg_count' - */ -#define START_CHANGEOVER 100000u +#define FIRST_FAILOVER 0xffffu static void link_handle_out_of_seq_msg(struct tipc_link *link, struct sk_buff *skb); @@ -113,8 +105,7 @@ static void tipc_link_sync_xmit(struct tipc_link *l); static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb); static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb); -static bool tipc_link_failover_rcv(struct tipc_node *node, - struct sk_buff **skb); +static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb); /* * Simple link routines */ @@ -332,15 +323,19 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, } /** - * link_delete - Conditional deletion of link. - * If timer still running, real delete is done when it expires - * @link: link to be deleted + * tipc_link_delete - Delete a link + * @l: link to be deleted */ -void tipc_link_delete(struct tipc_link *link) +void tipc_link_delete(struct tipc_link *l) { - tipc_link_reset_fragments(link); - tipc_node_detach_link(link->owner, link); - tipc_link_put(link); + tipc_link_reset(l); + if (del_timer(&l->timer)) + tipc_link_put(l); + l->flags |= LINK_STOPPED; + /* Delete link now, or when timer is finished: */ + tipc_link_reset_fragments(l); + tipc_node_detach_link(l->owner, l); + tipc_link_put(l); } void tipc_link_delete_list(struct net *net, unsigned int bearer_id, @@ -349,23 +344,12 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id, struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_link *link; struct tipc_node *node; - bool del_link; rcu_read_lock(); list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_lock(node); link = node->links[bearer_id]; - if (!link) { - 
tipc_node_unlock(node); - continue; - } - del_link = !tipc_link_is_up(link) && !link->exp_msg_count; - tipc_link_reset(link); - if (del_timer(&link->timer)) - tipc_link_put(link); - link->flags |= LINK_STOPPED; - /* Delete link now, or when failover is finished: */ - if (shutting_down || !tipc_node_is_up(node) || del_link) + if (link) tipc_link_delete(link); tipc_node_unlock(node); } @@ -472,9 +456,9 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr) void tipc_link_reset(struct tipc_link *l_ptr) { u32 prev_state = l_ptr->state; - u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); struct tipc_node *owner = l_ptr->owner; + struct tipc_link *pl = tipc_parallel_link(l_ptr); msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); @@ -492,11 +476,15 @@ void tipc_link_reset(struct tipc_link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr); - if (was_active_link && tipc_node_active_links(l_ptr->owner)) { - l_ptr->reset_checkpoint = checkpoint; - l_ptr->exp_msg_count = START_CHANGEOVER; + if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) { + l_ptr->flags |= LINK_FAILINGOVER; + l_ptr->failover_checkpt = l_ptr->next_in_no; + pl->failover_pkts = FIRST_FAILOVER; + pl->failover_checkpt = l_ptr->next_in_no; + pl->failover_skb = l_ptr->reasm_buf; + } else { + kfree_skb(l_ptr->reasm_buf); } - /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->transmq); __skb_queue_purge(&l_ptr->deferdq); @@ -506,6 +494,7 @@ void tipc_link_reset(struct tipc_link *l_ptr) if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; tipc_link_purge_backlog(l_ptr); + l_ptr->reasm_buf = NULL; l_ptr->rcv_unacked = 0; l_ptr->checkpoint = 1; l_ptr->next_out_no = 1; @@ -557,8 +546,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT)) return; /* Not 
yet. */ - /* Check whether changeover is going on */ - if (l_ptr->exp_msg_count) { + if (l_ptr->flags & LINK_FAILINGOVER) { if (event == TIMEOUT_EVT) link_set_timer(l_ptr, cont_intv); return; @@ -1242,7 +1230,7 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb) node->action_flags |= TIPC_NAMED_MSG_EVT; return true; case MSG_BUNDLER: - case CHANGEOVER_PROTOCOL: + case TUNNEL_PROTOCOL: case MSG_FRAGMENTER: case BCAST_PROTOCOL: return false; @@ -1269,14 +1257,14 @@ static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb) return; switch (msg_user(msg)) { - case CHANGEOVER_PROTOCOL: + case TUNNEL_PROTOCOL: if (msg_dup(msg)) { link->flags |= LINK_SYNCHING; link->synch_point = msg_seqno(msg_get_wrapped(msg)); kfree_skb(skb); break; } - if (!tipc_link_failover_rcv(node, &skb)) + if (!tipc_link_failover_rcv(link, &skb)) break; if (msg_user(buf_msg(skb)) != MSG_BUNDLER) { tipc_data_input(link, skb); @@ -1391,8 +1379,8 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, u32 msg_size = sizeof(l_ptr->proto_msg); int r_flag; - /* Don't send protocol message during link changeover */ - if (l_ptr->exp_msg_count) + /* Don't send protocol message during link failover */ + if (l_ptr->flags & LINK_FAILINGOVER) return; /* Abort non-RESET send if communication with node is prohibited */ @@ -1444,7 +1432,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, } l_ptr->stats.sent_states++; } else { /* RESET_MSG or ACTIVATE_MSG */ - msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); + msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); msg_set_seq_gap(msg, 0); msg_set_next_sent(msg, 1); msg_set_probe(msg, 0); @@ -1486,8 +1474,7 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); - /* Discard protocol message during link changeover */ - if (l_ptr->exp_msg_count) + if (l_ptr->flags & LINK_FAILINGOVER) goto exit; if (l_ptr->net_plane != 
msg_net_plane(msg)) @@ -1659,8 +1646,8 @@ void tipc_link_failover_send_queue(struct tipc_link *l_ptr) if (!tunnel) return; - tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); + tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL, + FAILOVER_MSG, INT_H_SIZE, l_ptr->addr); skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq); tipc_link_purge_backlog(l_ptr); msgcount = skb_queue_len(&l_ptr->transmq); @@ -1722,8 +1709,8 @@ void tipc_link_dup_queue_xmit(struct tipc_link *link, struct sk_buff_head *queue = &link->transmq; int mcnt; - tipc_msg_init(link_own_addr(link), &tnl_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, INT_H_SIZE, link->addr); + tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL, + SYNCH_MSG, INT_H_SIZE, link->addr); mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq); msg_set_msgcnt(&tnl_hdr, mcnt); msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id); @@ -1756,36 +1743,37 @@ tunnel_queue: goto tunnel_queue; } -/* tipc_link_failover_rcv(): Receive a tunnelled ORIGINAL_MSG packet +/* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet * Owner node is locked. */ -static bool tipc_link_failover_rcv(struct tipc_node *node, +static bool tipc_link_failover_rcv(struct tipc_link *link, struct sk_buff **skb) { struct tipc_msg *msg = buf_msg(*skb); struct sk_buff *iskb = NULL; - struct tipc_link *link = NULL; + struct tipc_link *pl = NULL; int bearer_id = msg_bearer_id(msg); int pos = 0; - if (msg_type(msg) != ORIGINAL_MSG) { + if (msg_type(msg) != FAILOVER_MSG) { pr_warn("%sunknown tunnel pkt received\n", link_co_err); goto exit; } if (bearer_id >= MAX_BEARERS) goto exit; - link = node->links[bearer_id]; - if (!link) + + if (bearer_id == link->bearer_id) goto exit; - if (tipc_link_is_up(link)) - tipc_link_reset(link); - /* First failover packet? 
*/ - if (link->exp_msg_count == START_CHANGEOVER) - link->exp_msg_count = msg_msgcnt(msg); + pl = link->owner->links[bearer_id]; + if (pl && tipc_link_is_up(pl)) + tipc_link_reset(pl); + + if (link->failover_pkts == FIRST_FAILOVER) + link->failover_pkts = msg_msgcnt(msg); /* Should we expect an inner packet? */ - if (!link->exp_msg_count) + if (!link->failover_pkts) goto exit; if (!tipc_msg_extract(*skb, &iskb, &pos)) { @@ -1793,22 +1781,22 @@ static bool tipc_link_failover_rcv(struct tipc_node *node, *skb = NULL; goto exit; } - link->exp_msg_count--; + link->failover_pkts--; *skb = NULL; - /* Was packet already delivered? */ - if (less(buf_seqno(iskb), link->reset_checkpoint)) { + /* Was this packet already delivered? */ + if (less(buf_seqno(iskb), link->failover_checkpt)) { kfree_skb(iskb); iskb = NULL; goto exit; } if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) { link->stats.recv_fragments++; - tipc_buf_append(&link->reasm_buf, &iskb); + tipc_buf_append(&link->failover_skb, &iskb); } exit: - if (link && (!link->exp_msg_count) && (link->flags & LINK_STOPPED)) - tipc_link_delete(link); + if (!link->failover_pkts && pl) + pl->flags &= ~LINK_FAILINGOVER; kfree_skb(*skb); *skb = iskb; return *skb; diff --git a/net/tipc/link.h b/net/tipc/link.h index d2b5663643da..6e28f03c7905 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -58,9 +58,10 @@ /* Link endpoint execution states */ -#define LINK_STARTED 0x0001 -#define LINK_STOPPED 0x0002 -#define LINK_SYNCHING 0x0004 +#define LINK_STARTED 0x0001 +#define LINK_STOPPED 0x0002 +#define LINK_SYNCHING 0x0004 +#define LINK_FAILINGOVER 0x0008 /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) @@ -167,11 +168,12 @@ struct tipc_link { struct tipc_msg *pmsg; u32 priority; char net_plane; + u16 synch_point; - /* Changeover */ - u32 exp_msg_count; - u32 reset_checkpoint; - u32 synch_point; + /* Failover */ + u16 failover_pkts; + u16 failover_checkpt; + struct sk_buff 
*failover_skb; /* Max packet negotiation */ u32 max_pkt; @@ -201,7 +203,6 @@ struct tipc_link { struct sk_buff_head wakeupq; /* Fragmentation/reassembly */ - u32 long_msg_seq_no; struct sk_buff *reasm_buf; /* Statistics */ diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 3bb499c61918..c3e96e815418 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -355,7 +355,7 @@ bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu) start = align(bsz); pad = start - bsz; - if (unlikely(msg_user(msg) == CHANGEOVER_PROTOCOL)) + if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL)) return false; if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) return false; @@ -433,7 +433,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode) if (msg_user(msg) == MSG_FRAGMENTER) return false; - if (msg_user(msg) == CHANGEOVER_PROTOCOL) + if (msg_user(msg) == TUNNEL_PROTOCOL) return false; if (msg_user(msg) == BCAST_PROTOCOL) return false; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d273207ede28..e1d3595e2ee9 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -72,7 +72,7 @@ struct plist; #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 -#define CHANGEOVER_PROTOCOL 10 +#define TUNNEL_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 @@ -512,8 +512,8 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) /* * Changeover tunnel message types */ -#define DUPLICATE_MSG 0 -#define ORIGINAL_MSG 1 +#define SYNCH_MSG 0 +#define FAILOVER_MSG 1 /* * Config protocol message types @@ -556,9 +556,9 @@ static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) static inline bool msg_dup(struct tipc_msg *m) { - if (likely(msg_user(m) != CHANGEOVER_PROTOCOL)) + if (likely(msg_user(m) != TUNNEL_PROTOCOL)) return false; - if (msg_type(m) != DUPLICATE_MSG) + if (msg_type(m) != SYNCH_MSG) return false; return true; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 3e4f04897c03..f3d522c2881a 
100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -394,18 +394,17 @@ static void node_lost_contact(struct tipc_node *n_ptr) n_ptr->bclink.recv_permitted = false; } - /* Abort link changeover */ + /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { struct tipc_link *l_ptr = n_ptr->links[i]; if (!l_ptr) continue; - l_ptr->reset_checkpoint = l_ptr->next_in_no; - l_ptr->exp_msg_count = 0; + l_ptr->flags &= ~LINK_FAILINGOVER; + l_ptr->failover_checkpt = 0; + l_ptr->failover_pkts = 0; + kfree_skb(l_ptr->failover_skb); + l_ptr->failover_skb = NULL; tipc_link_reset_fragments(l_ptr); - - /* Link marked for deletion after failover? => do it now */ - if (l_ptr->flags & LINK_STOPPED) - tipc_link_delete(l_ptr); } n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN; -- cgit v1.2.3 From ed193ece2649c194a87a9d8470195760d367c075 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 2 Apr 2015 09:33:02 -0400 Subject: tipc: simplify link mtu negotiation When a link is being established, the two endpoints advertise their respective interface MTU in the transmitted RESET and ACTIVATE messages. If there is any difference, the lower of the two MTUs will be selected for use by both endpoints. However, as a remnant of earlier attempts to introduce TIPC level routing, there also exists an MTU discovery mechanism. If an intermediate node has a lower MTU than the two endpoints, they will discover this through a bisectional approach, and finally adopt this MTU for common use. Since there is no TIPC level routing, and probably never will be, this mechanism doesn't make any sense, and only serves to make the link level protocol unnecessarily complex. In this commit, we eliminate the MTU discovery algorithm, and fall back to the simple MTU advertising approach. This change is fully backwards compatible. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 4 +- net/tipc/link.c | 129 ++++++++++++++----------------------------------------- net/tipc/link.h | 12 +++--- net/tipc/node.c | 9 ++-- 4 files changed, 43 insertions(+), 111 deletions(-) (limited to 'net/tipc/node.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ae558dd7f8ee..c5cbdcb1f0b5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -413,7 +413,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) */ if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], - STATE_MSG, 0, 0, 0, 0, 0); + STATE_MSG, 0, 0, 0, 0); tn->bcl->stats.sent_acks++; } } @@ -899,7 +899,7 @@ int tipc_bclink_init(struct net *net) skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; bcl->owner->net = net; - bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; + bcl->mtu = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); diff --git a/net/tipc/link.c b/net/tipc/link.c index b1e17953eeea..a6b30df6ec02 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -136,34 +136,6 @@ static struct tipc_link *tipc_parallel_link(struct tipc_link *l) return l->owner->active_links[1]; } -static void link_init_max_pkt(struct tipc_link *l_ptr) -{ - struct tipc_node *node = l_ptr->owner; - struct tipc_net *tn = net_generic(node->net, tipc_net_id); - struct tipc_bearer *b_ptr; - u32 max_pkt; - - rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]); - if (!b_ptr) { - rcu_read_unlock(); - return; - } - max_pkt = (b_ptr->mtu & ~3); - rcu_read_unlock(); - - if (max_pkt > MAX_MSG_SIZE) - max_pkt = MAX_MSG_SIZE; - - l_ptr->max_pkt_target = max_pkt; - if (l_ptr->max_pkt_target < MAX_PKT_DEFAULT) - l_ptr->max_pkt = l_ptr->max_pkt_target; - else - l_ptr->max_pkt = MAX_PKT_DEFAULT; - - l_ptr->max_pkt_probes = 0; -} - /* * Simple non-static link routines 
(i.e. referenced outside this file) */ @@ -304,7 +276,8 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr, msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); l_ptr->net_plane = b_ptr->net_plane; - link_init_max_pkt(l_ptr); + l_ptr->advertised_mtu = b_ptr->mtu; + l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->priority = b_ptr->priority; tipc_link_set_queue_limits(l_ptr, b_ptr->window); l_ptr->next_out_no = 1; @@ -465,8 +438,8 @@ void tipc_link_reset(struct tipc_link *l_ptr) /* Link is down, accept any session */ l_ptr->peer_session = INVALID_SESSION; - /* Prepare for max packet size negotiation */ - link_init_max_pkt(l_ptr); + /* Prepare for renewed mtu size negotiation */ + l_ptr->mtu = l_ptr->advertised_mtu; l_ptr->state = RESET_UNKNOWN; @@ -563,11 +536,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); - l_ptr->fsm_msg_cnt++; - } else if (l_ptr->max_pkt < l_ptr->max_pkt_target) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); @@ -575,7 +544,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) } l_ptr->state = WORKING_UNKNOWN; l_ptr->fsm_msg_cnt = 0; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); break; @@ -586,7 +555,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -609,7 +578,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; 
tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -620,13 +589,13 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->checkpoint = l_ptr->next_in_no; if (tipc_bclink_acks_missing(l_ptr->owner)) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; } link_set_timer(l_ptr, cont_intv); } else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) { tipc_link_proto_xmit(l_ptr, STATE_MSG, - 1, 0, 0, 0, 0); + 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv / 4); } else { /* Link has failed */ @@ -636,7 +605,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_UNKNOWN; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, RESET_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); } @@ -656,7 +625,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); @@ -666,7 +635,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) l_ptr->state = RESET_RESET; l_ptr->fsm_msg_cnt = 0; tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 1, 0, 0, 0, 0); + 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -676,7 +645,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) link_set_timer(l_ptr, cont_intv); break; case TIMEOUT_EVT: - tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -694,7 +663,7 @@ static void link_state_event(struct tipc_link *l_ptr, 
unsigned int event) l_ptr->state = WORKING_WORKING; l_ptr->fsm_msg_cnt = 0; link_activate(l_ptr); - tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0); l_ptr->fsm_msg_cnt++; if (l_ptr->owner->working_links == 1) tipc_link_sync_xmit(l_ptr); @@ -704,7 +673,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) break; case TIMEOUT_EVT: tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG, - 0, 0, 0, 0, 0); + 0, 0, 0, 0); l_ptr->fsm_msg_cnt++; link_set_timer(l_ptr, cont_intv); break; @@ -733,7 +702,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct tipc_msg *msg = buf_msg(skb_peek(list)); unsigned int maxwin = link->window; unsigned int imp = msg_importance(msg); - uint mtu = link->max_pkt; + uint mtu = link->mtu; uint ack = mod(link->next_in_no - 1); uint seqno = link->next_out_no; uint bc_last_in = link->owner->bclink.last_in; @@ -1187,7 +1156,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr) link_retrieve_defq(l_ptr, &head); if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) { l_ptr->stats.sent_acks++; - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); } tipc_link_input(l_ptr, skb); skb = NULL; @@ -1362,7 +1331,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) { l_ptr->stats.deferred_recv++; if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1) - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0); } else { l_ptr->stats.duplicates++; } @@ -1372,7 +1341,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. 
*/ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, - u32 gap, u32 tolerance, u32 priority, u32 ack_mtu) + u32 gap, u32 tolerance, u32 priority) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -1410,26 +1379,11 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, l_ptr->stats.sent_nacks++; msg_set_link_tolerance(msg, tolerance); msg_set_linkprio(msg, priority); - msg_set_max_pkt(msg, ack_mtu); + msg_set_max_pkt(msg, l_ptr->mtu); msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); msg_set_probe(msg, probe_msg != 0); - if (probe_msg) { - u32 mtu = l_ptr->max_pkt; - - if ((mtu < l_ptr->max_pkt_target) && - link_working_working(l_ptr) && - l_ptr->fsm_msg_cnt) { - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - if (l_ptr->max_pkt_probes == 10) { - l_ptr->max_pkt_target = (msg_size - 4); - l_ptr->max_pkt_probes = 0; - msg_size = (mtu + (l_ptr->max_pkt_target - mtu)/2 + 2) & ~3; - } - l_ptr->max_pkt_probes++; - } - + if (probe_msg) l_ptr->stats.sent_probes++; - } l_ptr->stats.sent_states++; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1)); @@ -1438,7 +1392,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg, msg_set_probe(msg, 0); msg_set_link_tolerance(msg, l_ptr->tolerance); msg_set_linkprio(msg, l_ptr->priority); - msg_set_max_pkt(msg, l_ptr->max_pkt_target); + msg_set_max_pkt(msg, l_ptr->advertised_mtu); } r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); @@ -1469,8 +1423,6 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf) { u32 rec_gap = 0; - u32 max_pkt_info; - u32 max_pkt_ack; u32 msg_tol; struct tipc_msg *msg = buf_msg(buf); @@ -1513,15 +1465,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, if (msg_linkprio(msg) > l_ptr->priority) l_ptr->priority = msg_linkprio(msg); - max_pkt_info = msg_max_pkt(msg); - if (max_pkt_info) { - if (max_pkt_info < 
l_ptr->max_pkt_target) - l_ptr->max_pkt_target = max_pkt_info; - if (l_ptr->max_pkt > l_ptr->max_pkt_target) - l_ptr->max_pkt = l_ptr->max_pkt_target; - } else { - l_ptr->max_pkt = l_ptr->max_pkt_target; - } + if (l_ptr->mtu > msg_max_pkt(msg)) + l_ptr->mtu = msg_max_pkt(msg); /* Synchronize broadcast link info, if not done previously */ if (!tipc_node_is_up(l_ptr->owner)) { @@ -1566,18 +1511,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, mod(l_ptr->next_in_no)); } - max_pkt_ack = msg_max_pkt(msg); - if (max_pkt_ack > l_ptr->max_pkt) { - l_ptr->max_pkt = max_pkt_ack; - l_ptr->max_pkt_probes = 0; - } - - max_pkt_ack = 0; - if (msg_probe(msg)) { + if (msg_probe(msg)) l_ptr->stats.recv_probes++; - if (msg_size(msg) > sizeof(l_ptr->proto_msg)) - max_pkt_ack = msg_size(msg); - } /* Protocol message before retransmits, reduce loss risk */ if (l_ptr->owner->bclink.recv_permitted) @@ -1585,8 +1520,8 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, msg_last_bcast(msg)); if (rec_gap || (msg_probe(msg))) { - tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, rec_gap, 0, - 0, max_pkt_ack); + tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, + rec_gap, 0, 0); } if (msg_seq_gap(msg)) { l_ptr->stats.recv_nacks++; @@ -1816,7 +1751,7 @@ static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol) void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { - int max_bulk = TIPC_MAX_PUBLICATIONS / (l->max_pkt / ITEM_SIZE); + int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); l->window = win; l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; @@ -1988,14 +1923,14 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info) tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); link_set_supervision_props(link, tol); - tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0, 0); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); link->priority = prio; - 
tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio, 0); + tipc_link_proto_xmit(link, STATE_MSG, 0, 0, 0, prio); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@ -2100,7 +2035,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(tn->own_addr))) goto attr_msg_full; - if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->max_pkt)) + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->next_in_no)) goto attr_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index 6e28f03c7905..b5b4e3554d4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,9 +123,8 @@ struct tipc_stats { * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_checkpoint: seq # of last acknowledged message at time of link reset - * @max_pkt: current maximum packet size for this link - * @max_pkt_target: desired maximum packet size for this link - * @max_pkt_probes: # of probes based on current (max_pkt, max_pkt_target) + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent * @next_out_no: next sequence number to use for outbound messages @@ -176,9 +175,8 @@ struct tipc_link { struct sk_buff *failover_skb; /* Max packet negotiation */ - u32 max_pkt; - u32 max_pkt_target; - u32 max_pkt_probes; + u16 mtu; + u16 advertised_mtu; /* Sending */ struct sk_buff_head transmq; @@ -233,7 +231,7 @@ int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest, int __tipc_link_xmit(struct net *net, struct tipc_link *link, struct sk_buff_head *list); void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority, u32 
acked_mtu); + u32 gap, u32 tolerance, u32 priority); void tipc_link_push_packets(struct tipc_link *l_ptr); u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); diff --git a/net/tipc/node.c b/net/tipc/node.c index f3d522c2881a..22c059ad2999 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -254,8 +254,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) active[0] = active[1] = l_ptr; exit: /* Leave room for changeover header when returning 'mtu' to users: */ - n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; } /** @@ -319,11 +319,10 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) /* Leave room for changeover header when returning 'mtu' to users: */ if (active[0]) { - n_ptr->act_mtus[0] = active[0]->max_pkt - INT_H_SIZE; - n_ptr->act_mtus[1] = active[1]->max_pkt - INT_H_SIZE; + n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE; + n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE; return; } - /* Loopback link went down? No fragmentation needed from now on. */ if (n_ptr->addr == tn->own_addr) { n_ptr->act_mtus[0] = MAX_MSG_SIZE; -- cgit v1.2.3