From 703068eee6dde2ab318048f976b6ebd7fd239a78 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 9 Jan 2015 15:26:58 +0800 Subject: tipc: fix bug in broadcast retransmit code In commit 58dc55f25631178ee74cd27185956a8f7dcb3e32 ("tipc: use generic SKB list APIs to manage link transmission queue") we replaced all list traversal loops with the macros skb_queue_walk() or skb_queue_walk_safe(). While the previous loops were based on the assumption that the list was NULL-terminated, the standard macros stop when the iterator reaches the list head, which is non-NULL. In the function bclink_retransmit_pkt() this macro replacement has led to a bug. When we receive a BCAST STATE_MSG we unconditionally call the function bclink_retransmit_pkt(), whether there really is anything to retransmit or not, assuming that the sequence number comparisons will lead to the correct behavior. However, if the transmission queue is empty, or if there are no eligible buffers in the transmission queue, we will mistakenly pass the list head pointer to the function tipc_link_retransmit(). Since the list head is not a valid sk_buff, this leads to a crash. In this commit we fix this by calling tipc_link_retransmit() only if we actually found eligible buffers in the transmission queue. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 96ceefeb9daf..a9e174fc0f91 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -220,10 +220,11 @@ static void bclink_retransmit_pkt(u32 after, u32 to) struct sk_buff *skb; skb_queue_walk(&bcl->outqueue, skb) { - if (more(buf_seqno(skb), after)) + if (more(buf_seqno(skb), after)) { + tipc_link_retransmit(bcl, skb, mod(to - after)); break; + } } - tipc_link_retransmit(bcl, skb, mod(to - after)); } /** -- cgit v1.2.3
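As an aside on the pitfall fixed above: unlike a NULL-terminated walk, skb_queue_walk() leaves the iterator pointing at the queue-head sentinel when the loop runs to completion. The user-space sketch below mimics that sentinel-based iteration with simplified stand-in types (node, queue and retransmit() are illustrative, not the kernel's) to show why touching the iterator after the loop is unsafe, and why moving the call inside the loop is the fix.

#include <stdio.h>

struct node {                       /* stand-in for struct sk_buff */
	struct node *next;
	int seqno;
};

struct queue {                      /* stand-in for struct sk_buff_head */
	struct node head;           /* sentinel: carries no payload */
};

/* mimic of skb_queue_walk(): stops at the sentinel, never at NULL */
#define queue_walk(q, n) \
	for ((n) = (q)->head.next; (n) != &(q)->head; (n) = (n)->next)

static void retransmit(struct node *n)  /* stand-in for tipc_link_retransmit() */
{
	printf("retransmitting from seqno %d\n", n->seqno);
}

int main(void)
{
	struct queue q = { .head = { .next = &q.head, .seqno = -1 } };
	struct node *n;

	/* BUGGY pattern: with an empty queue (or no match), the loop
	 * exits with n == &q.head, so calling retransmit(n) here would
	 * hand over the sentinel, which is not a valid element; in the
	 * kernel this dereferences garbage and crashes.
	 */
	queue_walk(&q, n)
		if (n->seqno > 10)
			break;
	/* retransmit(n);  <- would pass the sentinel */

	/* FIXED pattern: act only when a match was really found */
	queue_walk(&q, n) {
		if (n->seqno > 10) {
			retransmit(n);
			break;
		}
	}
	return 0;
}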
From c93d3baa24095887005647984cff5de8c63d3611 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:04 +0800 Subject: tipc: involve namespace infrastructure Involve the namespace infrastructure: make the "tipc_net_id" global variable per-namespace aware and rename it to "net_id". For the conversion to be done successfully, a network namespace instance must be passed to the relevant functions, allowing them to access the per-namespace "net_id" variable. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a9e174fc0f91..f98231138916 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -327,9 +327,11 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) +void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, + u32 last_sent) { struct sk_buff *buf; + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ if (less_eq(last_sent, n_ptr->bclink.last_in)) @@ -362,7 +364,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); msg_set_bcast_ack(msg, n_ptr->bclink.last_in); msg_set_bcgap_after(msg, n_ptr->bclink.last_in); msg_set_bcgap_to(msg, to); @@ -476,8 +478,9 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct sk_buff *buf) +void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; @@ -485,7 +488,7 @@ void tipc_bclink_rcv(struct sk_buff *buf) int deferred = 0; /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tipc_net_id) + if (msg_mc_netid(msg) != tn->net_id) goto exit; node = tipc_node_find(msg_prevnode(msg)); if (unlikely(!node)) goto exit; @@ -638,6 +641,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, { int bp_index; struct tipc_msg *msg = buf_msg(buf); + struct net *net = sock_net(buf->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -647,7 +652,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (likely(!msg_non_seq(buf_msg(buf)))) { bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tipc_net_id); + msg_set_mc_netid(msg, tn->net_id); bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { -- cgit v1.2.3 From f2f9800d4955a96d92896841d8ba9b04201deaa1 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:05 +0800 Subject: tipc: make tipc node table aware of net namespace The following global variables are associated with the node table: - node hash table (node_htable) - node list (tipc_node_list) - node table lock (node_list_lock) - node counter (tipc_num_nodes) - node link counter (tipc_num_links) To make the node table support namespaces, these global variables must be moved into the tipc_net structure so that they stay private to each namespace. As a consequence, they are allocated and initialized when a namespace is created, and deallocated when the namespace is destroyed. After the change, functions associated with these variables need a namespace pointer to access them, so adding a namespace pointer as a parameter to these functions is the major change made in this commit. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller
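For readers unfamiliar with the pernet mechanism these two patches build on, here is a condensed sketch of the pattern: one per-namespace blob is registered with the pernet infrastructure and later fetched with net_generic(). The field layout shown is an approximation of what the series moves into struct tipc_net (the real definitions live in net/tipc/core.h and net/tipc/core.c), and the table size is illustrative.

#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define NODE_HTABLE_SIZE 512            /* illustrative size */

static int tipc_net_id __read_mostly;   /* index into each net's generic array */

struct tipc_net {                       /* approximate layout */
	int net_id;                     /* was the global tipc_net_id value */
	spinlock_t node_list_lock;      /* was global node_list_lock */
	struct list_head node_list;     /* was global tipc_node_list */
	struct hlist_head node_htable[NODE_HTABLE_SIZE]; /* was global node_htable */
};

static int __net_init tipc_init_net(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);
	int i;

	tn->net_id = 4711;              /* TIPC's historical default net id */
	spin_lock_init(&tn->node_list_lock);
	INIT_LIST_HEAD(&tn->node_list);
	for (i = 0; i < NODE_HTABLE_SIZE; i++)
		INIT_HLIST_HEAD(&tn->node_htable[i]);
	return 0;
}

static void __net_exit tipc_exit_net(struct net *net)
{
	/* per-namespace teardown: stop nodes, free tables, etc. */
}

static struct pernet_operations tipc_net_ops = {
	.init = tipc_init_net,
	.exit = tipc_exit_net,
	.id   = &tipc_net_id,
	.size = sizeof(struct tipc_net), /* allocated once per namespace */
};

/* at module init: register_pernet_subsys(&tipc_net_ops); */

With .id and .size set, the core allocates one zeroed struct tipc_net per namespace, and any function handed a struct net pointer can reach its private state, which is exactly why these patches thread a net pointer through the call chains.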
--- net/tipc/bcast.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f98231138916..816c0e49319f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -232,13 +232,12 @@ static void bclink_retransmit_pkt(u32 after, u32 to) * * Called with no locks taken */ -void tipc_bclink_wakeup_users(void) +void tipc_bclink_wakeup_users(struct net *net) { struct sk_buff *skb; while ((skb = skb_dequeue(&bclink->link.waiting_sks))) - tipc_sk_rcv(skb); - + tipc_sk_rcv(net, skb); } /** @@ -385,9 +384,9 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, * Delay any upcoming NACK by this node if another node has already * requested the first message this node is going to ask for. */ -static void bclink_peek_nack(struct tipc_msg *msg) +static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) { - struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); + struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); if (unlikely(!n_ptr)) return; @@ -404,11 +403,12 @@ static void bclink_peek_nack(struct tipc_msg *msg) /* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster * and to identified node local sockets + * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct sk_buff_head *list) +int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { int rc = 0; int bc = 0; @@ -443,7 +443,7 @@ int tipc_bclink_xmit(struct sk_buff_head *list) /* Deliver message clone */ if (likely(!rc)) - tipc_sk_mcast_rcv(skb); + tipc_sk_mcast_rcv(net, skb); else kfree_skb(skb); @@ -491,7 +491,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) if (msg_mc_netid(msg) != tn->net_id) goto exit; - node = tipc_node_find(msg_prevnode(msg)); + node = tipc_node_find(net, msg_prevnode(msg)); if (unlikely(!node)) goto exit; @@ -514,7 +514,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) tipc_bclink_unlock(); } else { tipc_node_unlock(node); - bclink_peek_nack(msg); + bclink_peek_nack(net, msg); } goto exit; } @@ -532,7 +532,7 @@ receive: tipc_bclink_unlock(); tipc_node_unlock(node); if (likely(msg_mcast(msg))) - tipc_sk_mcast_rcv(buf); + tipc_sk_mcast_rcv(net, buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { @@ -542,7 +542,7 @@ receive: bcl->stats.recv_bundled += msg_msgcnt(msg); tipc_bclink_unlock(); tipc_node_unlock(node); - tipc_link_bundle_rcv(buf); + tipc_link_bundle_rcv(net, buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) @@ -563,7 +563,7 @@ receive: bclink_accept_pkt(node, seqno); tipc_bclink_unlock(); tipc_node_unlock(node); - tipc_named_rcv(buf); + tipc_named_rcv(net, buf); } else { tipc_bclink_lock(); bclink_accept_pkt(node, seqno); -- cgit v1.2.3 From 7f9f95d9d9bcdf253c4149a157b096958013eceb Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:06 +0800 Subject: tipc: make bearer list support net namespace The bearer list, defined as a global variable, stores bearer instances.
When TIPC supports net namespaces, bearers created in one namespace must be isolated from those allocated in other namespaces, which requires that the bearer list (bearer_list) be moved into the tipc_net structure. As a result, a net namespace pointer has to be passed to the functions that access the bearer list. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 816c0e49319f..e7c538304595 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -369,7 +369,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, msg_set_bcgap_to(msg, to); tipc_bclink_lock(); - tipc_bearer_send(MAX_BEARERS, buf, NULL); + tipc_bearer_send(net, MAX_BEARERS, buf, NULL); bcl->stats.sent_nacks++; tipc_bclink_unlock(); kfree_skb(buf); @@ -425,7 +425,7 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) if (likely(bclink)) { tipc_bclink_lock(); if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(bcl, list); + rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); @@ -682,13 +682,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, if (bp_index == 0) { /* Use original buffer for first bearer */ - tipc_bearer_send(b->identity, buf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); } else { /* Avoid concurrent buffer access */ tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); if (!tbuf) break; - tipc_bearer_send(b->identity, tbuf, &b->bcast_addr); + tipc_bearer_send(net, b->identity, tbuf, + &b->bcast_addr); kfree_skb(tbuf); /* Bearer keeps a clone */ } if (bcbearer->remains_new.count == 0) @@ -703,8 +704,10 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, /** * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer */ -void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) +void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, + u32 node, bool action) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; int b_index; int pri; @@ -723,7 +726,7 @@ void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action) rcu_read_lock(); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(bearer_list[b_index]); + b = rcu_dereference_rtnl(tn->bearer_list[b_index]); if (!b || !b->nodes.count) continue; @@ -939,8 +942,10 @@ int tipc_bclink_set_queue_limits(u32 limit) return 0; } -int tipc_bclink_init(void) +int tipc_bclink_init(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) return -ENOMEM; @@ -967,19 +972,21 @@ int tipc_bclink_init(void) bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer); + rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); return 0; } -void tipc_bclink_stop(void) +void tipc_bclink_stop(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); +
tipc_bclink_lock(); tipc_link_purge_queues(bcl); tipc_bclink_unlock(); - RCU_INIT_POINTER(bearer_list[BCBEARER], NULL); + RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); kfree(bcbearer); kfree(bclink); -- cgit v1.2.3 From 1da465683a93142488a54a9038155f23d6349441 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:07 +0800 Subject: tipc: make tipc broadcast link support net namespace The TIPC broadcast link is statically established, and its relevant state is maintained with the global variables "bcbearer", "bclink" and "bcl". To allow different namespaces to own different broadcast link instances, these variables must be moved into the tipc_net structure, and the broadcast link instances are then allocated and initialized when a namespace is created. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 297 ++++++++++++++++++++++++++----------------------------- 1 file changed, 140 insertions(+), 157 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e7c538304595..bc58097ebad2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -35,77 +35,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "core.h" -#include "link.h" #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" +#include "core.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 20 /* bcast link window size (default) */ -#define BCBEARER MAX_BEARERS -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. - */ -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". - */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @flags: represent bclink states - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc.
- */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - unsigned int flags; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; - -static struct tipc_bcbearer *bcbearer; -static struct tipc_bclink *bclink; -static struct tipc_link *bcl; const char tipc_bclink_name[] = "broadcast-link"; @@ -115,25 +52,28 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_bclink_lock(void) +static void tipc_bclink_lock(struct net *net) { - spin_lock_bh(&bclink->lock); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + spin_lock_bh(&tn->bclink->lock); } -static void tipc_bclink_unlock(void) +static void tipc_bclink_unlock(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node = NULL; - if (likely(!bclink->flags)) { - spin_unlock_bh(&bclink->lock); + if (likely(!tn->bclink->flags)) { + spin_unlock_bh(&tn->bclink->lock); return; } - if (bclink->flags & TIPC_BCLINK_RESET) { - bclink->flags &= ~TIPC_BCLINK_RESET; - node = tipc_bclink_retransmit_to(); + if (tn->bclink->flags & TIPC_BCLINK_RESET) { + tn->bclink->flags &= ~TIPC_BCLINK_RESET; + node = tipc_bclink_retransmit_to(net); } - spin_unlock_bh(&bclink->lock); + spin_unlock_bh(&tn->bclink->lock); if (node) tipc_link_reset_all(node); @@ -144,9 +84,11 @@ uint tipc_bclink_get_mtu(void) return MAX_PKT_DEFAULT_MCAST; } -void tipc_bclink_set_flags(unsigned int flags) +void tipc_bclink_set_flags(struct net *net, unsigned int flags) { - bclink->flags |= flags; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tn->bclink->flags |= flags; } static u32 bcbuf_acks(struct sk_buff *buf) @@ -164,31 +106,40 @@ static void bcbuf_decr_acks(struct sk_buff *buf) bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); } -void tipc_bclink_add_node(u32 addr) +void tipc_bclink_add_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_add(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_add(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -void tipc_bclink_remove_node(u32 addr) +void tipc_bclink_remove_node(struct net *net, u32 addr) { - tipc_bclink_lock(); - tipc_nmap_remove(&bclink->bcast_nodes, addr); - tipc_bclink_unlock(); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_bclink_lock(net); + tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); + tipc_bclink_unlock(net); } -static void bclink_set_last_sent(void) +static void bclink_set_last_sent(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (bcl->next_out) bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1); else bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1); } -u32 tipc_bclink_get_last_sent(void) +u32 tipc_bclink_get_last_sent(struct net *net) { - return bcl->fsm_msg_cnt; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return tn->bcl->fsm_msg_cnt; } static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) @@ -203,9 +154,11 @@ static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) * * Called with bclink_lock locked */ -struct tipc_node *tipc_bclink_retransmit_to(void) +struct tipc_node *tipc_bclink_retransmit_to(struct net *net) { - return bclink->retransmit_to; + struct tipc_net *tn = net_generic(net, tipc_net_id); + + 
return tn->bclink->retransmit_to; } /** @@ -215,9 +168,10 @@ struct tipc_node *tipc_bclink_retransmit_to(void) * * Called with bclink_lock locked */ -static void bclink_retransmit_pkt(u32 after, u32 to) +static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) { struct sk_buff *skb; + struct tipc_link *bcl = tn->bcl; skb_queue_walk(&bcl->outqueue, skb) { if (more(buf_seqno(skb), after)) { @@ -234,9 +188,10 @@ static void bclink_retransmit_pkt(u32 after, u32 to) */ void tipc_bclink_wakeup_users(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); struct sk_buff *skb; - while ((skb = skb_dequeue(&bclink->link.waiting_sks))) + while ((skb = skb_dequeue(&tn->bclink->link.waiting_sks))) tipc_sk_rcv(net, skb); } @@ -252,10 +207,12 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) struct sk_buff *skb, *tmp; struct sk_buff *next; unsigned int released = 0; + struct net *net = n_ptr->net; + struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); + tipc_bclink_lock(net); /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&bcl->outqueue); + skb = skb_peek(&tn->bcl->outqueue); if (!skb) goto exit; @@ -266,43 +223,43 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) * acknowledge sent messages only (if other nodes still exist) * or both sent and unsent messages (otherwise) */ - if (bclink->bcast_nodes.count) - acked = bcl->fsm_msg_cnt; + if (tn->bclink->bcast_nodes.count) + acked = tn->bcl->fsm_msg_cnt; else - acked = bcl->next_out_no; + acked = tn->bcl->next_out_no; } else { /* * Bail out if specified sequence number does not correspond * to a message that has been sent and not yet acknowledged */ if (less(acked, buf_seqno(skb)) || - less(bcl->fsm_msg_cnt, acked) || + less(tn->bcl->fsm_msg_cnt, acked) || less_eq(acked, n_ptr->bclink.acked)) goto exit; } /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&bcl->outqueue, skb) { + skb_queue_walk(&tn->bcl->outqueue, skb) { if (more(buf_seqno(skb), n_ptr->bclink.acked)) break; } /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&bcl->outqueue, skb, tmp) { + skb_queue_walk_from_safe(&tn->bcl->outqueue, skb, tmp) { if (more(buf_seqno(skb), acked)) break; - next = tipc_skb_queue_next(&bcl->outqueue, skb); - if (skb != bcl->next_out) { + next = tipc_skb_queue_next(&tn->bcl->outqueue, skb); + if (skb != tn->bcl->next_out) { bcbuf_decr_acks(skb); } else { bcbuf_set_acks(skb, 0); - bcl->next_out = next; - bclink_set_last_sent(); + tn->bcl->next_out = next; + bclink_set_last_sent(net); } if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &bcl->outqueue); + __skb_unlink(skb, &tn->bcl->outqueue); kfree_skb(skb); released = 1; } @@ -310,15 +267,15 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) n_ptr->bclink.acked = acked; /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(bcl->next_out)) { - tipc_link_push_packets(bcl); - bclink_set_last_sent(); + if (unlikely(tn->bcl->next_out)) { + tipc_link_push_packets(tn->bcl); + bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->waiting_sks))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; exit: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } /** @@ -368,10 +325,10 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, msg_set_bcgap_after(msg, n_ptr->bclink.last_in); 
msg_set_bcgap_to(msg, to); - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_bearer_send(net, MAX_BEARERS, buf, NULL); - bcl->stats.sent_nacks++; - tipc_bclink_unlock(); + tn->bcl->stats.sent_nacks++; + tipc_bclink_unlock(net); kfree_skb(buf); n_ptr->bclink.oos_state++; @@ -410,6 +367,9 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) */ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + struct tipc_bclink *bclink = tn->bclink; int rc = 0; int bc = 0; struct sk_buff *skb; @@ -423,19 +383,19 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) /* Broadcast to all other nodes */ if (likely(bclink)) { - tipc_bclink_lock(); + tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { rc = __tipc_link_xmit(net, bcl, list); if (likely(!rc)) { u32 len = skb_queue_len(&bcl->outqueue); - bclink_set_last_sent(); + bclink_set_last_sent(net); bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += len; } bc = 1; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } if (unlikely(!bc)) @@ -457,10 +417,12 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) */ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) { + struct tipc_net *tn = net_generic(node->net, tipc_net_id); + bclink_update_last_sent(node, seqno); node->bclink.last_in = seqno; node->bclink.oos_state = 0; - bcl->stats.recv_info++; + tn->bcl->stats.recv_info++; /* * Unicast an ACK periodically, ensuring that @@ -469,7 +431,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); - bcl->stats.sent_acks++; + tn->bcl->stats.sent_acks++; } } @@ -481,6 +443,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; struct tipc_msg *msg = buf_msg(buf); struct tipc_node *node; u32 next_in; @@ -506,12 +469,12 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) if (msg_destnode(msg) == tipc_own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); - tipc_bclink_lock(); + tipc_bclink_lock(net); bcl->stats.recv_nacks++; - bclink->retransmit_to = node; - bclink_retransmit_pkt(msg_bcgap_after(msg), + tn->bclink->retransmit_to = node; + bclink_retransmit_pkt(tn, msg_bcgap_after(msg), msg_bcgap_to(msg)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } else { tipc_node_unlock(node); bclink_peek_nack(net, msg); @@ -527,47 +490,47 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) receive: /* Deliver message to destination */ if (likely(msg_isdata(msg))) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); if (likely(msg_mcast(msg))) tipc_sk_mcast_rcv(net, buf); else kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); tipc_link_bundle_rcv(net, buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) 
goto unlock; - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; if (buf) { bcl->stats.recv_fragmented++; msg = buf_msg(buf); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); goto receive; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); tipc_named_rcv(net, buf); } else { - tipc_bclink_lock(); + tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); tipc_node_unlock(node); kfree_skb(buf); } @@ -605,14 +568,14 @@ receive: buf = NULL; } - tipc_bclink_lock(); + tipc_bclink_lock(net); if (deferred) bcl->stats.deferred_recv++; else bcl->stats.duplicates++; - tipc_bclink_unlock(); + tipc_bclink_unlock(net); unlock: tipc_node_unlock(node); @@ -623,7 +586,7 @@ exit: u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) { return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent() != n_ptr->bclink.acked)); + (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); } @@ -636,13 +599,15 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, + struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; struct tipc_msg *msg = buf_msg(buf); - struct net *net = sock_net(buf->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; + struct tipc_bclink *bclink = tn->bclink; /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; @@ -653,7 +618,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, bcbuf_set_acks(buf, bclink->bcast_nodes.count); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); - bcl->stats.sent_info++; + tn->bcl->stats.sent_info++; if (WARN_ON(!bclink->bcast_nodes.count)) { dump_stack(); @@ -708,13 +673,14 @@ void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, u32 node, bool action) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer = tn->bcbearer; struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; struct tipc_bcbearer_pair *bp_curr; struct tipc_bearer *b; int b_index; int pri; - tipc_bclink_lock(); + tipc_bclink_lock(net); if (action) tipc_nmap_add(nm_ptr, node); @@ -761,7 +727,7 @@ void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, bp_curr++; } - tipc_bclink_unlock(); + tipc_bclink_unlock(net); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -815,17 +781,19 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -860,7 +828,7 @@ int tipc_nl_add_bc_link(struct tipc_nl_msg *msg) if (err) goto attr_msg_full; 
- tipc_bclink_unlock(); + tipc_bclink_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -871,21 +839,23 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(); + tipc_bclink_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } -int tipc_bclink_stats(char *buf, const u32 buf_size) +int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size) { int ret; struct tipc_stats *s; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; - tipc_bclink_lock(); + tipc_bclink_lock(net); s = &bcl->stats; @@ -914,37 +884,46 @@ int tipc_bclink_stats(char *buf, const u32 buf_size) s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return ret; } -int tipc_bclink_reset_stats(void) +int tipc_bclink_reset_stats(struct net *net) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(); + tipc_bclink_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(u32 limit) +int tipc_bclink_set_queue_limits(struct net *net, u32 limit) { + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + if (!bcl) return -ENOPROTOOPT; if ((limit < TIPC_MIN_LINK_WIN) || (limit > TIPC_MAX_LINK_WIN)) return -EINVAL; - tipc_bclink_lock(); + tipc_bclink_lock(net); tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(); + tipc_bclink_unlock(net); return 0; } int tipc_bclink_init(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bcbearer *bcbearer; + struct tipc_bclink *bclink; + struct tipc_link *bcl; bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); if (!bcbearer) @@ -969,12 +948,16 @@ int tipc_bclink_init(struct net *net) spin_lock_init(&bclink->node.lock); __skb_queue_head_init(&bclink->node.waiting_sks); bcl->owner = &bclink->node; + bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); + tn->bcbearer = bcbearer; + tn->bclink = bclink; + tn->bcl = bcl; return 0; } @@ -982,14 +965,14 @@ void tipc_bclink_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_bclink_lock(); - tipc_link_purge_queues(bcl); - tipc_bclink_unlock(); + tipc_bclink_lock(net); + tipc_link_purge_queues(tn->bcl); + tipc_bclink_unlock(net); RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); synchronize_net(); - kfree(bcbearer); - kfree(bclink); + kfree(tn->bcbearer); + kfree(tn->bclink); } /** -- cgit v1.2.3 From 347475395434abb2b61bf59c2952470f37072567 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 9 Jan 2015 15:27:10 +0800 Subject: tipc: make tipc node address support net namespace If net namespaces are supported in TIPC, each namespace will be treated as a separate TIPC node. Therefore, every namespace must own a private TIPC node address. This means that the "tipc_own_addr" global variable holding the node address must be moved into the tipc_net structure to satisfy this requirement. As a result, users can also assign a node address for every namespace. Signed-off-by: Ying Xue Tested-by: Tero Aho Reviewed-by: Jon Maloy Signed-off-by: David S. Miller
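To tie the last two patches together, here is a hedged sketch of how the per-namespace broadcast-link state and node address might hang off the pernet init path sketched earlier. The field names follow the diffs above, but the exact init and exit call sites are an assumption on my part; in the real series they live in net/tipc/core.c.

/* additions to the per-namespace state introduced by these patches */
struct tipc_net {
	u32 own_addr;                    /* was global tipc_own_addr */
	struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
	struct tipc_bcbearer *bcbearer;  /* was global bcbearer */
	struct tipc_bclink *bclink;      /* was global bclink */
	struct tipc_link *bcl;           /* was global bcl */
	/* ... node table fields as sketched before ... */
};

static int __net_init tipc_init_net(struct net *net)
{
	struct tipc_net *tn = net_generic(net, tipc_net_id);

	tn->own_addr = 0;        /* assigned later, per namespace, by the user */
	return tipc_bclink_init(net);    /* allocates tn->bcbearer/bclink/bcl */
}

static void __net_exit tipc_exit_net(struct net *net)
{
	tipc_bclink_stop(net);           /* frees tn->bcbearer and tn->bclink */
}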
--- net/tipc/bcast.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index bc58097ebad2..53f8bf059fec 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -317,7 +317,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(net, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); @@ -428,7 +428,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) * Unicast an ACK periodically, ensuring that * all nodes in the cluster don't ACK at the same time */ - if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { + if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { tipc_link_proto_xmit(node->active_links[node->addr & 1], STATE_MSG, 0, 0, 0, 0, 0); tn->bcl->stats.sent_acks++; @@ -466,7 +466,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { if (msg_type(msg) != STATE_MSG) goto unlock; - if (msg_destnode(msg) == tipc_own_addr) { + if (msg_destnode(msg) == tn->own_addr) { tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); tipc_node_unlock(node); tipc_bclink_lock(net); -- cgit v1.2.3 From c5898636c440da91d58f10beac00f073e68378df Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:36 -0500 Subject: tipc: reduce usage of context info in socket and link The most common usage of namespace information is when we fetch our own node address from the net structure. This leads to a lot of passing around of a parameter of type 'struct net *' between functions just to make them able to obtain this address. However, in many cases this is unnecessary. The own node address is readily available as a member of both struct tipc_sock and tipc_link, and can be fetched from there instead. The fact that the vast majority of functions in socket.c and link.c anyway maintain a pointer to their respective base structures makes this option even more compelling. In this commit, we introduce the inline functions tsk_own_node() and link_own_node() to make it easy for functions to fetch the node address from those structs instead of having to pass along and dereference the namespace struct. In particular, we make calls to the msg_xx() functions in msg.{h,c} context independent by directly passing them the own node address as a parameter when needed. Those functions should be regarded as leaves in the code dependency tree, and it is hence desirable to keep them namespace unaware. Apart from a potential positive effect on cache behavior, these changes make it easier to introduce the changes that will follow later in this series. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller
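A hedged sketch of the accessor pattern this patch describes: the own node address is recovered from the message header template each base structure already carries, so no net pointer is needed. The socket variant matches the commit's naming; the link-side name and the exact field layout are assumptions on my part.

/* sketch: fetch the own node address from cached base structures
 * rather than dereferencing the namespace on every call
 */
static inline u32 tsk_own_node(struct tipc_sock *tsk)
{
	return msg_prevnode(&tsk->phdr);  /* header template cached in the socket */
}

static inline u32 link_own_node(struct tipc_link *l)
{
	return msg_prevnode(l->pmsg);     /* protocol message header kept by the link */
}

/* leaf helpers in msg.{h,c} then take the address as a plain parameter,
 * e.g. tipc_msg_init(own_node, msg, BCAST_PROTOCOL, STATE_MSG, ...),
 * staying namespace unaware, as the next diff shows.
 */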
--- net/tipc/bcast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 53f8bf059fec..3b886eb35c87 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -283,10 +283,11 @@ exit: * * RCU and node lock set */ -void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, +void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { struct sk_buff *buf; + struct net *net = n_ptr->net; struct tipc_net *tn = net_generic(net, tipc_net_id); /* Ignore "stale" link state info */ if (less_eq(last_sent, n_ptr->bclink.last_in)) @@ -317,7 +318,7 @@ void tipc_bclink_update_link_state(struct net *net, struct tipc_node *n_ptr, struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferred_queue); u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - tipc_msg_init(net, msg, BCAST_PROTOCOL, STATE_MSG, + tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tn->net_id); @@ -954,6 +955,8 @@ int tipc_bclink_init(struct net *net) bcl->bearer_id = MAX_BEARERS; rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); bcl->state = WORKING_WORKING; + bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; + msg_set_prevnode(bcl->pmsg, tn->own_addr); strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); tn->bcbearer = bcbearer; tn->bclink = bclink; -- cgit v1.2.3 From c637c1035534867b85b78b453c38c495b58e2c5a Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:41 -0500 Subject: tipc: resolve race problem at unicast message reception TIPC handles message cardinality and sequencing at the link layer, before passing messages upwards to the destination sockets. During the upcall from link to socket no locks are held. It is therefore possible, and we see it happen occasionally, that messages arriving in different threads and delivered in sequence still bypass each other before they reach the destination socket. This must not happen, since it violates the sequentiality guarantee. We solve this by adding a new input buffer queue to the link structure. Arriving messages are added safely to the tail of that queue by the link, while the head of the queue is consumed, also safely, by the receiving socket. Sequentiality is secured per socket by only allowing buffers to be dequeued inside the socket lock. Since there may be multiple simultaneous readers of the queue, we use a 'filter' parameter to reduce the risk that they peek the same buffer from the queue, hence also reducing the risk of contention on the receiving socket locks. This solves the sequentiality problem, and seems to cause no measurable performance degradation. A nice side effect of this change is that lock handling in the functions tipc_rcv() and tipc_bcast_rcv() now becomes uniform, something that will enable future simplifications of those functions. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller
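A hedged kernel-style sketch of the serialization idea just described (the delivery helper name is hypothetical; the skb queue helpers are the real API): the link appends arriving buffers to a per-link input queue, whose internal spinlock makes the tail-add safe from any thread, while the socket drains the head only under the socket lock, so delivery order can no longer be inverted between racing receive threads.

#include <linux/skbuff.h>
#include <net/sock.h>

/* producer side: link layer, callable from any receiving thread */
static void link_input_add(struct sk_buff_head *inputq, struct sk_buff *skb)
{
	skb_queue_tail(inputq, skb);        /* takes the queue's own lock */
}

/* consumer side: socket layer */
static void sock_input_drain(struct sock *sk, struct sk_buff_head *inputq)
{
	struct sk_buff *skb;

	lock_sock(sk);                      /* per-socket ordering guarantee */
	while ((skb = skb_dequeue(inputq))) /* head consumed in arrival order */
		sk_deliver_one(sk, skb);    /* hypothetical per-message handler */
	release_sock(sk);
}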
--- net/tipc/bcast.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3b886eb35c87..2dfaf272928a 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -189,10 +189,8 @@ static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) void tipc_bclink_wakeup_users(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *skb; - while ((skb = skb_dequeue(&tn->bclink->link.waiting_sks))) - tipc_sk_rcv(net, skb); + tipc_sk_rcv(net, &tn->bclink->link.wakeupq); } /** @@ -271,9 +269,8 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) tipc_link_push_packets(tn->bcl); bclink_set_last_sent(net); } - if (unlikely(released && !skb_queue_empty(&tn->bcl->waiting_sks))) + if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; - exit: tipc_bclink_unlock(net); } @@ -450,6 +447,9 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) u32 next_in; u32 seqno; int deferred = 0; + int pos = 0; + struct sk_buff *iskb; + struct sk_buff_head msgs; /* Screen out unwanted broadcast messages */ if (msg_mc_netid(msg) != tn->net_id) @@ -506,7 +506,8 @@ receive: bcl->stats.recv_bundled += msg_msgcnt(msg); tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_link_bundle_rcv(net, buf); + while (tipc_msg_extract(buf, &iskb, &pos)) + tipc_sk_mcast_rcv(net, iskb); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf, &buf); if (unlikely(!buf && !node->bclink.reasm_buf)) @@ -527,7 +528,9 @@ receive: bclink_accept_pkt(node, seqno); tipc_bclink_unlock(net); tipc_node_unlock(node); - tipc_named_rcv(net, buf); + skb_queue_head_init(&msgs); + skb_queue_tail(&msgs, buf); + tipc_named_rcv(net, &msgs); } else { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); @@ -944,10 +947,9 @@ int tipc_bclink_init(struct net *net) spin_lock_init(&bclink->lock); __skb_queue_head_init(&bcl->outqueue); __skb_queue_head_init(&bcl->deferred_queue); - skb_queue_head_init(&bcl->waiting_sks); + skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->node.waiting_sks); bcl->owner = &bclink->node; bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; -- cgit v1.2.3 From 3c724acdd5049907555a831f814bfd5927c3350c Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:43 -0500 Subject: tipc: simplify socket multicast reception The structure 'tipc_port_list' is used to collect port numbers representing multicast destination sockets on a receiving node. The list is not based on a standard linked list, and is in reality optimized for the uncommon case that there is more than one multicast destination per node. This makes the list handling unnecessarily complex, and as a consequence, even the socket multicast reception becomes more complex. In this commit, we replace 'tipc_port_list' with a new 'struct tipc_plist', which is based on a standard list. We give the new list stack (push/pop) semantics, something that simplifies the implementation of the function tipc_sk_mcast_rcv(). Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller
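As a hedged illustration of the stack semantics the commit describes, here is a minimal user-space sketch (the kernel's struct tipc_plist differs in detail and builds on list_head internals): push collects a port number while skipping duplicates, pop removes and returns one, so the multicast receive path can simply pop until the list is empty.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct plist {                          /* sketch of a list-based port list */
	struct plist *next;
	uint32_t port;
};

/* push a port, ignoring duplicates */
static void plist_push(struct plist **head, uint32_t port)
{
	struct plist *e;

	for (e = *head; e; e = e->next)
		if (e->port == port)
			return;         /* already collected */
	e = malloc(sizeof(*e));
	if (!e)
		return;
	e->port = port;
	e->next = *head;
	*head = e;
}

/* pop one port; returns 0 when the list is empty */
static uint32_t plist_pop(struct plist **head)
{
	struct plist *e = *head;
	uint32_t port;

	if (!e)
		return 0;
	port = e->port;
	*head = e->next;
	free(e);
	return port;
}

int main(void)
{
	struct plist *head = NULL;
	uint32_t port;

	plist_push(&head, 42);
	plist_push(&head, 99);
	plist_push(&head, 42);          /* duplicate, ignored */

	while ((port = plist_pop(&head)))
		printf("deliver to port %u\n", port);  /* 99, then 42 */
	return 0;
}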
--- net/tipc/bcast.c | 49 +------------------------------------------------ 1 file changed, 1 insertion(+), 48 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2dfaf272928a..3eaa931e2e8c 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014, Ericsson AB + * Copyright (c) 2004-2006, 2014-2015, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -1037,50 +1037,3 @@ static void tipc_nmap_diff(struct tipc_node_map *nm_a, } } } - -/** - * tipc_port_list_add - add a port to a port list, ensuring no duplicates - */ -void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port) -{ - struct tipc_port_list *item = pl_ptr; - int i; - int item_sz = PLSIZE; - int cnt = pl_ptr->count; - - for (; ; cnt -= item_sz, item = item->next) { - if (cnt < PLSIZE) - item_sz = cnt; - for (i = 0; i < item_sz; i++) - if (item->ports[i] == port) - return; - if (i < PLSIZE) { - item->ports[i] = port; - pl_ptr->count++; - return; - } - if (!item->next) { - item->next = kmalloc(sizeof(*item), GFP_ATOMIC); - if (!item->next) { - pr_warn("Incomplete multicast delivery, no memory\n"); - return; - } - item->next->next = NULL; - } - } -} - -/** - * tipc_port_list_free - free dynamically created entries in port_list chain - * - */ -void tipc_port_list_free(struct tipc_port_list *pl_ptr) -{ - struct tipc_port_list *item; - struct tipc_port_list *next; - - for (item = pl_ptr->next; item; item = next) { - next = item->next; - kfree(item); - } -} -- cgit v1.2.3 From cb1b728096f54e7408d60fb571944bed00c5b771 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 5 Feb 2015 08:36:44 -0500 Subject: tipc: eliminate race condition at multicast reception In a previous commit in this series we resolved a race problem during unicast message reception. Here, we resolve the same problem at multicast reception. We apply the same technique: an input queue serializing the delivery of arriving buffers. The main difference is that here we do it in two steps. First, the broadcast link feeds arriving buffers into the tail of an arrival queue, whose head is consumed at the socket level, and where destination lookup is performed. Second, if the lookup is successful, the resulting buffer clones are fed into a second queue, the input queue. This queue is consumed at reception in the socket just like in the unicast case. Both queues are protected by the same lock, namely that of the input queue. Reviewed-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller
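The two-step flow reads more clearly when laid out end to end. Below is a hedged kernel-style sketch assembled from the diff that follows (the lookup step is simplified away and the function names are illustrative); note how both stages deliberately take the input queue's lock, so a producer parking buffers on the arrival queue and a consumer moving clones onward can never interleave.

#include <linux/skbuff.h>

/* step 1: broadcast link (producer) parks the buffer at the tail of
 * the arrival queue, under the input queue's lock
 */
static void arrvq_add(struct sk_buff_head *arrvq,
		      struct sk_buff_head *inputq, struct sk_buff *skb)
{
	spin_lock_bh(&inputq->lock);
	__skb_queue_tail(arrvq, skb);   /* lockless variant: lock already held */
	spin_unlock_bh(&inputq->lock);
}

/* step 2: socket level (consumer) dequeues the head, resolves the
 * destinations, and feeds one clone per destination to the input queue
 */
static void arrvq_consume(struct sk_buff_head *arrvq,
			  struct sk_buff_head *inputq)
{
	struct sk_buff *skb, *clone;

	spin_lock_bh(&inputq->lock);
	skb = __skb_dequeue(arrvq);
	if (skb) {
		clone = skb_clone(skb, GFP_ATOMIC);  /* simplified: one destination */
		if (clone)
			__skb_queue_tail(inputq, clone);
		kfree_skb(skb);
	}
	spin_unlock_bh(&inputq->lock);
}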
--- net/tipc/bcast.c | 57 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3eaa931e2e8c..81b1fef1f5e0 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -79,6 +79,13 @@ static void tipc_bclink_unlock(struct net *net) tipc_link_reset_all(node); } +void tipc_bclink_input(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); +} + uint tipc_bclink_get_mtu(void) { return MAX_PKT_DEFAULT_MCAST; @@ -356,7 +363,7 @@ static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) tipc_node_unlock(n_ptr); } -/* tipc_bclink_xmit - broadcast buffer chain to all nodes in cluster +/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets * @net: the applicable net namespace * @list: chain of buffers containing message @@ -371,6 +378,8 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) int rc = 0; int bc = 0; struct sk_buff *skb; + struct sk_buff_head arrvq; + struct sk_buff_head inputq; /* Prepare clone of message for local node */ skb = tipc_msg_reassemble(list); @@ -379,7 +388,7 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) return -EHOSTUNREACH; } - /* Broadcast to all other nodes */ + /* Broadcast to all nodes */ if (likely(bclink)) { tipc_bclink_lock(net); if (likely(bclink->bcast_nodes.count)) { @@ -399,12 +408,15 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) if (unlikely(!bc)) __skb_queue_purge(list); - /* Deliver message clone */ - if (likely(!rc)) - tipc_sk_mcast_rcv(net, skb); - else + if (unlikely(rc)) { kfree_skb(skb); - + return rc; + } + /* Deliver message clone */ + __skb_queue_head_init(&arrvq); + skb_queue_head_init(&inputq); + __skb_queue_tail(&arrvq, skb); + tipc_sk_mcast_rcv(net, &arrvq, &inputq); return rc; } @@ -449,7 +461,7 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) int deferred = 0; int pos = 0; struct sk_buff *iskb; - struct sk_buff_head msgs; + struct sk_buff_head *arrvq, *inputq; /* Screen out unwanted broadcast messages */ if (msg_mc_netid(msg) != tn->net_id) @@ -486,6 +498,8 @@ void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) /* Handle in-sequence broadcast message */ seqno = msg_seqno(msg); next_in = mod(node->bclink.last_in + 1); + arrvq = &tn->bclink->arrvq; + inputq = &tn->bclink->inputq; if (likely(seqno == next_in)) { receive: @@ -493,21 +507,26 @@ receive: if (likely(msg_isdata(msg))) { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, buf); + spin_unlock_bh(&inputq->lock); + node->action_flags |= TIPC_BCAST_MSG_EVT; tipc_bclink_unlock(net); tipc_node_unlock(node); - if (likely(msg_mcast(msg))) - tipc_sk_mcast_rcv(net, buf); - else - kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); + pos = 0; + while (tipc_msg_extract(buf, &iskb, &pos)) { + spin_lock_bh(&inputq->lock); + __skb_queue_tail(arrvq, iskb); + spin_unlock_bh(&inputq->lock); + } + node->action_flags |= TIPC_BCAST_MSG_EVT; tipc_bclink_unlock(net); tipc_node_unlock(node); - while (tipc_msg_extract(buf, &iskb, &pos)) - tipc_sk_mcast_rcv(net, iskb); } else if (msg_user(msg) == MSG_FRAGMENTER) { tipc_buf_append(&node->bclink.reasm_buf,
&buf); if (unlikely(!buf && !node->bclink.reasm_buf)) @@ -523,14 +542,6 @@ receive: } tipc_bclink_unlock(net); tipc_node_unlock(node); - } else if (msg_user(msg) == NAME_DISTRIBUTOR) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - skb_queue_head_init(&msgs); - skb_queue_tail(&msgs, buf); - tipc_named_rcv(net, &msgs); } else { tipc_bclink_lock(net); bclink_accept_pkt(node, seqno); @@ -950,6 +961,8 @@ int tipc_bclink_init(struct net *net) skb_queue_head_init(&bcl->wakeupq); bcl->next_out_no = 1; spin_lock_init(&bclink->node.lock); + __skb_queue_head_init(&bclink->arrvq); + skb_queue_head_init(&bclink->inputq); bcl->owner = &bclink->node; bcl->owner->net = net; bcl->max_pkt = MAX_PKT_DEFAULT_MCAST; -- cgit v1.2.3 From bfb3e5dd8dfd84dfd13649393abab63e43267b00 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:03 +0100 Subject: tipc: move and rename the legacy nl api to "nl compat" The new netlink API is no longer "v2" but rather the standard API, and the legacy API is now "nl compat". We split them into separate start/stop functions and put them in different files in order to further distinguish them. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 81b1fef1f5e0..e96fd6a6d5c2 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -810,7 +810,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) tipc_bclink_lock(net); - hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_v2_family, + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; -- cgit v1.2.3 From f2b3b2d4ccbf9666f5f42a21347cd1aaa532b2fa Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:06 +0100 Subject: tipc: convert legacy nl link stat to nl compat Add functionality for safely appending string data to a TLV without keeping a write count in the caller. Convert TIPC_CMD_SHOW_LINK_STATS to a compat dumpit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/bcast.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'net/tipc/bcast.c') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e96fd6a6d5c2..3e41704832de 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -860,49 +860,6 @@ msg_full: return -EMSGSIZE; } -int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size) -{ - int ret; - struct tipc_stats *s; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bclink_lock(net); - - s = &bcl->stats; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " Window:%u packets\n", - bcl->name, bcl->queue_limit[0]); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX naks:%u defs:%u dups:%u\n", - s->recv_nacks, s->deferred_recv, s->duplicates); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX naks:%u acks:%u dups:%u\n", - s->sent_nacks, s->sent_acks, s->retransmitted); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue max:%u avg:%u\n", - s->link_congs, s->max_queue_sz, - s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_bclink_unlock(net); - return ret; -} - int tipc_bclink_reset_stats(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); -- cgit v1.2.3
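To close, a hedged user-space sketch of the TLV append idea from the last patch above (the kernel helper this stands in for lives in net/tipc/netlink_compat.c; the types and names below are simplified stand-ins): the running write position is derived from the TLV's own length field, so callers can append repeatedly without tracking a count themselves, which is what makes the stats dump conversion straightforward.

#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>

struct tlv {                    /* simplified stand-in for the TLV descriptor */
	uint16_t tlv_type;
	uint16_t tlv_len;       /* header + payload written so far */
	char     value[1024];
};

#define TLV_HDR ((uint16_t)4)   /* size of the two header fields */

/* append formatted text; the write position is recovered from tlv_len,
 * so the caller never maintains its own count
 */
static int tlv_sprintf(struct tlv *t, const char *fmt, ...)
{
	uint16_t pos = t->tlv_len - TLV_HDR;
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(t->value + pos, sizeof(t->value) - pos, fmt, args);
	va_end(args);
	if (n < 0 || n >= (int)(sizeof(t->value) - pos))
		return -1;      /* would overflow the TLV payload */
	t->tlv_len += n;
	return n;
}

int main(void)
{
	struct tlv t = { .tlv_type = 1, .tlv_len = TLV_HDR };

	tlv_sprintf(&t, "Link <%s>\n", "broadcast-link");
	tlv_sprintf(&t, "  Window:%u packets\n", 20u);
	printf("%s", t.value);  /* both lines, appended back to back */
	return 0;
}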