diff options
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 122 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 46 | ||||
-rw-r--r-- | drivers/net/bonding/bond_debugfs.c | 5 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 254 |
4 files changed, 258 insertions, 169 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index e3b25f310936..31e43a2197a3 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -31,16 +31,6 @@ #define AD_CHURN_DETECTION_TIME 60 #define AD_AGGREGATE_WAIT_TIME 2 -/* Port state definitions (43.4.2.2 in the 802.3ad standard) */ -#define AD_STATE_LACP_ACTIVITY 0x1 -#define AD_STATE_LACP_TIMEOUT 0x2 -#define AD_STATE_AGGREGATION 0x4 -#define AD_STATE_SYNCHRONIZATION 0x8 -#define AD_STATE_COLLECTING 0x10 -#define AD_STATE_DISTRIBUTING 0x20 -#define AD_STATE_DEFAULTED 0x40 -#define AD_STATE_EXPIRED 0x80 - /* Port Variables definitions used by the State Machines (43.4.7 in the * 802.3ad standard) */ @@ -457,8 +447,8 @@ static void __choose_matched(struct lacpdu *lacpdu, struct port *port) MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) && (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) && (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) && - ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || - ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0) + ((lacpdu->partner_state & LACP_STATE_AGGREGATION) == (port->actor_oper_port_state & LACP_STATE_AGGREGATION))) || + ((lacpdu->actor_state & LACP_STATE_AGGREGATION) == 0) ) { port->sm_vars |= AD_PORT_MATCHED; } else { @@ -492,18 +482,18 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port) partner->port_state = lacpdu->actor_state; /* set actor_oper_port_state.defaulted to FALSE */ - port->actor_oper_port_state &= ~AD_STATE_DEFAULTED; + port->actor_oper_port_state &= ~LACP_STATE_DEFAULTED; /* set the partner sync. to on if the partner is sync, * and the port is matched */ if ((port->sm_vars & AD_PORT_MATCHED) && - (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) { - partner->port_state |= AD_STATE_SYNCHRONIZATION; + (lacpdu->actor_state & LACP_STATE_SYNCHRONIZATION)) { + partner->port_state |= LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=1\n"); } else { - partner->port_state &= ~AD_STATE_SYNCHRONIZATION; + partner->port_state &= ~LACP_STATE_SYNCHRONIZATION; slave_dbg(port->slave->bond->dev, port->slave->dev, "partner sync=0\n"); } @@ -526,7 +516,7 @@ static void __record_default(struct port *port) sizeof(struct port_params)); /* set actor_oper_port_state.defaulted to true */ - port->actor_oper_port_state |= AD_STATE_DEFAULTED; + port->actor_oper_port_state |= LACP_STATE_DEFAULTED; } } @@ -556,7 +546,7 @@ static void __update_selected(struct lacpdu *lacpdu, struct port *port) !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) || ntohs(lacpdu->actor_system_priority) != partner->system_priority || ntohs(lacpdu->actor_key) != partner->key || - (lacpdu->actor_state & AD_STATE_AGGREGATION) != (partner->port_state & AD_STATE_AGGREGATION)) { + (lacpdu->actor_state & LACP_STATE_AGGREGATION) != (partner->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } @@ -588,8 +578,8 @@ static void __update_default_selected(struct port *port) !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) || admin->system_priority != oper->system_priority || admin->key != oper->key || - (admin->port_state & AD_STATE_AGGREGATION) - != (oper->port_state & AD_STATE_AGGREGATION)) { + (admin->port_state & LACP_STATE_AGGREGATION) + != (oper->port_state & LACP_STATE_AGGREGATION)) { port->sm_vars &= ~AD_PORT_SELECTED; } } @@ -619,10 +609,10 @@ static void __update_ntt(struct lacpdu *lacpdu, struct port *port) !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) || (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) || (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) || - ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) || - ((lacpdu->partner_state & AD_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)) || - ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) || - ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION)) + ((lacpdu->partner_state & LACP_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY)) || + ((lacpdu->partner_state & LACP_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)) || + ((lacpdu->partner_state & LACP_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) || + ((lacpdu->partner_state & LACP_STATE_AGGREGATION) != (port->actor_oper_port_state & LACP_STATE_AGGREGATION)) ) { port->ntt = true; } @@ -978,7 +968,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) * edable port will take place only after this timer) */ if ((port->sm_vars & AD_PORT_SELECTED) && - (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) && + (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { if (port->aggregator->is_active) port->sm_mux_state = @@ -996,14 +986,14 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) port->sm_mux_state = AD_MUX_DETACHED; } else if (port->aggregator->is_active) { port->actor_oper_port_state |= - AD_STATE_SYNCHRONIZATION; + LACP_STATE_SYNCHRONIZATION; } break; case AD_MUX_COLLECTING_DISTRIBUTING: if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || - !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) || - !(port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) { + !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) || + !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) { port->sm_mux_state = AD_MUX_ATTACHED; } else { /* if port state hasn't changed make @@ -1032,11 +1022,11 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) port->sm_mux_state); switch (port->sm_mux_state) { case AD_MUX_DETACHED: - port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; ad_disable_collecting_distributing(port, update_slave_arr); - port->actor_oper_port_state &= ~AD_STATE_COLLECTING; - port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; + port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; + port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; port->ntt = true; break; case AD_MUX_WAITING: @@ -1045,20 +1035,20 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) case AD_MUX_ATTACHED: if (port->aggregator->is_active) port->actor_oper_port_state |= - AD_STATE_SYNCHRONIZATION; + LACP_STATE_SYNCHRONIZATION; else port->actor_oper_port_state &= - ~AD_STATE_SYNCHRONIZATION; - port->actor_oper_port_state &= ~AD_STATE_COLLECTING; - port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; + ~LACP_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; + port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; ad_disable_collecting_distributing(port, update_slave_arr); port->ntt = true; break; case AD_MUX_COLLECTING_DISTRIBUTING: - port->actor_oper_port_state |= AD_STATE_COLLECTING; - port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; - port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state |= LACP_STATE_COLLECTING; + port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING; + port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION; ad_enable_collecting_distributing(port, update_slave_arr); port->ntt = true; @@ -1156,7 +1146,7 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) port->sm_vars |= AD_PORT_LACP_ENABLED; port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); - port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; port->sm_rx_state = AD_RX_PORT_DISABLED; /* Fall Through */ @@ -1166,9 +1156,9 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) case AD_RX_LACP_DISABLED: port->sm_vars &= ~AD_PORT_SELECTED; __record_default(port); - port->partner_oper.port_state &= ~AD_STATE_AGGREGATION; + port->partner_oper.port_state &= ~LACP_STATE_AGGREGATION; port->sm_vars |= AD_PORT_MATCHED; - port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_EXPIRED: /* Reset of the Synchronization flag (Standard 43.4.12) @@ -1177,19 +1167,19 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) * case of EXPIRED even if LINK_DOWN didn't arrive for * the port. */ - port->partner_oper.port_state &= ~AD_STATE_SYNCHRONIZATION; + port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION; port->sm_vars &= ~AD_PORT_MATCHED; - port->partner_oper.port_state |= AD_STATE_LACP_TIMEOUT; - port->partner_oper.port_state |= AD_STATE_LACP_ACTIVITY; + port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT; + port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY; port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT)); - port->actor_oper_port_state |= AD_STATE_EXPIRED; + port->actor_oper_port_state |= LACP_STATE_EXPIRED; port->sm_vars |= AD_PORT_CHURNED; break; case AD_RX_DEFAULTED: __update_default_selected(port); __record_default(port); port->sm_vars |= AD_PORT_MATCHED; - port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; case AD_RX_CURRENT: /* detect loopback situation */ @@ -1202,8 +1192,8 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) __update_selected(lacpdu, port); __update_ntt(lacpdu, port); __record_pdu(lacpdu, port); - port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)); - port->actor_oper_port_state &= ~AD_STATE_EXPIRED; + port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)); + port->actor_oper_port_state &= ~LACP_STATE_EXPIRED; break; default: break; @@ -1231,7 +1221,7 @@ static void ad_churn_machine(struct port *port) if (port->sm_churn_actor_timer_counter && !(--port->sm_churn_actor_timer_counter) && port->sm_churn_actor_state == AD_CHURN_MONITOR) { - if (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION) { + if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_actor_state = AD_NO_CHURN; } else { port->churn_actor_count++; @@ -1241,7 +1231,7 @@ static void ad_churn_machine(struct port *port) if (port->sm_churn_partner_timer_counter && !(--port->sm_churn_partner_timer_counter) && port->sm_churn_partner_state == AD_CHURN_MONITOR) { - if (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) { + if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) { port->sm_churn_partner_state = AD_NO_CHURN; } else { port->churn_partner_count++; @@ -1298,7 +1288,7 @@ static void ad_periodic_machine(struct port *port) /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & AD_STATE_LACP_ACTIVITY)) + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ) { port->sm_periodic_state = AD_NO_PERIODIC; } @@ -1315,11 +1305,11 @@ static void ad_periodic_machine(struct port *port) switch (port->sm_periodic_state) { case AD_FAST_PERIODIC: if (!(port->partner_oper.port_state - & AD_STATE_LACP_TIMEOUT)) + & LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; break; case AD_SLOW_PERIODIC: - if ((port->partner_oper.port_state & AD_STATE_LACP_TIMEOUT)) { + if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) { port->sm_periodic_timer_counter = 0; port->sm_periodic_state = AD_PERIODIC_TX; } @@ -1335,7 +1325,7 @@ static void ad_periodic_machine(struct port *port) break; case AD_PERIODIC_TX: if (!(port->partner_oper.port_state & - AD_STATE_LACP_TIMEOUT)) + LACP_STATE_LACP_TIMEOUT)) port->sm_periodic_state = AD_SLOW_PERIODIC; else port->sm_periodic_state = AD_FAST_PERIODIC; @@ -1542,7 +1532,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) ad_agg_selection_logic(aggregator, update_slave_arr); if (!port->aggregator->is_active) - port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; } /* Decide if "agg" is a better choice for the new active aggregator that @@ -1848,13 +1838,13 @@ static void ad_initialize_port(struct port *port, int lacp_fast) port->actor_port_priority = 0xff; port->actor_port_aggregator_identifier = 0; port->ntt = false; - port->actor_admin_port_state = AD_STATE_AGGREGATION | - AD_STATE_LACP_ACTIVITY; - port->actor_oper_port_state = AD_STATE_AGGREGATION | - AD_STATE_LACP_ACTIVITY; + port->actor_admin_port_state = LACP_STATE_AGGREGATION | + LACP_STATE_LACP_ACTIVITY; + port->actor_oper_port_state = LACP_STATE_AGGREGATION | + LACP_STATE_LACP_ACTIVITY; if (lacp_fast) - port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; + port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; memcpy(&port->partner_admin, &tmpl, sizeof(tmpl)); memcpy(&port->partner_oper, &tmpl, sizeof(tmpl)); @@ -2105,10 +2095,10 @@ void bond_3ad_unbind_slave(struct slave *slave) aggregator->aggregator_identifier); /* Tell the partner that this port is not suitable for aggregation */ - port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; - port->actor_oper_port_state &= ~AD_STATE_COLLECTING; - port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; - port->actor_oper_port_state &= ~AD_STATE_AGGREGATION; + port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION; + port->actor_oper_port_state &= ~LACP_STATE_COLLECTING; + port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING; + port->actor_oper_port_state &= ~LACP_STATE_AGGREGATION; __update_lacpdu_from_port(port); ad_lacpdu_send(port); @@ -2695,9 +2685,9 @@ void bond_3ad_update_lacp_rate(struct bonding *bond) bond_for_each_slave(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave)->port); if (lacp_fast) - port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; + port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT; else - port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT; + port->actor_oper_port_state &= ~LACP_STATE_LACP_TIMEOUT; } spin_unlock_bh(&bond->mode_lock); } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 8c79bad2a9a5..1cc2cd894f87 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -952,7 +952,7 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data) struct bond_vlan_tag *tags; if (is_vlan_dev(upper) && - bond->nest_level == vlan_get_encap_level(upper) - 1) { + bond->dev->lower_level == upper->lower_level - 1) { if (upper->addr_assign_type == NET_ADDR_STOLEN) { alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_proto(upper), @@ -1383,26 +1383,31 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) bool do_tx_balance = true; u32 hash_index = 0; const u8 *hash_start = NULL; - struct ipv6hdr *ip6hdr; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); switch (ntohs(skb->protocol)) { case ETH_P_IP: { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; if (is_broadcast_ether_addr(eth_data->h_dest) || - iph->daddr == ip_bcast || - iph->protocol == IPPROTO_IGMP) { + !pskb_network_may_pull(skb, sizeof(*iph))) { + do_tx_balance = false; + break; + } + iph = ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); - } break; - case ETH_P_IPV6: + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ @@ -1419,7 +1424,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - /* Additianally, DAD probes should not be tx-balanced as that + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { + do_tx_balance = false; + break; + } + /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. */ @@ -1429,17 +1438,26 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - hash_start = (char *)&(ipv6_hdr(skb)->daddr); - hash_size = sizeof(ipv6_hdr(skb)->daddr); + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); break; - case ETH_P_IPX: - if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { + } + case ETH_P_IPX: { + const struct ipxhdr *ipxhdr; + + if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { + do_tx_balance = false; + break; + } + ipxhdr = (struct ipxhdr *)skb_network_header(skb); + + if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { /* something is wrong with this packet */ do_tx_balance = false; break; } - if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { + if (ipxhdr->ipx_type != IPX_TYPE_NCP) { /* The only protocol worth balancing in * this family since it has an "ARP" like * mechanism @@ -1448,9 +1466,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } + eth_data = eth_hdr(skb); hash_start = (char *)eth_data->h_dest; hash_size = ETH_ALEN; break; + } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 1360f1ffe070..f3f86ef68ae0 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -55,11 +55,6 @@ void bond_debug_register(struct bonding *bond) bond->debug_dir = debugfs_create_dir(bond->dev->name, bonding_debug_root); - if (!bond->debug_dir) { - netdev_warn(bond->dev, "failed to register to debugfs\n"); - return; - } - debugfs_create_file("rlb_hash_table", 0400, bond->debug_dir, bond, &bond_debug_rlb_hash_fops); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 931d9d935686..48d5ec770b94 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -41,6 +41,8 @@ #include <linux/in.h> #include <net/ip.h> #include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/slab.h> @@ -200,6 +202,51 @@ atomic_t netpoll_block_tx = ATOMIC_INIT(0); unsigned int bond_net_id __read_mostly; +static const struct flow_dissector_key flow_keys_bonding_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_TIPC, + .offset = offsetof(struct flow_keys, addrs.tipckey), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, + { + .key_id = FLOW_DISSECTOR_KEY_ICMP, + .offset = offsetof(struct flow_keys, icmp), + }, + { + .key_id = FLOW_DISSECTOR_KEY_VLAN, + .offset = offsetof(struct flow_keys, vlan), + }, + { + .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, + .offset = offsetof(struct flow_keys, tags), + }, + { + .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, + .offset = offsetof(struct flow_keys, keyid), + }, +}; + +static struct flow_dissector flow_keys_bonding __read_mostly; + /*-------------------------- Forward declarations ---------------------------*/ static int bond_init(struct net_device *bond_dev); @@ -1733,8 +1780,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_upper_unlink; } - bond->nest_level = dev_get_nest_level(bond_dev) + 1; - /* If the mode uses primary, then the following is handled by * bond_change_active_slave(). */ @@ -1816,7 +1861,8 @@ err_detach: slave_disable_netpoll(new_slave); err_close: - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_restore_mac: @@ -1956,9 +2002,6 @@ static int __bond_release_one(struct net_device *bond_dev, if (!bond_has_slaves(bond)) { bond_set_carrier(bond); eth_hw_addr_random(bond_dev); - bond->nest_level = SINGLE_DEPTH_NESTING; - } else { - bond->nest_level = dev_get_nest_level(bond_dev) + 1; } unblock_netpoll_tx(); @@ -2017,7 +2060,8 @@ static int __bond_release_one(struct net_device *bond_dev, else dev_set_mtu(slave_dev, slave->original_mtu); - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; bond_free_slave(slave); @@ -2086,8 +2130,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !rcu_dereference(bond->curr_active_slave); bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; - slave->link_new_state = slave->link; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2121,7 +2164,7 @@ static int bond_miimon_inspect(struct bonding *bond) } if (slave->delay <= 0) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; continue; } @@ -2158,7 +2201,7 @@ static int bond_miimon_inspect(struct bonding *bond) slave->delay = 0; if (slave->delay <= 0) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; ignore_updelay = false; continue; @@ -2196,7 +2239,7 @@ static void bond_miimon_commit(struct bonding *bond) struct slave *slave, *primary; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: /* For 802.3ad mode, check current slave speed and * duplex again in case its port was disabled after @@ -2229,9 +2272,6 @@ static void bond_miimon_commit(struct bonding *bond) } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* make it immediately active */ bond_set_active_slave(slave); - } else if (slave != primary) { - /* prevent it from being the active one */ - bond_set_backup_slave(slave); } slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n", @@ -2268,8 +2308,8 @@ static void bond_miimon_commit(struct bonding *bond) default: slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n", - slave->new_link); - slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state); + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); continue; } @@ -2677,13 +2717,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2708,7 +2748,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2739,8 +2779,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) goto re_arm; bond_for_each_slave(bond, slave, iter) { - if (slave->new_link != BOND_LINK_NOCHANGE) - slave->link = slave->new_link; + if (slave->link_new_state != BOND_LINK_NOCHANGE) + slave->link = slave->link_new_state; } if (slave_state_changed) { @@ -2763,9 +2803,9 @@ re_arm: } /* Called to inspect slaves for active-backup mode ARP monitor link state - * changes. Sets new_link in slaves to specify what action should take - * place for the slave. Returns 0 if no changes are found, >0 if changes - * to link states must be committed. + * changes. Sets proposed link state in slaves to specify what action + * should take place for the slave. Returns 0 if no changes are found, >0 + * if changes to link states must be committed. * * Called with rcu_read_lock held. */ @@ -2777,12 +2817,12 @@ static int bond_ab_arp_inspect(struct bonding *bond) int commit = 0; bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); last_rx = slave_last_rx(bond, slave); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; } continue; @@ -2810,7 +2850,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (!bond_is_active_slave(slave) && !rcu_access_pointer(bond->current_arp_slave) && !bond_time_in_interval(bond, last_rx, 3)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } @@ -2823,7 +2863,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_is_active_slave(slave) && (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, last_rx, 2))) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } } @@ -2843,7 +2883,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *slave; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: continue; @@ -2893,8 +2933,9 @@ static void bond_ab_arp_commit(struct bonding *bond) continue; default: - slave_err(bond->dev, slave->dev, "impossible: new_link %d on slave\n", - slave->new_link); + slave_err(bond->dev, slave->dev, + "impossible: link_new_state %d on slave\n", + slave->link_new_state); continue; } @@ -3255,39 +3296,78 @@ static inline u32 bond_eth_hash(struct sk_buff *skb) return 0; } -/* Extract the appropriate headers based on bond's xmit policy */ -static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, - struct flow_keys *fk) +static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, + int *noff, int *proto, bool l34) { const struct ipv6hdr *iph6; const struct iphdr *iph; - int noff, proto = -1; - if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) - return skb_flow_dissect_flow_keys(skb, fk, 0); - - fk->ports.ports = 0; - noff = skb_network_offset(skb); if (skb->protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) + if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph)))) return false; - iph = ip_hdr(skb); + iph = (const struct iphdr *)(skb->data + *noff); iph_to_flow_copy_v4addrs(fk, iph); - noff += iph->ihl << 2; + *noff += iph->ihl << 2; if (!ip_is_fragment(iph)) - proto = iph->protocol; + *proto = iph->protocol; } else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) + if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6)))) return false; - iph6 = ipv6_hdr(skb); + iph6 = (const struct ipv6hdr *)(skb->data + *noff); iph_to_flow_copy_v6addrs(fk, iph6); - noff += sizeof(*iph6); - proto = iph6->nexthdr; + *noff += sizeof(*iph6); + *proto = iph6->nexthdr; } else { return false; } - if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) - fk->ports.ports = skb_flow_get_ports(skb, noff, proto); + + if (l34 && *proto >= 0) + fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto); + + return true; +} + +/* Extract the appropriate headers based on bond's xmit policy */ +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, + struct flow_keys *fk) +{ + bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; + int noff, proto = -1; + + if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) { + memset(fk, 0, sizeof(*fk)); + return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, + fk, NULL, 0, 0, 0, 0); + } + + fk->ports.ports = 0; + memset(&fk->icmp, 0, sizeof(fk->icmp)); + noff = skb_network_offset(skb); + if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) + return false; + + /* ICMP error packets contains at least 8 bytes of the header + * of the packet which generated the error. Use this information + * to correlate ICMP error packets within the same flow which + * generated the error. + */ + if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { + skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data, + skb_transport_offset(skb), + skb_headlen(skb)); + if (proto == IPPROTO_ICMP) { + if (!icmp_is_err(fk->icmp.type)) + return true; + + noff += sizeof(struct icmphdr); + } else if (proto == IPPROTO_ICMPV6) { + if (!icmpv6_is_err(fk->icmp.type)) + return true; + + noff += sizeof(struct icmp6hdr); + } + return bond_flow_ip(skb, fk, &noff, &proto, l34); + } return true; } @@ -3314,10 +3394,14 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) return bond_eth_hash(skb); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || - bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) + bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { hash = bond_eth_hash(skb); - else - hash = (__force u32)flow.ports.ports; + } else { + if (flow.icmp.id) + memcpy(&hash, &flow.icmp, sizeof(hash)); + else + memcpy(&hash, &flow.ports.ports, sizeof(hash)); + } hash ^= (__force u32)flow_get_u32_dst(&flow) ^ (__force u32)flow_get_u32_src(&flow); hash ^= (hash >> 16); @@ -3442,13 +3526,6 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } -static int bond_get_nest_level(struct net_device *bond_dev) -{ - struct bonding *bond = netdev_priv(bond_dev); - - return bond->nest_level; -} - static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3457,7 +3534,7 @@ static void bond_get_stats(struct net_device *bond_dev, struct list_head *iter; struct slave *slave; - spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev)); + spin_lock(&bond->stats_lock); memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); @@ -3622,32 +3699,35 @@ static int bond_neigh_init(struct neighbour *n) const struct net_device_ops *slave_ops; struct neigh_parms parms; struct slave *slave; - int ret; + int ret = 0; - slave = bond_first_slave(bond); + rcu_read_lock(); + slave = bond_first_slave_rcu(bond); if (!slave) - return 0; + goto out; slave_ops = slave->dev->netdev_ops; if (!slave_ops->ndo_neigh_setup) - return 0; - - parms.neigh_setup = NULL; - parms.neigh_cleanup = NULL; - ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); - if (ret) - return ret; + goto out; - /* Assign slave's neigh_cleanup to neighbour in case cleanup is called - * after the last slave has been detached. Assumes that all slaves - * utilize the same neigh_cleanup (true at this writing as only user - * is ipoib). + /* TODO: find another way [1] to implement this. + * Passing a zeroed structure is fragile, + * but at least we do not pass garbage. + * + * [1] One way would be that ndo_neigh_setup() never touch + * struct neigh_parms, but propagate the new neigh_setup() + * back to ___neigh_create() / neigh_parms_alloc() */ - n->parms->neigh_cleanup = parms.neigh_cleanup; + memset(&parms, 0, sizeof(parms)); + ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); - if (!parms.neigh_setup) - return 0; + if (ret) + goto out; - return parms.neigh_setup(n); + if (parms.neigh_setup) + ret = parms.neigh_setup(n); +out: + rcu_read_unlock(); + return ret; } /* The bonding ndo_neigh_setup is called at init time beofre any @@ -4039,7 +4119,7 @@ out: * this to-be-skipped slave to send a packet out. */ old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; idx < old_arr->count; idx++) { + for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { if (skipslave == old_arr->arr[idx]) { old_arr->arr[idx] = old_arr->arr[old_arr->count-1]; @@ -4268,7 +4348,6 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, - .ndo_get_lock_subclass = bond_get_nest_level, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_netpoll_setup = bond_netpoll_setup, .ndo_netpoll_cleanup = bond_netpoll_cleanup, @@ -4296,7 +4375,6 @@ void bond_setup(struct net_device *bond_dev) struct bonding *bond = netdev_priv(bond_dev); spin_lock_init(&bond->mode_lock); - spin_lock_init(&bond->stats_lock); bond->params = bonding_defaults; /* Initialize pointers */ @@ -4365,6 +4443,7 @@ static void bond_uninit(struct net_device *bond_dev) list_del(&bond->bond_list); + lockdep_unregister_key(&bond->stats_lock_key); bond_debug_unregister(bond); } @@ -4768,8 +4847,9 @@ static int bond_init(struct net_device *bond_dev) if (!bond->wq) return -ENOMEM; - bond->nest_level = SINGLE_DEPTH_NESTING; - netdev_lockdep_set_classes(bond_dev); + spin_lock_init(&bond->stats_lock); + lockdep_register_key(&bond->stats_lock_key); + lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key); list_add_tail(&bond->bond_list, &bn->dev_list); @@ -4901,6 +4981,10 @@ static int __init bonding_init(void) goto err; } + skb_flow_dissector_init(&flow_keys_bonding, + flow_keys_bonding_keys, + ARRAY_SIZE(flow_keys_bonding_keys)); + register_netdevice_notifier(&bond_netdev_notifier); out: return res; |