diff options
Diffstat (limited to 'drivers/net')
42 files changed, 2494 insertions, 389 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fea17b92b1ae..bd53a71f6b00 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1218,12 +1218,37 @@ static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond) } } +static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond, + enum netdev_lag_tx_type type) +{ + if (type != NETDEV_LAG_TX_TYPE_HASH) + return NETDEV_LAG_HASH_NONE; + + switch (bond->params.xmit_policy) { + case BOND_XMIT_POLICY_LAYER2: + return NETDEV_LAG_HASH_L2; + case BOND_XMIT_POLICY_LAYER34: + return NETDEV_LAG_HASH_L34; + case BOND_XMIT_POLICY_LAYER23: + return NETDEV_LAG_HASH_L23; + case BOND_XMIT_POLICY_ENCAP23: + return NETDEV_LAG_HASH_E23; + case BOND_XMIT_POLICY_ENCAP34: + return NETDEV_LAG_HASH_E34; + default: + return NETDEV_LAG_HASH_UNKNOWN; + } +} + static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave, struct netlink_ext_ack *extack) { struct netdev_lag_upper_info lag_upper_info; + enum netdev_lag_tx_type type; - lag_upper_info.tx_type = bond_lag_tx_type(bond); + type = bond_lag_tx_type(bond); + lag_upper_info.tx_type = type; + lag_upper_info.hash_type = bond_lag_hash_type(bond, type); return netdev_master_upper_dev_link(slave->dev, bond->dev, slave, &lag_upper_info, extack); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0f305d97f4ee..0dbe2d9e22d6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1738,6 +1738,9 @@ int t4_set_addr_hash(struct adapter *adap, unsigned int mbox, unsigned int viid, bool ucast, u64 vec, bool sleep_ok); int t4_enable_vi_params(struct adapter *adap, unsigned int mbox, unsigned int viid, bool rx_en, bool tx_en, bool dcb_en); +int t4_enable_pi_params(struct adapter *adap, unsigned int mbox, + struct port_info *pi, + bool rx_en, bool tx_en, bool dcb_en); int t4_enable_vi(struct adapter 
*adap, unsigned int mbox, unsigned int viid, bool rx_en, bool tx_en); int t4_identify_port(struct adapter *adap, unsigned int mbox, unsigned int viid, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 59d04d73c672..f7eef93ffc87 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -800,24 +800,20 @@ static int set_link_ksettings(struct net_device *dev, if (base->duplex != DUPLEX_FULL) return -EINVAL; - if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) { - /* PHY offers a single speed. See if that's what's - * being requested. - */ - if (base->autoneg == AUTONEG_DISABLE && - (lc->pcaps & speed_to_fw_caps(base->speed))) - return 0; - return -EINVAL; - } - old_lc = *lc; - if (base->autoneg == AUTONEG_DISABLE) { + if (!(lc->pcaps & FW_PORT_CAP32_ANEG) || + base->autoneg == AUTONEG_DISABLE) { fw_caps = speed_to_fw_caps(base->speed); - if (!(lc->pcaps & fw_caps)) + /* Must only specify a single speed which must be supported + * as part of the Physical Port Capabilities. 
+ */ + if ((fw_caps & (fw_caps - 1)) != 0 || + !(lc->pcaps & fw_caps)) return -EINVAL; + lc->speed_caps = fw_caps; - lc->acaps = 0; + lc->acaps = fw_caps; } else { fw_caps = lmm_to_fw_caps(link_ksettings->link_modes.advertising); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 513e1d356384..0efae2030e71 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -465,7 +465,7 @@ static int link_start(struct net_device *dev) &pi->link_cfg); if (ret == 0) { local_bh_disable(); - ret = t4_enable_vi_params(pi->adapter, mb, pi->viid, true, + ret = t4_enable_pi_params(pi->adapter, mb, pi, true, true, CXGB4_DCB_ENABLED); local_bh_enable(); } @@ -2344,7 +2344,8 @@ static int cxgb_close(struct net_device *dev) netif_tx_stop_all_queues(dev); netif_carrier_off(dev); - ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); + ret = t4_enable_pi_params(adapter, adapter->pf, pi, + false, false, false); #ifdef CONFIG_CHELSIO_T4_DCB cxgb4_dcb_reset(dev); dcb_tx_queue_prio_enable(dev, false); @@ -4140,6 +4141,10 @@ static int adap_init0(struct adapter *adap) * card */ card_fw = kvzalloc(sizeof(*card_fw), GFP_KERNEL); + if (!card_fw) { + ret = -ENOMEM; + goto bye; + } /* Get FW from from /lib/firmware/ */ ret = request_firmware(&fw, fw_info->fw_mod_name, @@ -5236,14 +5241,11 @@ static void free_some_resources(struct adapter *adapter) NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA) #define SEGMENT_SIZE 128 -static int get_chip_type(struct pci_dev *pdev, u32 pl_rev) +static int t4_get_chip_type(struct adapter *adap, int ver) { - u16 device_id; + u32 pl_rev = REV_G(t4_read_reg(adap, PL_REV_A)); - /* Retrieve adapter's device ID */ - pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id); - - switch (device_id >> 12) { + switch (ver) { case CHELSIO_T4: return CHELSIO_CHIP_CODE(CHELSIO_T4, pl_rev); case CHELSIO_T5: @@ -5251,8 +5253,7 @@ static int 
get_chip_type(struct pci_dev *pdev, u32 pl_rev) case CHELSIO_T6: return CHELSIO_CHIP_CODE(CHELSIO_T6, pl_rev); default: - dev_err(&pdev->dev, "Device %d is not supported\n", - device_id); + break; } return -EINVAL; } @@ -5422,15 +5423,18 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - int func, i, err, s_qpp, qpp, num_seg; + struct net_device *netdev; + struct adapter *adapter; + static int adap_idx = 1; + int s_qpp, qpp, num_seg; struct port_info *pi; bool highdma = false; - struct adapter *adapter = NULL; - struct net_device *netdev; - void __iomem *regs; - u32 whoami, pl_rev; enum chip_type chip; - static int adap_idx = 1; + void __iomem *regs; + int func, chip_ver; + u16 device_id; + int i, err; + u32 whoami; printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); @@ -5466,11 +5470,17 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; /* We control everything through one PF */ - whoami = readl(regs + PL_WHOAMI_A); - pl_rev = REV_G(readl(regs + PL_REV_A)); - chip = get_chip_type(pdev, pl_rev); - func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ? - SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); + whoami = t4_read_reg(adapter, PL_WHOAMI_A); + pci_read_config_word(pdev, PCI_DEVICE_ID, &device_id); + chip = t4_get_chip_type(adapter, CHELSIO_PCI_ID_VER(device_id)); + if (chip < 0) { + dev_err(&pdev->dev, "Device %d is not supported\n", device_id); + err = chip; + goto out_free_adapter; + } + chip_ver = CHELSIO_CHIP_VERSION(chip); + func = chip_ver <= CHELSIO_T5 ? 
+ SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); adapter->pdev = pdev; adapter->pdev_dev = &pdev->dev; @@ -5636,7 +5646,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_TC; - if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5) { + if (chip_ver > CHELSIO_T5) { netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | @@ -5716,7 +5726,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "could not allocate MPS Encap entries, continuing\n"); #if IS_ENABLED(CONFIG_IPV6) - if ((CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) && + if (chip_ver <= CHELSIO_T5 && (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) { /* CLIP functionality is not present in hardware, * hence disable all offload features diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h index 54b718111e3f..721c77577ec5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h @@ -34,6 +34,8 @@ #ifndef __T4_CHIP_TYPE_H__ #define __T4_CHIP_TYPE_H__ +#define CHELSIO_PCI_ID_VER(__DeviceID) ((__DeviceID) >> 12) + #define CHELSIO_T4 0x4 #define CHELSIO_T5 0x5 #define CHELSIO_T6 0x6 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 704f6960a6ea..39da7e3c804b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4066,6 +4066,7 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox, fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap; struct fw_port_cmd cmd; unsigned int fw_mdi; + int ret; fw_mdi = (FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO) & lc->pcaps); /* Convert driver coding of Pause Frame Flow Control settings into the @@ -4100,6 +4101,13 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int 
mbox, rcap = lc->acaps | fw_fc | fw_fec | fw_mdi; } + if (rcap & ~lc->pcaps) { + dev_err(adapter->pdev_dev, + "Requested Port Capabilities %#x exceed Physical Port Capabilities %#x\n", + rcap, lc->pcaps); + return -EINVAL; + } + /* And send that on to the Firmware ... */ memset(&cmd, 0, sizeof(cmd)); @@ -4110,13 +4118,21 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox, cpu_to_be32(FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16 ? FW_PORT_ACTION_L1_CFG : FW_PORT_ACTION_L1_CFG32) | - FW_LEN16(cmd)); + FW_LEN16(cmd)); if (fw_caps == FW_CAPS16) cmd.u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(rcap)); else cmd.u.l1cfg32.rcap32 = cpu_to_be32(rcap); - return t4_wr_mbox_meat_timeout(adapter, mbox, &cmd, sizeof(cmd), NULL, - sleep_ok, timeout); + + ret = t4_wr_mbox_meat_timeout(adapter, mbox, &cmd, sizeof(cmd), NULL, + sleep_ok, timeout); + if (ret) { + dev_err(adapter->pdev_dev, + "Requested Port Capabilities %#x rejected, error %d\n", + rcap, -ret); + return ret; + } + return ret; } /** @@ -7998,6 +8014,34 @@ int t4_enable_vi(struct adapter *adap, unsigned int mbox, unsigned int viid, } /** + * t4_enable_pi_params - enable/disable a Port's Virtual Interface + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @pi: the Port Information structure + * @rx_en: 1=enable Rx, 0=disable Rx + * @tx_en: 1=enable Tx, 0=disable Tx + * @dcb_en: 1=enable delivery of Data Center Bridging messages. + * + * Enables/disables a Port's Virtual Interface. Note that setting DCB + * Enable only makes sense when enabling a Virtual Interface ... + * If the Virtual Interface enable/disable operation is successful, + * we notify the OS-specific code of a potential Link Status change + * via the OS Contract API t4_os_link_changed(). 
+ */ +int t4_enable_pi_params(struct adapter *adap, unsigned int mbox, + struct port_info *pi, + bool rx_en, bool tx_en, bool dcb_en) +{ + int ret = t4_enable_vi_params(adap, mbox, pi->viid, + rx_en, tx_en, dcb_en); + if (ret) + return ret; + t4_os_link_changed(adap, pi->port_id, + rx_en && tx_en && pi->link_cfg.link_ok); + return 0; +} + +/** * t4_identify_port - identify a VI's port by blinking its LED * @adap: the adapter * @mbox: mailbox to use for the FW command @@ -8395,7 +8439,9 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) lc->lpacaps = lpacaps; lc->acaps = acaps & ADVERT_MASK; - if (lc->acaps & FW_PORT_CAP32_ANEG) { + if (!(lc->acaps & FW_PORT_CAP32_ANEG)) { + lc->autoneg = AUTONEG_DISABLE; + } else if (lc->acaps & FW_PORT_CAP32_ANEG) { lc->autoneg = AUTONEG_ENABLE; } else { /* When Autoneg is disabled, user needs to set @@ -8600,6 +8646,13 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps, lc->requested_fec = FEC_AUTO; lc->fec = fwcap_to_cc_fec(lc->def_acaps); + /* If the Port is capable of Auto-Negtotiation, initialize it as + * "enabled" and copy over all of the Physical Port Capabilities + * to the Advertised Port Capabilities. Otherwise mark it as + * Auto-Negotiate disabled and select the highest supported speed + * for the link. Note parallel structure in t4_link_l1cfg_core() + * and t4_handle_get_port_info(). 
+ */ if (lc->pcaps & FW_PORT_CAP32_ANEG) { lc->acaps = lc->pcaps & ADVERT_MASK; lc->autoneg = AUTONEG_ENABLE; @@ -8607,6 +8660,7 @@ static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps, } else { lc->acaps = 0; lc->autoneg = AUTONEG_DISABLE; + lc->speed_caps = fwcap_to_fwspeed(acaps); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 71f13bd2b5e4..ff84791a0ff8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -274,7 +274,7 @@ static int link_start(struct net_device *dev) * is enabled on a port. */ if (ret == 0) - ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true); + ret = t4vf_enable_pi(pi->adapter, pi, true, true); /* The Virtual Interfaces are connected to an internal switch on the * chip which allows VIs attached to the same port to talk to each @@ -822,8 +822,7 @@ static int cxgb4vf_stop(struct net_device *dev) netif_tx_stop_all_queues(dev); netif_carrier_off(dev); - t4vf_enable_vi(adapter, pi->viid, false, false); - pi->link_cfg.link_ok = 0; + t4vf_enable_pi(adapter, pi, false, false); clear_bit(pi->port_id, &adapter->open_device_map); if (adapter->open_device_map == 0) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 712e8f0c71b4..ccca67cf4487 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -391,7 +391,10 @@ int t4vf_config_rss_range(struct adapter *, unsigned int, int, int, int t4vf_alloc_vi(struct adapter *, int); int t4vf_free_vi(struct adapter *, int); -int t4vf_enable_vi(struct adapter *, unsigned int, bool, bool); +int t4vf_enable_vi(struct adapter *adapter, unsigned int viid, bool rx_en, + bool tx_en); +int t4vf_enable_pi(struct adapter *adapter, struct port_info *pi, bool rx_en, + bool tx_en); int t4vf_identify_port(struct 
adapter *, unsigned int, unsigned int); int t4vf_set_rxmode(struct adapter *, unsigned int, int, int, int, int, int, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 3017f7873ff9..5b8c08cf523f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -405,6 +405,36 @@ static unsigned int fwcap_to_speed(fw_port_cap32_t caps) return 0; } +/** + * fwcap_to_fwspeed - return highest speed in Port Capabilities + * @acaps: advertised Port Capabilities + * + * Get the highest speed for the port from the advertised Port + * Capabilities. It will be either the highest speed from the list of + * speeds or whatever user has set using ethtool. + */ +static fw_port_cap32_t fwcap_to_fwspeed(fw_port_cap32_t acaps) +{ + #define TEST_SPEED_RETURN(__caps_speed) \ + do { \ + if (acaps & FW_PORT_CAP32_SPEED_##__caps_speed) \ + return FW_PORT_CAP32_SPEED_##__caps_speed; \ + } while (0) + + TEST_SPEED_RETURN(400G); + TEST_SPEED_RETURN(200G); + TEST_SPEED_RETURN(100G); + TEST_SPEED_RETURN(50G); + TEST_SPEED_RETURN(40G); + TEST_SPEED_RETURN(25G); + TEST_SPEED_RETURN(10G); + TEST_SPEED_RETURN(1G); + TEST_SPEED_RETURN(100M); + + #undef TEST_SPEED_RETURN + return 0; +} + /* * init_link_config - initialize a link's SW state * @lc: structure holding the link state @@ -431,6 +461,13 @@ static void init_link_config(struct link_config *lc, lc->requested_fec = FEC_AUTO; lc->fec = lc->auto_fec; + /* If the Port is capable of Auto-Negtotiation, initialize it as + * "enabled" and copy over all of the Physical Port Capabilities + * to the Advertised Port Capabilities. Otherwise mark it as + * Auto-Negotiate disabled and select the highest supported speed + * for the link. Note parallel structure in t4_link_l1cfg_core() + * and t4_handle_get_port_info(). 
+ */ if (lc->pcaps & FW_PORT_CAP32_ANEG) { lc->acaps = acaps & ADVERT_MASK; lc->autoneg = AUTONEG_ENABLE; @@ -438,6 +475,7 @@ static void init_link_config(struct link_config *lc, } else { lc->acaps = 0; lc->autoneg = AUTONEG_DISABLE; + lc->speed_caps = fwcap_to_fwspeed(acaps); } } @@ -1362,6 +1400,30 @@ int t4vf_enable_vi(struct adapter *adapter, unsigned int viid, } /** + * t4vf_enable_pi - enable/disable a Port's virtual interface + * @adapter: the adapter + * @pi: the Port Information structure + * @rx_en: 1=enable Rx, 0=disable Rx + * @tx_en: 1=enable Tx, 0=disable Tx + * + * Enables/disables a Port's virtual interface. If the Virtual + * Interface enable/disable operation is successful, we notify the + * OS-specific code of a potential Link Status change via the OS Contract + * API t4vf_os_link_changed(). + */ +int t4vf_enable_pi(struct adapter *adapter, struct port_info *pi, + bool rx_en, bool tx_en) +{ + int ret = t4vf_enable_vi(adapter, pi->viid, rx_en, tx_en); + + if (ret) + return ret; + t4vf_os_link_changed(adapter, pi->pidx, + rx_en && tx_en && pi->link_cfg.link_ok); + return 0; +} + +/** * t4vf_identify_port - identify a VI's port by blinking its LED * @adapter: the adapter * @viid: the Virtual Interface ID @@ -1955,7 +2017,14 @@ static void t4vf_handle_get_port_info(struct port_info *pi, lc->lpacaps = lpacaps; lc->acaps = acaps & ADVERT_MASK; - if (lc->acaps & FW_PORT_CAP32_ANEG) { + /* If we're not physically capable of Auto-Negotiation, note + * this as Auto-Negotiation disabled. Otherwise, we track + * what Auto-Negotiation settings we have. Note parallel + * structure in init_link_config(). 
+ */ + if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) { + lc->autoneg = AUTONEG_DISABLE; + } else if (lc->acaps & FW_PORT_CAP32_ANEG) { lc->autoneg = AUTONEG_ENABLE; } else { /* When Autoneg is disabled, user needs to set diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index f81439796ac7..d438ef8a371d 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -466,12 +466,6 @@ static int fec_ptp_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } -/** - * fec_ptp_hwtstamp_ioctl - control hardware time stamping - * @ndev: pointer to net_device - * @ifreq: ioctl data - * @cmd: particular ioctl requested - */ int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4bb4646a5f92..09f8e6baf049 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -109,13 +109,14 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, struct ibmvnic_sub_crq_queue *); static int ibmvnic_poll(struct napi_struct *napi, int data); static void send_map_query(struct ibmvnic_adapter *adapter); -static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); -static void send_request_unmap(struct ibmvnic_adapter *, u8); +static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); +static int send_request_unmap(struct ibmvnic_adapter *, u8); static int send_login(struct ibmvnic_adapter *adapter); static void send_cap_queries(struct ibmvnic_adapter *adapter); static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); static int ibmvnic_init(struct ibmvnic_adapter *); +static int ibmvnic_reset_init(struct ibmvnic_adapter *); static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device 
*netdev, struct sockaddr *p); static int init_crq_queue(struct ibmvnic_adapter *adapter); @@ -172,6 +173,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb, int size) { struct device *dev = &adapter->vdev->dev; + int rc; ltb->size = size; ltb->buff = dma_alloc_coherent(dev, ltb->size, <b->addr, @@ -185,8 +187,12 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, adapter->map_id++; init_completion(&adapter->fw_done); - send_request_map(adapter, ltb->addr, - ltb->size, ltb->map_id); + rc = send_request_map(adapter, ltb->addr, + ltb->size, ltb->map_id); + if (rc) { + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); + return rc; + } wait_for_completion(&adapter->fw_done); if (adapter->fw_done_rc) { @@ -215,10 +221,14 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, static int reset_long_term_buff(struct ibmvnic_adapter *adapter, struct ibmvnic_long_term_buff *ltb) { + int rc; + memset(ltb->buff, 0, ltb->size); init_completion(&adapter->fw_done); - send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); + rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); + if (rc) + return rc; wait_for_completion(&adapter->fw_done); if (adapter->fw_done_rc) { @@ -789,6 +799,7 @@ static void release_napi(struct ibmvnic_adapter *adapter) kfree(adapter->napi); adapter->napi = NULL; adapter->num_active_rx_napi = 0; + adapter->napi_enabled = false; } static int ibmvnic_login(struct net_device *netdev) @@ -919,6 +930,10 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state) /* Partuial success, delay and re-send */ mdelay(1000); resend = true; + } else if (adapter->init_done_rc) { + netdev_warn(netdev, "Unable to set link state, rc=%d\n", + adapter->init_done_rc); + return adapter->init_done_rc; } } while (resend); @@ -951,6 +966,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) struct device *dev = &adapter->vdev->dev; union ibmvnic_crq 
crq; int len = 0; + int rc; if (adapter->vpd->buff) len = adapter->vpd->len; @@ -958,7 +974,9 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) init_completion(&adapter->fw_done); crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; crq.get_vpd_size.cmd = GET_VPD_SIZE; - ibmvnic_send_crq(adapter, &crq); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) + return rc; wait_for_completion(&adapter->fw_done); if (!adapter->vpd->len) @@ -991,7 +1009,12 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) crq.get_vpd.cmd = GET_VPD; crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr); crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len); - ibmvnic_send_crq(adapter, &crq); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) { + kfree(adapter->vpd->buff); + adapter->vpd->buff = NULL; + return rc; + } wait_for_completion(&adapter->fw_done); return 0; @@ -1690,6 +1713,7 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p) struct ibmvnic_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; union ibmvnic_crq crq; + int rc; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -1700,7 +1724,9 @@ static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p) ether_addr_copy(&crq.change_mac_addr.mac_addr[0], addr->sa_data); init_completion(&adapter->fw_done); - ibmvnic_send_crq(adapter, &crq); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) + return rc; wait_for_completion(&adapter->fw_done); /* netdev->dev_addr is changed in handle_change_mac_rsp function */ return adapter->fw_done_rc ? 
-EIO : 0; @@ -1782,7 +1808,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, return rc; } - rc = ibmvnic_init(adapter); + rc = ibmvnic_reset_init(adapter); if (rc) return IBMVNIC_INIT_FAILED; @@ -1852,6 +1878,85 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } +static int do_hard_reset(struct ibmvnic_adapter *adapter, + struct ibmvnic_rwi *rwi, u32 reset_state) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n", + rwi->reset_reason); + + netif_carrier_off(netdev); + adapter->reset_reason = rwi->reset_reason; + + ibmvnic_cleanup(netdev); + release_resources(adapter); + release_sub_crqs(adapter, 0); + release_crq_queue(adapter); + + /* remove the closed state so when we call open it appears + * we are coming from the probed state. + */ + adapter->state = VNIC_PROBED; + + rc = init_crq_queue(adapter); + if (rc) { + netdev_err(adapter->netdev, + "Couldn't initialize crq. rc=%d\n", rc); + return rc; + } + + rc = ibmvnic_init(adapter); + if (rc) + return rc; + + /* If the adapter was in PROBE state prior to the reset, + * exit here. 
+ */ + if (reset_state == VNIC_PROBED) + return 0; + + rc = ibmvnic_login(netdev); + if (rc) { + adapter->state = VNIC_PROBED; + return 0; + } + /* netif_set_real_num_xx_queues needs to take rtnl lock here + * unless wait_for_reset is set, in which case the rtnl lock + * has already been taken before initializing the reset + */ + if (!adapter->wait_for_reset) { + rtnl_lock(); + rc = init_resources(adapter); + rtnl_unlock(); + } else { + rc = init_resources(adapter); + } + if (rc) + return rc; + + ibmvnic_disable_irqs(adapter); + adapter->state = VNIC_CLOSED; + + if (reset_state == VNIC_CLOSED) + return 0; + + rc = __ibmvnic_open(netdev); + if (rc) { + if (list_empty(&adapter->rwi_list)) + adapter->state = VNIC_CLOSED; + else + adapter->state = reset_state; + + return 0; + } + + netif_carrier_on(netdev); + + return 0; +} + static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) { struct ibmvnic_rwi *rwi; @@ -1893,14 +1998,19 @@ static void __ibmvnic_reset(struct work_struct *work) netdev = adapter->netdev; mutex_lock(&adapter->reset_lock); - adapter->resetting = true; reset_state = adapter->state; rwi = get_next_rwi(adapter); while (rwi) { - rc = do_reset(adapter, rwi, reset_state); + if (adapter->force_reset_recovery) { + adapter->force_reset_recovery = false; + rc = do_hard_reset(adapter, rwi, reset_state); + } else { + rc = do_reset(adapter, rwi, reset_state); + } kfree(rwi); - if (rc && rc != IBMVNIC_INIT_FAILED) + if (rc && rc != IBMVNIC_INIT_FAILED && + !adapter->force_reset_recovery) break; rwi = get_next_rwi(adapter); @@ -1926,9 +2036,9 @@ static void __ibmvnic_reset(struct work_struct *work) static int ibmvnic_reset(struct ibmvnic_adapter *adapter, enum ibmvnic_reset_reason reason) { + struct list_head *entry, *tmp_entry; struct ibmvnic_rwi *rwi, *tmp; struct net_device *netdev = adapter->netdev; - struct list_head *entry; int ret; if (adapter->state == VNIC_REMOVING || @@ -1964,11 +2074,17 @@ static int ibmvnic_reset(struct ibmvnic_adapter 
*adapter, ret = ENOMEM; goto err; } - + /* if we just received a transport event, + * flush reset queue and process this reset + */ + if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) { + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) + list_del(entry); + } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); mutex_unlock(&adapter->rwi_lock); - + adapter->resetting = true; netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); schedule_work(&adapter->ibmvnic_reset); @@ -2364,6 +2480,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, struct ibmvnic_adapter *adapter = netdev_priv(dev); union ibmvnic_crq crq; int i, j; + int rc; memset(&crq, 0, sizeof(crq)); crq.request_statistics.first = IBMVNIC_CRQ_CMD; @@ -2374,7 +2491,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, /* Wait for data to be written */ init_completion(&adapter->stats_done); - ibmvnic_send_crq(adapter, &crq); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) + return; wait_for_completion(&adapter->stats_done); for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) @@ -3146,6 +3265,12 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, (unsigned long int)cpu_to_be64(u64_crq[0]), (unsigned long int)cpu_to_be64(u64_crq[1])); + if (!adapter->crq.active && + crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { + dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n"); + return -EINVAL; + } + /* Make sure the hypervisor sees the complete request */ mb(); @@ -3370,8 +3495,8 @@ buf_alloc_failed: return -1; } -static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, - u32 len, u8 map_id) +static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, + u32 len, u8 map_id) { union ibmvnic_crq crq; @@ -3381,10 +3506,10 @@ static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, crq.request_map.map_id = 
map_id; crq.request_map.ioba = cpu_to_be32(addr); crq.request_map.len = cpu_to_be32(len); - ibmvnic_send_crq(adapter, &crq); + return ibmvnic_send_crq(adapter, &crq); } -static void send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) +static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) { union ibmvnic_crq crq; @@ -3392,7 +3517,7 @@ static void send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) crq.request_unmap.first = IBMVNIC_CRQ_CMD; crq.request_unmap.cmd = REQUEST_UNMAP; crq.request_unmap.map_id = map_id; - ibmvnic_send_crq(adapter, &crq); + return ibmvnic_send_crq(adapter, &crq); } static void send_map_query(struct ibmvnic_adapter *adapter) @@ -4219,11 +4344,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, dev_info(dev, "Partner initialized\n"); adapter->from_passive_init = true; adapter->failover_pending = false; - complete(&adapter->init_done); + if (!completion_done(&adapter->init_done)) { + complete(&adapter->init_done); + adapter->init_done_rc = -EIO; + } ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); break; case IBMVNIC_CRQ_INIT_COMPLETE: dev_info(dev, "Partner initialization complete\n"); + adapter->crq.active = true; send_version_xchg(adapter); break; default: @@ -4232,6 +4361,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, return; case IBMVNIC_CRQ_XPORT_EVENT: netif_carrier_off(netdev); + adapter->crq.active = false; + if (adapter->resetting) + adapter->force_reset_recovery = true; if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Migrated, re-enabling adapter\n"); ibmvnic_reset(adapter, VNIC_RESET_MOBILITY); @@ -4419,6 +4551,7 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) /* Clean out the queue */ memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; + crq->active = false; /* And re-open it again */ rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, @@ -4453,6 +4586,7 @@ static void release_crq_queue(struct ibmvnic_adapter *adapter) DMA_BIDIRECTIONAL); 
free_page((unsigned long)crq->msgs); crq->msgs = NULL; + crq->active = false; } static int init_crq_queue(struct ibmvnic_adapter *adapter) @@ -4530,7 +4664,7 @@ map_failed: return retrc; } -static int ibmvnic_init(struct ibmvnic_adapter *adapter) +static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; unsigned long timeout = msecs_to_jiffies(30000); @@ -4589,6 +4723,49 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) return rc; } +static int ibmvnic_init(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + unsigned long timeout = msecs_to_jiffies(30000); + int rc; + + adapter->from_passive_init = false; + + init_completion(&adapter->init_done); + adapter->init_done_rc = 0; + ibmvnic_send_crq_init(adapter); + if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { + dev_err(dev, "Initialization sequence timed out\n"); + return -1; + } + + if (adapter->init_done_rc) { + release_crq_queue(adapter); + return adapter->init_done_rc; + } + + if (adapter->from_passive_init) { + adapter->state = VNIC_OPEN; + adapter->from_passive_init = false; + return -1; + } + + rc = init_sub_crqs(adapter); + if (rc) { + dev_err(dev, "Initialization of sub crqs failed\n"); + release_crq_queue(adapter); + return rc; + } + + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, "Failed to initialize sub crq irqs\n"); + release_crq_queue(adapter); + } + + return rc; +} + static struct device_attribute dev_attr_failover; static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 22391e8805f6..f9fb780102ac 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -865,6 +865,7 @@ struct ibmvnic_crq_queue { int size, cur; dma_addr_t msg_token; spinlock_t lock; + bool active; }; union sub_crq { @@ -1108,6 +1109,7 @@ struct ibmvnic_adapter { bool 
mac_change_pending; bool failover_pending; + bool force_reset_recovery; struct ibmvnic_tunables desired; struct ibmvnic_tunables fallback; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5efa68de935b..9b698c5acd05 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * @dev: netdev * @xdp: XDP buffer * - * Returns Zero if sent, else an error code + * Returns number of frames successfully sent. Frames that fail are + * free'ed via XDP return API. + * + * For error cases, a negative errno code is returned and no-frames + * are transmitted (caller must handle freeing frames). **/ -int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) +int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames) { struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id(); struct i40e_vsi *vsi = np->vsi; - int err; + int drops = 0; + int i; if (test_bit(__I40E_VSI_DOWN, vsi->state)) return -ENETDOWN; @@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs) return -ENXIO; - err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); - if (err != I40E_XDP_TX) - return -ENOSPC; + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; - return 0; + err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); + if (err != I40E_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + return n - drops; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index fdd2c55f03a6..eb8804b3d7b6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -487,7 +487,7 @@ u32 
i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); -int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); +int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames); void i40e_xdp_flush(struct net_device *dev); /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a52d92e182ee..031d65c4178d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) +static int ixgbe_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring; - int err; + int drops = 0; + int i; if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) return -ENETDOWN; @@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) if (unlikely(!ring)) return -ENXIO; - err = ixgbe_xmit_xdp_ring(adapter, xdpf); - if (err != IXGBE_XDP_TX) - return -ENOSPC; + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; - return 0; + err = ixgbe_xmit_xdp_ring(adapter, xdpf); + if (err != IXGBE_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + return n - drops; } static void ixgbe_xdp_flush(struct net_device *dev) diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 6373f56205fd..4afb10375397 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -37,6 +37,7 @@ ifeq ($(CONFIG_NFP_APP_FLOWER),y) nfp-objs += \ flower/action.o \ flower/cmsg.o \ + flower/lag_conf.o \ 
flower/main.o \ flower/match.o \ flower/metadata.o \ diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index a4d3da215863..8a92088df0d7 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -212,6 +212,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) } static void +__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer, + bool set, bool src_lmextn) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BIT_BASE | + FIELD_PREP(OP_BR_BIT_A_SRC, areg) | + FIELD_PREP(OP_BR_BIT_B_SRC, breg) | + FIELD_PREP(OP_BR_BIT_BV, set) | + FIELD_PREP(OP_BR_BIT_DEFBR, defer) | + FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) | + FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, + u8 defer, bool set, enum nfp_relo_type relo) +{ + struct nfp_insn_re_regs reg; + int err; + + /* NOTE: The bit to test is specified as a rotation amount, such that + * the bit to test will be placed on the MSB of the result when + * doing a rotate right. For bit X, we need right rotate X + 1. 
+ */ + bit += 1; + + err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set, + reg.src_lmextn); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); +} + +static void +emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer) +{ + emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL); +} + +static void __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, enum immed_width width, bool invert, enum immed_shift shift, bool wr_both, @@ -310,6 +364,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst, } static void +emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc) +{ + if (sc == SHF_SC_R_ROT) { + pr_err("indirect shift is not allowed on rotation\n"); + nfp_prog->error = -EFAULT; + return; + } + + emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0); +} + +static void __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, bool dst_lmextn, bool src_lmextn) @@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +/* Pseudo code: + * if shift_amt >= 32 + * dst_high = dst_low << shift_amt[4:0] + * dst_low = 0; + * else + * dst_high = (dst_high, dst_low) >> (32 - shift_amt) + * dst_low = dst_low << shift_amt + * + * The indirect shift will use the same logic at runtime. 
+ */ +static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), + SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF, + 32 - shift_amt); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt); + } else if (shift_amt == 32) { + wrp_reg_mov(nfp_prog, dst + 1, dst); + wrp_immed(nfp_prog, reg_both(dst), 0); + } else if (shift_amt > 32) { + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF, shift_amt - 32); + wrp_immed(nfp_prog, reg_both(dst), 0); + } + + return 0; +} + static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - if (insn->imm < 32) { - emit_shf(nfp_prog, reg_both(dst + 1), - reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), - SHF_SC_R_DSHF, 32 - insn->imm); - emit_shf(nfp_prog, reg_both(dst), - reg_none(), SHF_OP_NONE, reg_b(dst), - SHF_SC_L_SHF, insn->imm); - } else if (insn->imm == 32) { - wrp_reg_mov(nfp_prog, dst + 1, dst); - wrp_immed(nfp_prog, reg_both(dst), 0); - } else if (insn->imm > 32) { - emit_shf(nfp_prog, reg_both(dst + 1), - reg_none(), SHF_OP_NONE, reg_b(dst), - SHF_SC_L_SHF, insn->imm - 32); - wrp_immed(nfp_prog, reg_both(dst), 0); + return __shl_imm64(nfp_prog, dst, insn->imm); +} + +static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB, + reg_b(src)); + emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF); +} + +/* NOTE: for indirect left shift, HIGH part should be calculated first. 
*/ +static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF); +} + +static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + shl_reg64_lt32_high(nfp_prog, dst, src); + shl_reg64_lt32_low(nfp_prog, dst, src); +} + +static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst), SHF_SC_L_SHF); + wrp_immed(nfp_prog, reg_both(dst), 0); +} + +static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin; + umax = meta->umax; + if (umin == umax) + return __shl_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + shl_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + shl_reg64_ge32(nfp_prog, dst, src); + } else { + /* Generate different instruction sequences depending on runtime + * value of shift amount. + */ + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 7; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + + shl_reg64_lt32_high(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* shl_reg64_lt32_low packed in delay slot. 
*/ + shl_reg64_lt32_low(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + shl_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +/* Pseudo code: + * if shift_amt >= 32 + * dst_high = 0; + * dst_low = dst_high >> shift_amt[4:0] + * else + * dst_high = dst_high >> shift_amt + * dst_low = (dst_high, dst_low) >> shift_amt + * + * The indirect shift will use the same logic at runtime. + */ +static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF, shift_amt); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); + } else if (shift_amt == 32) { + wrp_reg_mov(nfp_prog, dst, dst + 1); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else if (shift_amt > 32) { + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); } return 0; @@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - if (insn->imm < 32) { - emit_shf(nfp_prog, reg_both(dst), - reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), - SHF_SC_R_DSHF, insn->imm); - emit_shf(nfp_prog, reg_both(dst + 1), - reg_none(), SHF_OP_NONE, reg_b(dst + 1), - SHF_SC_R_SHF, insn->imm); - } else if (insn->imm == 32) { + return __shr_imm64(nfp_prog, dst, insn->imm); +} + +/* NOTE: for indirect right shift, LOW part should be calculated first. 
*/ +static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF); +} + +static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF); +} + +static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + shr_reg64_lt32_low(nfp_prog, dst, src); + shr_reg64_lt32_high(nfp_prog, dst, src); +} + +static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, + reg_b(dst + 1), SHF_SC_R_SHF); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin; + umax = meta->umax; + if (umin == umax) + return __shr_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + shr_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + shr_reg64_ge32(nfp_prog, dst, src); + } else { + /* Generate different instruction sequences depending on runtime + * value of shift amount. + */ + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + shr_reg64_lt32_low(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* shr_reg64_lt32_high packed in delay slot. 
*/ + shr_reg64_lt32_high(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + shr_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; + } + + return 0; +} + +/* Code logic is the same as __shr_imm64 except ashr requires signedness bit + * told through PREV_ALU result. + */ +static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +{ + if (shift_amt < 32) { + emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE, + reg_b(dst), SHF_SC_R_DSHF, shift_amt); + /* Set signedness bit. */ + emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt); + } else if (shift_amt == 32) { + /* NOTE: this also helps setting signedness bit. */ wrp_reg_mov(nfp_prog, dst, dst + 1); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); - } else if (insn->imm > 32) { - emit_shf(nfp_prog, reg_both(dst), - reg_none(), SHF_OP_NONE, reg_b(dst + 1), - SHF_SC_R_SHF, insn->imm - 32); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); + } else if (shift_amt > 32) { + emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR, + reg_imm(0)); + emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); + } + + return 0; +} + +static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + return __ashr_imm64(nfp_prog, dst, insn->imm); +} + +static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + /* NOTE: the first insn will set both indirect shift amount (source A) + * and signedness bit (MSB 
of result). + */ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); + emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF); +} + +static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + /* NOTE: it is the same as logic shift because we don't need to shift in + * signedness bit when the shift amount is less than 32. + */ + return shr_reg64_lt32_low(nfp_prog, dst, src); +} + +static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + ashr_reg64_lt32_low(nfp_prog, dst, src); + ashr_reg64_lt32_high(nfp_prog, dst, src); +} + +static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src) +{ + emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1)); + emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF); + emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR, + reg_b(dst + 1), SHF_SC_R_SHF, 31); +} + +/* Like ashr_imm64, but need to use indirect shift. */ +static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 umin, umax; + u8 dst, src; + + dst = insn->dst_reg * 2; + umin = meta->umin; + umax = meta->umax; + if (umin == umax) + return __ashr_imm64(nfp_prog, dst, umin); + + src = insn->src_reg * 2; + if (umax < 32) { + ashr_reg64_lt32(nfp_prog, dst, src); + } else if (umin >= 32) { + ashr_reg64_ge32(nfp_prog, dst, src); + } else { + u16 label_ge32, label_end; + + label_ge32 = nfp_prog_current_offset(nfp_prog) + 6; + emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0); + ashr_reg64_lt32_low(nfp_prog, dst, src); + label_end = nfp_prog_current_offset(nfp_prog) + 6; + emit_br(nfp_prog, BR_UNC, label_end, 2); + /* ashr_reg64_lt32_high packed in delay slot. 
*/ + ashr_reg64_lt32_high(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32)) + return -EINVAL; + ashr_reg64_ge32(nfp_prog, dst, src); + + if (!nfp_prog_confirm_current_offset(nfp_prog, label_end)) + return -EINVAL; } return 0; @@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, [BPF_ALU64 | BPF_NEG] = neg_reg64, + [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64, [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64, + [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64, [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 8b143546ae85..654fe7823e5e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -263,6 +263,8 @@ struct nfp_bpf_reg_state { * @func_id: function id for call instructions * @arg1: arg1 for call instructions * @arg2: arg2 for call instructions + * @umin: copy of core verifier umin_value. + * @umax: copy of core verifier umax_value. * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @flags: eBPF instruction extra optimization flags @@ -298,6 +300,13 @@ struct nfp_insn_meta { struct bpf_reg_state arg1; struct nfp_bpf_reg_state arg2; }; + /* We are interested in range info for some operands, + * for example, the shift amount. 
+ */ + struct { + u64 umin; + u64 umax; + }; }; unsigned int off; unsigned short n; @@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); } +static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta) +{ + u8 code = meta->insn.code; + bool is_alu, is_shift; + u8 opclass, opcode; + + opclass = BPF_CLASS(code); + is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU; + if (!is_alu) + return false; + + opcode = BPF_OP(code); + is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH; + if (!is_shift) + return false; + + return BPF_SRC(code) == BPF_X; +} + /** * struct nfp_prog - nfp BPF program * @bpf: backpointer to the bpf app priv structure diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 4db0ac1e42a8..7eae4c0266f8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, meta->insn = prog[i]; meta->n = i; + if (is_mbpf_indir_shift(meta)) + meta->umin = U64_MAX; list_add_tail(&meta->l, &nfp_prog->insns); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 844a9be6e55a..4bfeba7b21b2 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (is_mbpf_xadd(meta)) return nfp_bpf_check_xadd(nfp_prog, meta, env); + if (is_mbpf_indir_shift(meta)) { + const struct bpf_reg_state *sreg = + cur_regs(env) + meta->insn.src_reg; + + meta->umin = min(meta->umin, sreg->umin_value); + meta->umax = max(meta->umax, sreg->umax_value); + } + return 0; } diff --git 
a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 80df9a5d4217..4a6d2db75071 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -72,6 +72,42 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } +static int +nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action, + struct nfp_fl_payload *nfp_flow, int act_len) +{ + size_t act_size = sizeof(struct nfp_fl_pre_lag); + struct nfp_fl_pre_lag *pre_lag; + struct net_device *out_dev; + int err; + + out_dev = tcf_mirred_dev(action); + if (!out_dev || !netif_is_lag_master(out_dev)) + return 0; + + if (act_len + act_size > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + /* Pre_lag action must be first on action list. + * If other actions already exist they need to be pushed forward. + */ + if (act_len) + memmove(nfp_flow->action_data + act_size, + nfp_flow->action_data, act_len); + + pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data; + err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag); + if (err) + return err; + + pre_lag->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_LAG; + pre_lag->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + + return act_size; +} + static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev, enum nfp_flower_tun_type tun_type) { @@ -88,12 +124,13 @@ static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev, } static int -nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, - struct nfp_fl_payload *nfp_flow, bool last, - struct net_device *in_dev, enum nfp_flower_tun_type tun_type, - int *tun_out_cnt) +nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, + const struct tc_action *action, struct nfp_fl_payload *nfp_flow, + bool last, struct net_device *in_dev, + enum nfp_flower_tun_type 
tun_type, int *tun_out_cnt) { size_t act_size = sizeof(struct nfp_fl_output); + struct nfp_flower_priv *priv = app->priv; struct net_device *out_dev; u16 tmp_flags; @@ -118,6 +155,15 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, output->flags = cpu_to_be16(tmp_flags | NFP_FL_OUT_FLAGS_USE_TUN); output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else if (netif_is_lag_master(out_dev) && + priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + int gid; + + output->flags = cpu_to_be16(tmp_flags); + gid = nfp_flower_lag_get_output_id(app, out_dev); + if (gid < 0) + return gid; + output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); } else { /* Set action output parameters. */ output->flags = cpu_to_be16(tmp_flags); @@ -164,7 +210,7 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) struct nfp_fl_pre_tunnel *pre_tun_act; /* Pre_tunnel action must be first on action list. - * If other actions already exist they need pushed forward. + * If other actions already exist they need to be pushed forward. 
*/ if (act_len) memmove(act_data + act_size, act_data, act_len); @@ -443,42 +489,73 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) } static int -nfp_flower_loop_action(const struct tc_action *a, +nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, bool last, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, + int *out_cnt) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_output *output; + int err, prelag_size; + + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_output(app, output, a, nfp_fl, last, netdev, *tun_type, + tun_out_cnt); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_output); + + if (priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + /* nfp_fl_pre_lag returns -err or size of prelag action added. + * This will be 0 if it is not egressing to a lag dev. 
+ */ + prelag_size = nfp_fl_pre_lag(app, a, nfp_fl, *a_len); + if (prelag_size < 0) + return prelag_size; + else if (prelag_size > 0 && (!last || *out_cnt)) + return -EOPNOTSUPP; + + *a_len += prelag_size; + } + (*out_cnt)++; + + return 0; +} + +static int +nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, - enum nfp_flower_tun_type *tun_type, int *tun_out_cnt) + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, + int *out_cnt) { struct nfp_fl_set_ipv4_udp_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_pop_vlan *pop_v; - struct nfp_fl_output *output; int err; if (is_tcf_gact_shot(a)) { nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP); } else if (is_tcf_mirred_egress_redirect(a)) { - if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) - return -EOPNOTSUPP; - - output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type, - tun_out_cnt); + err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, + true, tun_type, tun_out_cnt, + out_cnt); if (err) return err; - *a_len += sizeof(struct nfp_fl_output); } else if (is_tcf_mirred_egress_mirror(a)) { - if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) - return -EOPNOTSUPP; - - output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type, - tun_out_cnt); + err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev, + false, tun_type, tun_out_cnt, + out_cnt); if (err) return err; - *a_len += sizeof(struct nfp_fl_output); } else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) return -EOPNOTSUPP; @@ -535,11 +612,12 @@ nfp_flower_loop_action(const struct tc_action *a, return 0; } -int nfp_flower_compile_action(struct tc_cls_flower_offload 
*flow, +int nfp_flower_compile_action(struct nfp_app *app, + struct tc_cls_flower_offload *flow, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err, tun_out_cnt; + int act_len, act_cnt, err, tun_out_cnt, out_cnt; enum nfp_flower_tun_type tun_type; const struct tc_action *a; LIST_HEAD(actions); @@ -550,11 +628,12 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, act_len = 0; act_cnt = 0; tun_out_cnt = 0; + out_cnt = 0; tcf_exts_to_list(flow->exts, &actions); list_for_each_entry(a, &actions, list) { - err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev, - &tun_type, &tun_out_cnt); + err = nfp_flower_loop_action(app, a, nfp_flow, &act_len, netdev, + &tun_type, &tun_out_cnt, &out_cnt); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 577659f332e4..cb8565222621 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -239,8 +239,10 @@ nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb) static void nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) { + struct nfp_flower_priv *app_priv = app->priv; struct nfp_flower_cmsg_hdr *cmsg_hdr; enum nfp_flower_cmsg_type_port type; + bool skb_stored = false; cmsg_hdr = nfp_flower_cmsg_get_hdr(skb); @@ -258,13 +260,20 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: nfp_tunnel_keep_alive(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG: + if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + skb_stored = nfp_flower_lag_unprocessed_msg(app, skb); + break; + } + /* fall through */ default: nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", type); goto out; } - dev_consume_skb_any(skb); + if (!skb_stored) + dev_consume_skb_any(skb); return; out: dev_kfree_skb_any(skb); diff 
--git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index bee4367a2c38..4a7f3510a296 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -92,6 +92,7 @@ #define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 #define NFP_FL_ACTION_OPCODE_SET_UDP 14 #define NFP_FL_ACTION_OPCODE_SET_TCP 15 +#define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -103,6 +104,9 @@ #define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) +/* LAG ports */ +#define NFP_FL_LAG_OUT 0xC0DE0000 + /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 #define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) @@ -177,6 +181,15 @@ struct nfp_fl_pop_vlan { __be16 reserved; }; +struct nfp_fl_pre_lag { + struct nfp_fl_act_head head; + __be16 group_id; + u8 lag_version[3]; + u8 instance; +}; + +#define NFP_FL_PRE_LAG_VER_OFF 8 + struct nfp_fl_pre_tunnel { struct nfp_fl_act_head head; __be16 reserved; @@ -366,6 +379,7 @@ struct nfp_flower_cmsg_hdr { enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0, NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, + NFP_FLOWER_CMSG_TYPE_LAG_CONFIG = 4, NFP_FLOWER_CMSG_TYPE_PORT_REIFY = 6, NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c new file mode 100644 index 000000000000..0c4c957717ea --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -0,0 +1,726 @@ +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. 
+ * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "main.h" + +/* LAG group config flags. */ +#define NFP_FL_LAG_LAST BIT(1) +#define NFP_FL_LAG_FIRST BIT(2) +#define NFP_FL_LAG_DATA BIT(3) +#define NFP_FL_LAG_XON BIT(4) +#define NFP_FL_LAG_SYNC BIT(5) +#define NFP_FL_LAG_SWITCH BIT(6) +#define NFP_FL_LAG_RESET BIT(7) + +/* LAG port state flags. 
*/ +#define NFP_PORT_LAG_LINK_UP BIT(0) +#define NFP_PORT_LAG_TX_ENABLED BIT(1) +#define NFP_PORT_LAG_CHANGED BIT(2) + +enum nfp_fl_lag_batch { + NFP_FL_LAG_BATCH_FIRST, + NFP_FL_LAG_BATCH_MEMBER, + NFP_FL_LAG_BATCH_FINISHED +}; + +/** + * struct nfp_flower_cmsg_lag_config - control message payload for LAG config + * @ctrl_flags: Configuration flags + * @reserved: Reserved for future use + * @ttl: Time to live of packet - host always sets to 0xff + * @pkt_number: Config message packet number - increment for each message + * @batch_ver: Batch version of messages - increment for each batch of messages + * @group_id: Group ID applicable + * @group_inst: Group instance number - increment when group is reused + * @members: Array of 32-bit words listing all active group members + */ +struct nfp_flower_cmsg_lag_config { + u8 ctrl_flags; + u8 reserved[2]; + u8 ttl; + __be32 pkt_number; + __be32 batch_ver; + __be32 group_id; + __be32 group_inst; + __be32 members[]; +}; + +/** + * struct nfp_fl_lag_group - list entry for each LAG group + * @group_id: Assigned group ID for host/kernel sync + * @group_inst: Group instance in case of ID reuse + * @list: List entry + * @master_ndev: Group master Netdev + * @dirty: Marked if the group needs synced to HW + * @offloaded: Marked if the group is currently offloaded to NIC + * @to_remove: Marked if the group should be removed from NIC + * @to_destroy: Marked if the group should be removed from driver + * @slave_cnt: Number of slaves in group + */ +struct nfp_fl_lag_group { + unsigned int group_id; + u8 group_inst; + struct list_head list; + struct net_device *master_ndev; + bool dirty; + bool offloaded; + bool to_remove; + bool to_destroy; + unsigned int slave_cnt; +}; + +#define NFP_FL_LAG_PKT_NUMBER_MASK GENMASK(30, 0) +#define NFP_FL_LAG_VERSION_MASK GENMASK(22, 0) +#define NFP_FL_LAG_HOST_TTL 0xff + +/* Use this ID with zero members to ack a batch config */ +#define NFP_FL_LAG_SYNC_ID 0 +#define NFP_FL_LAG_GROUP_MIN 1 /* ID 0 
reserved */ +#define NFP_FL_LAG_GROUP_MAX 32 /* IDs 1 to 31 are valid */ + +/* wait for more config */ +#define NFP_FL_LAG_DELAY (msecs_to_jiffies(2)) + +#define NFP_FL_LAG_RETRANS_LIMIT 100 /* max retrans cmsgs to store */ + +static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag) +{ + lag->pkt_num++; + lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK; + + return lag->pkt_num; +} + +static void nfp_fl_increment_version(struct nfp_fl_lag *lag) +{ + /* LSB is not considered by firmware so add 2 for each increment. */ + lag->batch_ver += 2; + lag->batch_ver &= NFP_FL_LAG_VERSION_MASK; + + /* Zero is reserved by firmware. */ + if (!lag->batch_ver) + lag->batch_ver += 2; +} + +static struct nfp_fl_lag_group * +nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master) +{ + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + int id; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN, + NFP_FL_LAG_GROUP_MAX, GFP_KERNEL); + if (id < 0) { + nfp_flower_cmsg_warn(priv->app, + "No more bonding groups available\n"); + return ERR_PTR(id); + } + + group = kmalloc(sizeof(*group), GFP_KERNEL); + if (!group) { + ida_simple_remove(&lag->ida_handle, id); + return ERR_PTR(-ENOMEM); + } + + group->group_id = id; + group->master_ndev = master; + group->dirty = true; + group->offloaded = false; + group->to_remove = false; + group->to_destroy = false; + group->slave_cnt = 0; + group->group_inst = ++lag->global_inst; + list_add_tail(&group->list, &lag->group_list); + + return group; +} + +static struct nfp_fl_lag_group * +nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag, + struct net_device *master) +{ + struct nfp_fl_lag_group *entry; + + if (!master) + return NULL; + + list_for_each_entry(entry, &lag->group_list, list) + if (entry->master_ndev == master) + return entry; + + return NULL; +} + +int nfp_flower_lag_populate_pre_action(struct nfp_app *app, + struct 
net_device *master, + struct nfp_fl_pre_lag *pre_act) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group = NULL; + __be32 temp_vers; + + mutex_lock(&priv->nfp_lag.lock); + group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag, + master); + if (!group) { + mutex_unlock(&priv->nfp_lag.lock); + return -ENOENT; + } + + pre_act->group_id = cpu_to_be16(group->group_id); + temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver << + NFP_FL_PRE_LAG_VER_OFF); + memcpy(pre_act->lag_version, &temp_vers, 3); + pre_act->instance = group->group_inst; + mutex_unlock(&priv->nfp_lag.lock); + + return 0; +} + +int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group = NULL; + int group_id = -ENOENT; + + mutex_lock(&priv->nfp_lag.lock); + group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag, + master); + if (group) + group_id = group->group_id; + mutex_unlock(&priv->nfp_lag.lock); + + return group_id; +} + +static int +nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group, + struct net_device **active_members, + unsigned int member_cnt, enum nfp_fl_lag_batch *batch) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + struct nfp_flower_priv *priv; + unsigned long int flags; + unsigned int size, i; + struct sk_buff *skb; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt; + skb = nfp_flower_cmsg_alloc(priv->app, size, + NFP_FLOWER_CMSG_TYPE_LAG_CONFIG, + GFP_KERNEL); + if (!skb) + return -ENOMEM; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + flags = 0; + + /* Increment batch version for each new batch of config messages. */ + if (*batch == NFP_FL_LAG_BATCH_FIRST) { + flags |= NFP_FL_LAG_FIRST; + nfp_fl_increment_version(lag); + *batch = NFP_FL_LAG_BATCH_MEMBER; + } + + /* If it is a reset msg then it is also the end of the batch. 
*/ + if (lag->rst_cfg) { + flags |= NFP_FL_LAG_RESET; + *batch = NFP_FL_LAG_BATCH_FINISHED; + } + + /* To signal the end of a batch, both the switch and last flags are set + * and the the reserved SYNC group ID is used. + */ + if (*batch == NFP_FL_LAG_BATCH_FINISHED) { + flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST; + lag->rst_cfg = false; + cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID); + cmsg_payload->group_inst = 0; + } else { + cmsg_payload->group_id = cpu_to_be32(group->group_id); + cmsg_payload->group_inst = cpu_to_be32(group->group_inst); + } + + cmsg_payload->reserved[0] = 0; + cmsg_payload->reserved[1] = 0; + cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL; + cmsg_payload->ctrl_flags = flags; + cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver); + cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag)); + + for (i = 0; i < member_cnt; i++) + cmsg_payload->members[i] = + cpu_to_be32(nfp_repr_get_port_id(active_members[i])); + + nfp_ctrl_tx(priv->app->ctrl, skb); + return 0; +} + +static void nfp_fl_lag_do_work(struct work_struct *work) +{ + enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST; + struct nfp_fl_lag_group *entry, *storage; + struct delayed_work *delayed_work; + struct nfp_flower_priv *priv; + struct nfp_fl_lag *lag; + int err; + + delayed_work = to_delayed_work(work); + lag = container_of(delayed_work, struct nfp_fl_lag, work); + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + mutex_lock(&lag->lock); + list_for_each_entry_safe(entry, storage, &lag->group_list, list) { + struct net_device *iter_netdev, **acti_netdevs; + struct nfp_flower_repr_priv *repr_priv; + int active_count = 0, slaves = 0; + struct nfp_repr *repr; + unsigned long *flags; + + if (entry->to_remove) { + /* Active count of 0 deletes group on hw. 
*/ + err = nfp_fl_lag_config_group(lag, entry, NULL, 0, + &batch); + if (!err) { + entry->to_remove = false; + entry->offloaded = false; + } else { + nfp_flower_cmsg_warn(priv->app, + "group delete failed\n"); + schedule_delayed_work(&lag->work, + NFP_FL_LAG_DELAY); + continue; + } + + if (entry->to_destroy) { + ida_simple_remove(&lag->ida_handle, + entry->group_id); + list_del(&entry->list); + kfree(entry); + } + continue; + } + + acti_netdevs = kmalloc_array(entry->slave_cnt, + sizeof(*acti_netdevs), GFP_KERNEL); + + /* Include sanity check in the loop. It may be that a bond has + * changed between processing the last notification and the + * work queue triggering. If the number of slaves has changed + * or it now contains netdevs that cannot be offloaded, ignore + * the group until pending notifications are processed. + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) { + if (!nfp_netdev_is_nfp_repr(iter_netdev)) { + slaves = 0; + break; + } + + repr = netdev_priv(iter_netdev); + + if (repr->app != priv->app) { + slaves = 0; + break; + } + + slaves++; + if (slaves > entry->slave_cnt) + break; + + /* Check the ports for state changes. */ + repr_priv = repr->app_priv; + flags = &repr_priv->lag_port_flags; + + if (*flags & NFP_PORT_LAG_CHANGED) { + *flags &= ~NFP_PORT_LAG_CHANGED; + entry->dirty = true; + } + + if ((*flags & NFP_PORT_LAG_TX_ENABLED) && + (*flags & NFP_PORT_LAG_LINK_UP)) + acti_netdevs[active_count++] = iter_netdev; + } + rcu_read_unlock(); + + if (slaves != entry->slave_cnt || !entry->dirty) { + kfree(acti_netdevs); + continue; + } + + err = nfp_fl_lag_config_group(lag, entry, acti_netdevs, + active_count, &batch); + if (!err) { + entry->offloaded = true; + entry->dirty = false; + } else { + nfp_flower_cmsg_warn(priv->app, + "group offload failed\n"); + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + } + + kfree(acti_netdevs); + } + + /* End the config batch if at least one packet has been batched. 
*/ + if (batch == NFP_FL_LAG_BATCH_MEMBER) { + batch = NFP_FL_LAG_BATCH_FINISHED; + err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch); + if (err) + nfp_flower_cmsg_warn(priv->app, + "group batch end cmsg failed\n"); + } + + mutex_unlock(&lag->lock); +} + +static int +nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX) + return -EINVAL; + + /* Drop cmsg retrans if storage limit is exceeded to prevent + * overloading. If the fw notices that expected messages have not been + * received in a given time block, it will request a full resync. + */ + if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT) + return -ENOSPC; + + __skb_queue_tail(&lag->retrans_skbs, skb); + + return 0; +} + +static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag) +{ + struct nfp_flower_priv *priv; + struct sk_buff *skb; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + + while ((skb = __skb_dequeue(&lag->retrans_skbs))) + nfp_ctrl_tx(priv->app->ctrl, skb); +} + +bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_flower_cmsg_lag_config *cmsg_payload; + struct nfp_flower_priv *priv = app->priv; + struct nfp_fl_lag_group *group_entry; + unsigned long int flags; + bool store_skb = false; + int err; + + cmsg_payload = nfp_flower_cmsg_get_data(skb); + flags = cmsg_payload->ctrl_flags; + + /* Note the intentional fall through below. If DATA and XON are both + * set, the message will stored and sent again with the rest of the + * unprocessed messages list. 
+ */ + + /* Store */ + if (flags & NFP_FL_LAG_DATA) + if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb)) + store_skb = true; + + /* Send stored */ + if (flags & NFP_FL_LAG_XON) + nfp_fl_send_unprocessed(&priv->nfp_lag); + + /* Resend all */ + if (flags & NFP_FL_LAG_SYNC) { + /* To resend all config: + * 1) Clear all unprocessed messages + * 2) Mark all groups dirty + * 3) Reset NFP group config + * 4) Schedule a LAG config update + */ + + __skb_queue_purge(&priv->nfp_lag.retrans_skbs); + + mutex_lock(&priv->nfp_lag.lock); + list_for_each_entry(group_entry, &priv->nfp_lag.group_list, + list) + group_entry->dirty = true; + + err = nfp_flower_lag_reset(&priv->nfp_lag); + if (err) + nfp_flower_cmsg_warn(priv->app, + "mem err in group reset msg\n"); + mutex_unlock(&priv->nfp_lag.lock); + + schedule_delayed_work(&priv->nfp_lag.work, 0); + } + + return store_skb; +} + +static void +nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag, + struct nfp_fl_lag_group *group) +{ + group->to_remove = true; + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); +} + +static int +nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag, + struct net_device *master) +{ + struct nfp_fl_lag_group *group; + + mutex_lock(&lag->lock); + group = nfp_fl_lag_find_group_for_master_with_lag(lag, master); + if (!group) { + mutex_unlock(&lag->lock); + return -ENOENT; + } + + group->to_remove = true; + group->to_destroy = true; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + return 0; +} + +static int +nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *iter_netdev; + struct netdev_lag_upper_info *lag_upper_info; + struct nfp_fl_lag_group *group; + struct nfp_flower_priv *priv; + unsigned int slave_count = 0; + bool can_offload = true; + struct nfp_repr *repr; + + if (!netif_is_lag_master(upper)) + return 0; + + priv = container_of(lag, struct 
nfp_flower_priv, nfp_lag); + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, iter_netdev) { + if (!nfp_netdev_is_nfp_repr(iter_netdev)) { + can_offload = false; + break; + } + repr = netdev_priv(iter_netdev); + + /* Ensure all ports are created by the same app/on same card. */ + if (repr->app != priv->app) { + can_offload = false; + break; + } + + slave_count++; + } + rcu_read_unlock(); + + lag_upper_info = info->upper_info; + + /* Firmware supports active/backup and L3/L4 hash bonds. */ + if (lag_upper_info && + lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH || + (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 && + lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) { + can_offload = false; + nfp_flower_cmsg_warn(priv->app, + "Unable to offload tx_type %u hash %u\n", + lag_upper_info->tx_type, + lag_upper_info->hash_type); + } + + mutex_lock(&lag->lock); + group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper); + + if (slave_count == 0 || !can_offload) { + /* Cannot offload the group - remove if previously offloaded. */ + if (group && group->offloaded) + nfp_fl_lag_schedule_group_remove(lag, group); + + mutex_unlock(&lag->lock); + return 0; + } + + if (!group) { + group = nfp_fl_lag_group_create(lag, upper); + if (IS_ERR(group)) { + mutex_unlock(&lag->lock); + return PTR_ERR(group); + } + } + + group->dirty = true; + group->slave_cnt = slave_count; + + /* Group may have been on queue for removal but is now offloadable.
*/ + group->to_remove = false; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + return 0; +} + +static int +nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_flower_priv *priv; + struct nfp_repr *repr; + unsigned long *flags; + + if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev)) + return 0; + + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + priv = container_of(lag, struct nfp_flower_priv, nfp_lag); + repr = netdev_priv(netdev); + + /* Verify that the repr is associated with this app. */ + if (repr->app != priv->app) + return 0; + + repr_priv = repr->app_priv; + flags = &repr_priv->lag_port_flags; + + mutex_lock(&lag->lock); + if (lag_lower_info->link_up) + *flags |= NFP_PORT_LAG_LINK_UP; + else + *flags &= ~NFP_PORT_LAG_LINK_UP; + + if (lag_lower_info->tx_enabled) + *flags |= NFP_PORT_LAG_TX_ENABLED; + else + *flags &= ~NFP_PORT_LAG_TX_ENABLED; + + *flags |= NFP_PORT_LAG_CHANGED; + mutex_unlock(&lag->lock); + + schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY); + return 0; +} + +static int +nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct net_device *netdev; + struct nfp_fl_lag *lag; + int err; + + netdev = netdev_notifier_info_to_dev(ptr); + lag = container_of(nb, struct nfp_fl_lag, lag_nb); + + switch (event) { + case NETDEV_CHANGEUPPER: + err = nfp_fl_lag_changeupper_event(lag, ptr); + if (err) + return NOTIFY_BAD; + return NOTIFY_OK; + case NETDEV_CHANGELOWERSTATE: + err = nfp_fl_lag_changels_event(lag, netdev, ptr); + if (err) + return NOTIFY_BAD; + return NOTIFY_OK; + case NETDEV_UNREGISTER: + if (netif_is_bond_master(netdev)) { + err = nfp_fl_lag_schedule_group_delete(lag, netdev); + if (err) + return NOTIFY_BAD; + return NOTIFY_OK; 
+ } + } + + return NOTIFY_DONE; +} + +int nfp_flower_lag_reset(struct nfp_fl_lag *lag) +{ + enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST; + + lag->rst_cfg = true; + return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch); +} + +void nfp_flower_lag_init(struct nfp_fl_lag *lag) +{ + INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work); + INIT_LIST_HEAD(&lag->group_list); + mutex_init(&lag->lock); + ida_init(&lag->ida_handle); + + __skb_queue_head_init(&lag->retrans_skbs); + + /* 0 is a reserved batch version so increment to first valid value. */ + nfp_fl_increment_version(lag); + + lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event; +} + +void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag) +{ + struct nfp_fl_lag_group *entry, *storage; + + cancel_delayed_work_sync(&lag->work); + + __skb_queue_purge(&lag->retrans_skbs); + + /* Remove all groups. */ + mutex_lock(&lag->lock); + list_for_each_entry_safe(entry, storage, &lag->group_list, list) { + list_del(&entry->list); + kfree(entry); + } + mutex_unlock(&lag->lock); + mutex_destroy(&lag->lock); + ida_destroy(&lag->ida_handle); +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 4e67c0cbf9f0..19cfa162ac65 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -185,6 +185,10 @@ nfp_flower_repr_netdev_init(struct nfp_app *app, struct net_device *netdev) static void nfp_flower_repr_netdev_clean(struct nfp_app *app, struct net_device *netdev) { + struct nfp_repr *repr = netdev_priv(netdev); + + kfree(repr->app_priv); + tc_setup_cb_egdev_unregister(netdev, nfp_flower_setup_tc_egress_cb, netdev_priv(netdev)); } @@ -225,7 +229,9 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); struct nfp_flower_priv *priv = app->priv; atomic_t *replies = &priv->reify_replies; + struct nfp_flower_repr_priv *repr_priv; enum nfp_port_type port_type; + 
struct nfp_repr *nfp_repr; struct nfp_reprs *reprs; int i, err, reify_cnt; const u8 queue = 0; @@ -248,6 +254,15 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, goto err_reprs_clean; } + repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL); + if (!repr_priv) { + err = -ENOMEM; + goto err_reprs_clean; + } + + nfp_repr = netdev_priv(repr); + nfp_repr->app_priv = repr_priv; + /* For now we only support 1 PF */ WARN_ON(repr_type == NFP_REPR_TYPE_PF && i); @@ -324,6 +339,8 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) { struct nfp_eth_table *eth_tbl = app->pf->eth_tbl; atomic_t *replies = &priv->reify_replies; + struct nfp_flower_repr_priv *repr_priv; + struct nfp_repr *nfp_repr; struct sk_buff *ctrl_skb; struct nfp_reprs *reprs; int err, reify_cnt; @@ -351,6 +368,15 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) goto err_reprs_clean; } + repr_priv = kzalloc(sizeof(*repr_priv), GFP_KERNEL); + if (!repr_priv) { + err = -ENOMEM; + goto err_reprs_clean; + } + + nfp_repr = netdev_priv(repr); + nfp_repr->app_priv = repr_priv; + port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr); if (IS_ERR(port)) { err = PTR_ERR(port); @@ -546,8 +572,22 @@ static int nfp_flower_init(struct nfp_app *app) else app_priv->flower_ext_feats = features; + /* Tell the firmware that the driver supports lag. 
*/ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_balance_sync_enable", 1); + if (!err) { + app_priv->flower_ext_feats |= NFP_FL_FEATS_LAG; + nfp_flower_lag_init(&app_priv->nfp_lag); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, "LAG not supported by FW.\n"); + } else { + goto err_cleanup_metadata; + } + return 0; +err_cleanup_metadata: + nfp_flower_metadata_cleanup(app); err_free_app_priv: vfree(app->priv); return err; @@ -561,6 +601,9 @@ static void nfp_flower_clean(struct nfp_app *app) skb_queue_purge(&app_priv->cmsg_skbs_low); flush_work(&app_priv->cmsg_work); + if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) + nfp_flower_lag_cleanup(&app_priv->nfp_lag); + nfp_flower_metadata_cleanup(app); vfree(app->priv); app->priv = NULL; @@ -627,11 +670,29 @@ nfp_flower_repr_change_mtu(struct nfp_app *app, struct net_device *netdev, static int nfp_flower_start(struct nfp_app *app) { + struct nfp_flower_priv *app_priv = app->priv; + int err; + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + err = nfp_flower_lag_reset(&app_priv->nfp_lag); + if (err) + return err; + + err = register_netdevice_notifier(&app_priv->nfp_lag.lag_nb); + if (err) + return err; + } + return nfp_tunnel_config_start(app); } static void nfp_flower_stop(struct nfp_app *app) { + struct nfp_flower_priv *app_priv = app->priv; + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) + unregister_netdevice_notifier(&app_priv->nfp_lag.lag_nb); + nfp_tunnel_config_stop(app); } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 733ff53cc601..bbe5764d26cb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -43,7 +43,9 @@ #include <net/pkt_cls.h> #include <net/tcp.h> #include <linux/workqueue.h> +#include <linux/idr.h> +struct nfp_fl_pre_lag; struct net_device; struct nfp_app; @@ -67,6 +69,7 @@ struct nfp_app; /* Extra features bitmap. 
*/ #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) +#define NFP_FL_FEATS_LAG BIT(31) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -97,6 +100,33 @@ struct nfp_mtu_conf { }; /** + * struct nfp_fl_lag - Flower APP priv data for link aggregation + * @lag_nb: Notifier to track master/slave events + * @work: Work queue for writing configs to the HW + * @lock: Lock to protect lag_group_list + * @group_list: List of all master/slave groups offloaded + * @ida_handle: IDA to handle group ids + * @pkt_num: Incremented for each config packet sent + * @batch_ver: Incremented for each batch of config packets + * @global_inst: Instance allocator for groups + * @rst_cfg: Marker to reset HW LAG config + * @retrans_skbs: Cmsgs that could not be processed by HW and require + * retransmission + */ +struct nfp_fl_lag { + struct notifier_block lag_nb; + struct delayed_work work; + struct mutex lock; + struct list_head group_list; + struct ida ida_handle; + unsigned int pkt_num; + unsigned int batch_ver; + u8 global_inst; + bool rst_cfg; + struct sk_buff_head retrans_skbs; +}; + +/** * struct nfp_flower_priv - Flower APP per-vNIC priv data * @app: Back pointer to app * @nn: Pointer to vNIC @@ -128,6 +158,7 @@ struct nfp_mtu_conf { * from firmware for repr reify * @reify_wait_queue: wait queue for repr reify response counting * @mtu_conf: Configuration of repr MTU value + * @nfp_lag: Link aggregation data block */ struct nfp_flower_priv { struct nfp_app *app; @@ -157,6 +188,15 @@ struct nfp_flower_priv { atomic_t reify_replies; wait_queue_head_t reify_wait_queue; struct nfp_mtu_conf mtu_conf; + struct nfp_fl_lag nfp_lag; +}; + +/** + * struct nfp_flower_repr_priv - Flower APP per-repr priv data + * @lag_port_flags: Extended port flags to record lag state of repr + */ +struct nfp_flower_repr_priv { + unsigned long lag_port_flags; }; struct nfp_fl_key_ls { @@ -214,7 +254,8 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, 
struct net_device *netdev, struct nfp_fl_payload *nfp_flow, enum nfp_flower_tun_type tun_type); -int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, +int nfp_flower_compile_action(struct nfp_app *app, + struct tc_cls_flower_offload *flow, struct net_device *netdev, struct nfp_fl_payload *nfp_flow); int nfp_compile_flow_metadata(struct nfp_app *app, @@ -241,5 +282,14 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data, void *cb_priv); +void nfp_flower_lag_init(struct nfp_fl_lag *lag); +void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag); +int nfp_flower_lag_reset(struct nfp_fl_lag *lag); +bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb); +int nfp_flower_lag_populate_pre_action(struct nfp_app *app, + struct net_device *master, + struct nfp_fl_pre_lag *pre_act); +int nfp_flower_lag_get_output_id(struct nfp_app *app, + struct net_device *master); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 70ec9d821b91..c42e64f32333 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -440,7 +440,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_destroy_flow; - err = nfp_flower_compile_action(flow, netdev, flow_pay); + err = nfp_flower_compile_action(app, flow, netdev, flow_pay); if (err) goto err_destroy_flow; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index faa4e131c136..f6677bc9875a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -72,8 +72,21 @@ #define OP_BR_ADDR_LO 0x007ffc00000ULL #define OP_BR_ADDR_HI 0x10000000000ULL -#define nfp_is_br(_insn) \ 
- (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) +#define OP_BR_BIT_BASE 0x0d000000000ULL +#define OP_BR_BIT_BASE_MASK 0x0f800080300ULL +#define OP_BR_BIT_A_SRC 0x000000000ffULL +#define OP_BR_BIT_B_SRC 0x0000003fc00ULL +#define OP_BR_BIT_BV 0x00000040000ULL +#define OP_BR_BIT_SRC_LMEXTN 0x40000000000ULL +#define OP_BR_BIT_DEFBR OP_BR_DEFBR +#define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO +#define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI + +static inline bool nfp_is_br(u64 insn) +{ + return (insn & OP_BR_BASE_MASK) == OP_BR_BASE || + (insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE; +} enum br_mask { BR_BEQ = 0x00, @@ -161,6 +174,7 @@ enum shf_op { SHF_OP_NONE = 0, SHF_OP_AND = 2, SHF_OP_OR = 5, + SHF_OP_ASHR = 6, }; enum shf_sc { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 09e87d5f4f72..117eca6819de 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -277,6 +277,7 @@ const struct net_device_ops nfp_repr_netdev_ops = { .ndo_get_vf_config = nfp_app_get_vf_config, .ndo_set_vf_link_state = nfp_app_set_vf_link_state, .ndo_set_features = nfp_port_set_features, + .ndo_set_mac_address = eth_mac_addr, }; static void nfp_repr_clean(struct nfp_repr *repr) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h index c9724fb7ea4b..df599d5b6bb3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h @@ -100,6 +100,8 @@ nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name); u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name, int *error); +int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name, + u64 value); u8 __iomem * nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id, unsigned int min_size, struct nfp_cpp_area **area); diff --git 
a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c index 46107aefad1c..9e34216578da 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c @@ -286,6 +286,49 @@ exit: return val; } +/** + * nfp_rtsym_write_le() - Write an unsigned scalar value to a symbol + * @rtbl: NFP RTsym table + * @name: Symbol name + * @value: Value to write + * + * Lookup a symbol and write a value to it. Symbol can be 4 or 8 bytes in size. + * If 4 bytes then the lower 32-bits of 'value' are used. Value will be + * written as simple little-endian unsigned value. + * + * Return: 0 on success or error code. + */ +int nfp_rtsym_write_le(struct nfp_rtsym_table *rtbl, const char *name, + u64 value) +{ + const struct nfp_rtsym *sym; + int err; + u32 id; + + sym = nfp_rtsym_lookup(rtbl, name); + if (!sym) + return -ENOENT; + + id = NFP_CPP_ISLAND_ID(sym->target, NFP_CPP_ACTION_RW, 0, sym->domain); + + switch (sym->size) { + case 4: + err = nfp_cpp_writel(rtbl->cpp, id, sym->addr, value); + break; + case 8: + err = nfp_cpp_writeq(rtbl->cpp, id, sym->addr, value); + break; + default: + nfp_err(rtbl->cpp, + "rtsym '%s' unsupported or non-scalar size: %lld\n", + name, sym->size); + err = -EINVAL; + break; + } + + return err; +} + u8 __iomem * nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id, unsigned int min_size, struct nfp_cpp_area **area) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 5e655c3601cf..1c0d0c217936 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1677,6 +1677,8 @@ static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn, HILO_64_REGPAIR(tstats.mftag_filter_discard); p_stats->common.mac_filter_discards += HILO_64_REGPAIR(tstats.eth_mac_filter_discard); + p_stats->common.gft_filter_drop += + 
HILO_64_REGPAIR(tstats.eth_gft_drop_pkt); } static void __qed_get_vport_ustats_addrlen(struct qed_hwfn *p_hwfn, @@ -1973,6 +1975,8 @@ qed_arfs_mode_to_hsi(enum qed_filter_config_mode mode) return GFT_PROFILE_TYPE_4_TUPLE; if (mode == QED_FILTER_CONFIG_MODE_IP_DEST) return GFT_PROFILE_TYPE_IP_DST_ADDR; + if (mode == QED_FILTER_CONFIG_MODE_IP_SRC) + return GFT_PROFILE_TYPE_IP_SRC_ADDR; return GFT_PROFILE_TYPE_L4_DST_PORT; } @@ -2013,16 +2017,6 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, u8 abs_vport_id = 0; int rc = -EINVAL; - rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); - if (rc) - return rc; - - if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) { - rc = qed_fw_l2_queue(p_hwfn, p_params->qid, &abs_rx_q_id); - if (rc) - return rc; - } - /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); init_data.cid = qed_spq_get_cid(p_hwfn); @@ -2047,15 +2041,28 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(p_ramrod->pkt_hdr_addr, p_params->addr); p_ramrod->pkt_hdr_length = cpu_to_le16(p_params->length); - if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) { - p_ramrod->rx_qid_valid = 1; - p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id); + if (p_params->b_is_drop) { + p_ramrod->vport_id = cpu_to_le16(ETH_GFT_TRASHCAN_VPORT); + } else { + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); + if (rc) + return rc; + + if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) { + rc = qed_fw_l2_queue(p_hwfn, p_params->qid, + &abs_rx_q_id); + if (rc) + return rc; + + p_ramrod->rx_qid_valid = 1; + p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id); + } + + p_ramrod->vport_id = cpu_to_le16((u16)abs_vport_id); } p_ramrod->flow_id_valid = 0; p_ramrod->flow_id = 0; - - p_ramrod->vport_id = cpu_to_le16((u16)abs_vport_id); p_ramrod->filter_action = p_params->b_is_add ? 
GFT_ADD_FILTER : GFT_DELETE_FILTER; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 2d3f09ed413b..81c5c8dfa2ef 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -75,6 +75,7 @@ struct qede_stats_common { u64 rx_bcast_pkts; u64 mftag_filter_discards; u64 mac_filter_discards; + u64 gft_filter_drop; u64 tx_ucast_bytes; u64 tx_mcast_bytes; u64 tx_bcast_bytes; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 8c6fdad91986..6906e04b609e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -161,6 +161,7 @@ static const struct { QEDE_STAT(no_buff_discards), QEDE_PF_STAT(mftag_filter_discards), QEDE_PF_STAT(mac_filter_discards), + QEDE_PF_STAT(gft_filter_drop), QEDE_STAT(tx_err_drop_pkts), QEDE_STAT(ttl0_discard), QEDE_STAT(packet_too_big_discard), diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 43569b1839be..e9e088d9c815 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -38,6 +38,7 @@ #include <linux/qed/qed_if.h> #include "qede.h" +#define QEDE_FILTER_PRINT_MAX_LEN (64) struct qede_arfs_tuple { union { __be32 src_ipv4; @@ -51,6 +52,18 @@ struct qede_arfs_tuple { __be16 dst_port; __be16 eth_proto; u8 ip_proto; + + /* Describe filtering mode needed for this kind of filter */ + enum qed_filter_config_mode mode; + + /* Used to compare new/old filters. 
Return true if IPs match */ + bool (*ip_comp)(struct qede_arfs_tuple *a, struct qede_arfs_tuple *b); + + /* Given an address into ethhdr build a header from tuple info */ + void (*build_hdr)(struct qede_arfs_tuple *t, void *header); + + /* Stringify the tuple for a print into the provided buffer */ + void (*stringify)(struct qede_arfs_tuple *t, void *buffer); }; struct qede_arfs_fltr_node { @@ -73,9 +86,11 @@ struct qede_arfs_fltr_node { u16 sw_id; u16 rxq_id; u16 next_rxq_id; + u8 vfid; bool filter_op; bool used; u8 fw_rc; + bool b_is_drop; struct hlist_node node; }; @@ -90,7 +105,9 @@ struct qede_arfs { spinlock_t arfs_list_lock; unsigned long *arfs_fltr_bmap; int filter_count; - bool enable; + + /* Currently configured filtering mode */ + enum qed_filter_config_mode mode; }; static void qede_configure_arfs_fltr(struct qede_dev *edev, @@ -109,12 +126,22 @@ static void qede_configure_arfs_fltr(struct qede_dev *edev, params.length = n->buf_len; params.qid = rxq_id; params.b_is_add = add_fltr; + params.b_is_drop = n->b_is_drop; + + if (n->vfid) { + params.b_is_vf = true; + params.vf_id = n->vfid - 1; + } - DP_VERBOSE(edev, NETIF_MSG_RX_STATUS, - "%s arfs filter flow_id=%d, sw_id=%d, src_port=%d, dst_port=%d, rxq=%d\n", - add_fltr ? "Adding" : "Deleting", - n->flow_id, n->sw_id, ntohs(n->tuple.src_port), - ntohs(n->tuple.dst_port), rxq_id); + if (n->tuple.stringify) { + char tuple_buffer[QEDE_FILTER_PRINT_MAX_LEN]; + + n->tuple.stringify(&n->tuple, tuple_buffer); + DP_VERBOSE(edev, NETIF_MSG_RX_STATUS, + "%s sw_id[0x%x]: %s [vf %u queue %d]\n", + add_fltr ? 
"Adding" : "Deleting", + n->sw_id, tuple_buffer, n->vfid, rxq_id); + } n->used = true; n->filter_op = add_fltr; @@ -145,14 +172,13 @@ qede_enqueue_fltr_and_config_searcher(struct qede_dev *edev, INIT_HLIST_NODE(&fltr->node); hlist_add_head(&fltr->node, QEDE_ARFS_BUCKET_HEAD(edev, bucket_idx)); - edev->arfs->filter_count++; - - if (edev->arfs->filter_count == 1 && !edev->arfs->enable) { - enum qed_filter_config_mode mode; - mode = QED_FILTER_CONFIG_MODE_5_TUPLE; - edev->ops->configure_arfs_searcher(edev->cdev, mode); - edev->arfs->enable = true; + edev->arfs->filter_count++; + if (edev->arfs->filter_count == 1 && + edev->arfs->mode == QED_FILTER_CONFIG_MODE_DISABLE) { + edev->ops->configure_arfs_searcher(edev->cdev, + fltr->tuple.mode); + edev->arfs->mode = fltr->tuple.mode; } return 0; @@ -167,14 +193,15 @@ qede_dequeue_fltr_and_config_searcher(struct qede_dev *edev, fltr->buf_len, DMA_TO_DEVICE); qede_free_arfs_filter(edev, fltr); - edev->arfs->filter_count--; - if (!edev->arfs->filter_count && edev->arfs->enable) { + edev->arfs->filter_count--; + if (!edev->arfs->filter_count && + edev->arfs->mode != QED_FILTER_CONFIG_MODE_DISABLE) { enum qed_filter_config_mode mode; mode = QED_FILTER_CONFIG_MODE_DISABLE; - edev->arfs->enable = false; edev->ops->configure_arfs_searcher(edev->cdev, mode); + edev->arfs->mode = QED_FILTER_CONFIG_MODE_DISABLE; } } @@ -264,25 +291,17 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr) } } +#ifdef CONFIG_RFS_ACCEL spin_lock_bh(&edev->arfs->arfs_list_lock); - if (!edev->arfs->filter_count) { - if (edev->arfs->enable) { - enum qed_filter_config_mode mode; - - mode = QED_FILTER_CONFIG_MODE_DISABLE; - edev->arfs->enable = false; - edev->ops->configure_arfs_searcher(edev->cdev, mode); - } -#ifdef CONFIG_RFS_ACCEL - } else { + if (edev->arfs->filter_count) { set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags); schedule_delayed_work(&edev->sp_task, QEDE_SP_TASK_POLL_DELAY); -#endif } spin_unlock_bh(&edev->arfs->arfs_list_lock); 
+#endif } /* This function waits until all aRFS filters get deleted and freed. @@ -512,6 +531,7 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, eth->h_proto = skb->protocol; n->tuple.eth_proto = skb->protocol; n->tuple.ip_proto = ip_proto; + n->tuple.mode = QED_FILTER_CONFIG_MODE_5_TUPLE; memcpy(n->data + ETH_HLEN, skb->data, skb_headlen(skb)); rc = qede_enqueue_fltr_and_config_searcher(edev, n, tbl_idx); @@ -1339,38 +1359,6 @@ qede_get_arfs_fltr_by_loc(struct hlist_head *head, u32 location) return NULL; } -static bool -qede_compare_user_flow_ips(struct qede_arfs_fltr_node *tpos, - struct ethtool_rx_flow_spec *fsp, - __be16 proto) -{ - if (proto == htons(ETH_P_IP)) { - struct ethtool_tcpip4_spec *ip; - - ip = &fsp->h_u.tcp_ip4_spec; - - if (tpos->tuple.src_ipv4 == ip->ip4src && - tpos->tuple.dst_ipv4 == ip->ip4dst) - return true; - else - return false; - } else { - struct ethtool_tcpip6_spec *ip6; - struct in6_addr *src; - - ip6 = &fsp->h_u.tcp_ip6_spec; - src = &tpos->tuple.src_ipv6; - - if (!memcmp(src, &ip6->ip6src, sizeof(struct in6_addr)) && - !memcmp(&tpos->tuple.dst_ipv6, &ip6->ip6dst, - sizeof(struct in6_addr))) - return true; - else - return false; - } - return false; -} - int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info, u32 *rule_locs) { @@ -1455,102 +1443,444 @@ int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd) fsp->ring_cookie = fltr->rxq_id; + if (fltr->vfid) { + fsp->ring_cookie |= ((u64)fltr->vfid) << + ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; + } + + if (fltr->b_is_drop) + fsp->ring_cookie = RX_CLS_FLOW_DISC; unlock: __qede_unlock(edev); return rc; } static int -qede_validate_and_check_flow_exist(struct qede_dev *edev, - struct ethtool_rx_flow_spec *fsp, - int *min_hlen) +qede_poll_arfs_filter_config(struct qede_dev *edev, + struct qede_arfs_fltr_node *fltr) { - __be16 src_port = 0x0, dst_port = 0x0; - struct qede_arfs_fltr_node *fltr; - struct hlist_node *temp; - struct 
hlist_head *head; - __be16 eth_proto; - u8 ip_proto; + int count = QEDE_ARFS_POLL_COUNT; - if (fsp->location >= QEDE_RFS_MAX_FLTR || - fsp->ring_cookie >= QEDE_RSS_COUNT(edev)) - return -EINVAL; + while (fltr->used && count) { + msleep(20); + count--; + } + + if (count == 0 || fltr->fw_rc) { + DP_NOTICE(edev, "Timeout in polling filter config\n"); + qede_dequeue_fltr_and_config_searcher(edev, fltr); + return -EIO; + } + + return fltr->fw_rc; +} + +static int qede_flow_get_min_header_size(struct qede_arfs_tuple *t) +{ + int size = ETH_HLEN; + + if (t->eth_proto == htons(ETH_P_IP)) + size += sizeof(struct iphdr); + else + size += sizeof(struct ipv6hdr); + + if (t->ip_proto == IPPROTO_TCP) + size += sizeof(struct tcphdr); + else + size += sizeof(struct udphdr); + + return size; +} + +static bool qede_flow_spec_ipv4_cmp(struct qede_arfs_tuple *a, + struct qede_arfs_tuple *b) +{ + if (a->eth_proto != htons(ETH_P_IP) || + b->eth_proto != htons(ETH_P_IP)) + return false; + + return (a->src_ipv4 == b->src_ipv4) && + (a->dst_ipv4 == b->dst_ipv4); +} + +static void qede_flow_build_ipv4_hdr(struct qede_arfs_tuple *t, + void *header) +{ + __be16 *ports = (__be16 *)(header + ETH_HLEN + sizeof(struct iphdr)); + struct iphdr *ip = (struct iphdr *)(header + ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)header; + + eth->h_proto = t->eth_proto; + ip->saddr = t->src_ipv4; + ip->daddr = t->dst_ipv4; + ip->version = 0x4; + ip->ihl = 0x5; + ip->protocol = t->ip_proto; + ip->tot_len = cpu_to_be16(qede_flow_get_min_header_size(t) - ETH_HLEN); + + /* ports is weakly typed to suit both TCP and UDP ports */ + ports[0] = t->src_port; + ports[1] = t->dst_port; +} + +static void qede_flow_stringify_ipv4_hdr(struct qede_arfs_tuple *t, + void *buffer) +{ + const char *prefix = t->ip_proto == IPPROTO_TCP ? 
"TCP" : "UDP"; + + snprintf(buffer, QEDE_FILTER_PRINT_MAX_LEN, + "%s %pI4 (%04x) -> %pI4 (%04x)", + prefix, &t->src_ipv4, t->src_port, + &t->dst_ipv4, t->dst_port); +} + +static bool qede_flow_spec_ipv6_cmp(struct qede_arfs_tuple *a, + struct qede_arfs_tuple *b) +{ + if (a->eth_proto != htons(ETH_P_IPV6) || + b->eth_proto != htons(ETH_P_IPV6)) + return false; + + if (memcmp(&a->src_ipv6, &b->src_ipv6, sizeof(struct in6_addr))) + return false; - if (fsp->flow_type == TCP_V4_FLOW) { - *min_hlen += sizeof(struct iphdr) + - sizeof(struct tcphdr); - eth_proto = htons(ETH_P_IP); - ip_proto = IPPROTO_TCP; - } else if (fsp->flow_type == UDP_V4_FLOW) { - *min_hlen += sizeof(struct iphdr) + - sizeof(struct udphdr); - eth_proto = htons(ETH_P_IP); - ip_proto = IPPROTO_UDP; - } else if (fsp->flow_type == TCP_V6_FLOW) { - *min_hlen += sizeof(struct ipv6hdr) + - sizeof(struct tcphdr); - eth_proto = htons(ETH_P_IPV6); - ip_proto = IPPROTO_TCP; - } else if (fsp->flow_type == UDP_V6_FLOW) { - *min_hlen += sizeof(struct ipv6hdr) + - sizeof(struct udphdr); - eth_proto = htons(ETH_P_IPV6); - ip_proto = IPPROTO_UDP; + if (memcmp(&a->dst_ipv6, &b->dst_ipv6, sizeof(struct in6_addr))) + return false; + + return true; +} + +static void qede_flow_build_ipv6_hdr(struct qede_arfs_tuple *t, + void *header) +{ + __be16 *ports = (__be16 *)(header + ETH_HLEN + sizeof(struct ipv6hdr)); + struct ipv6hdr *ip6 = (struct ipv6hdr *)(header + ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)header; + + eth->h_proto = t->eth_proto; + memcpy(&ip6->saddr, &t->src_ipv6, sizeof(struct in6_addr)); + memcpy(&ip6->daddr, &t->dst_ipv6, sizeof(struct in6_addr)); + ip6->version = 0x6; + + if (t->ip_proto == IPPROTO_TCP) { + ip6->nexthdr = NEXTHDR_TCP; + ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr)); } else { - DP_NOTICE(edev, "Unsupported flow type = 0x%x\n", - fsp->flow_type); - return -EPROTONOSUPPORT; + ip6->nexthdr = NEXTHDR_UDP; + ip6->payload_len = cpu_to_be16(sizeof(struct udphdr)); } - if 
(eth_proto == htons(ETH_P_IP)) { - src_port = fsp->h_u.tcp_ip4_spec.psrc; - dst_port = fsp->h_u.tcp_ip4_spec.pdst; + /* ports is weakly typed to suit both TCP and UDP ports */ + ports[0] = t->src_port; + ports[1] = t->dst_port; +} + +/* Validate fields which are set and not accepted by the driver */ +static int qede_flow_spec_validate_unused(struct qede_dev *edev, + struct ethtool_rx_flow_spec *fs) +{ + if (fs->flow_type & FLOW_MAC_EXT) { + DP_INFO(edev, "Don't support MAC extensions\n"); + return -EOPNOTSUPP; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->h_ext.vlan_etype || fs->h_ext.vlan_tci)) { + DP_INFO(edev, "Don't support vlan-based classification\n"); + return -EOPNOTSUPP; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->h_ext.data[0] || fs->h_ext.data[1])) { + DP_INFO(edev, "Don't support user defined data\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int qede_flow_spec_to_tuple_ipv4_common(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + if ((fs->h_u.tcp_ip4_spec.ip4src & + fs->m_u.tcp_ip4_spec.ip4src) != fs->h_u.tcp_ip4_spec.ip4src) { + DP_INFO(edev, "Don't support IP-masks\n"); + return -EOPNOTSUPP; + } + + if ((fs->h_u.tcp_ip4_spec.ip4dst & + fs->m_u.tcp_ip4_spec.ip4dst) != fs->h_u.tcp_ip4_spec.ip4dst) { + DP_INFO(edev, "Don't support IP-masks\n"); + return -EOPNOTSUPP; + } + + if ((fs->h_u.tcp_ip4_spec.psrc & + fs->m_u.tcp_ip4_spec.psrc) != fs->h_u.tcp_ip4_spec.psrc) { + DP_INFO(edev, "Don't support port-masks\n"); + return -EOPNOTSUPP; + } + + if ((fs->h_u.tcp_ip4_spec.pdst & + fs->m_u.tcp_ip4_spec.pdst) != fs->h_u.tcp_ip4_spec.pdst) { + DP_INFO(edev, "Don't support port-masks\n"); + return -EOPNOTSUPP; + } + + if (fs->h_u.tcp_ip4_spec.tos) { + DP_INFO(edev, "Don't support tos\n"); + return -EOPNOTSUPP; + } + + t->eth_proto = htons(ETH_P_IP); + t->src_ipv4 = fs->h_u.tcp_ip4_spec.ip4src; + t->dst_ipv4 = fs->h_u.tcp_ip4_spec.ip4dst; + t->src_port = fs->h_u.tcp_ip4_spec.psrc; + t->dst_port = 
fs->h_u.tcp_ip4_spec.pdst; + + /* We must either have a valid 4-tuple or only dst port + * or only src ip as an input + */ + if (t->src_port && t->dst_port && t->src_ipv4 && t->dst_ipv4) { + t->mode = QED_FILTER_CONFIG_MODE_5_TUPLE; + } else if (!t->src_port && t->dst_port && + !t->src_ipv4 && !t->dst_ipv4) { + t->mode = QED_FILTER_CONFIG_MODE_L4_PORT; + } else if (!t->src_port && !t->dst_port && + !t->dst_ipv4 && t->src_ipv4) { + t->mode = QED_FILTER_CONFIG_MODE_IP_SRC; } else { - src_port = fsp->h_u.tcp_ip6_spec.psrc; - dst_port = fsp->h_u.tcp_ip6_spec.pdst; + DP_INFO(edev, "Invalid N-tuple\n"); + return -EOPNOTSUPP; } - head = QEDE_ARFS_BUCKET_HEAD(edev, 0); - hlist_for_each_entry_safe(fltr, temp, head, node) { - if ((fltr->tuple.ip_proto == ip_proto && - fltr->tuple.eth_proto == eth_proto && - qede_compare_user_flow_ips(fltr, fsp, eth_proto) && - fltr->tuple.src_port == src_port && - fltr->tuple.dst_port == dst_port) || - fltr->sw_id == fsp->location) - return -EEXIST; + t->ip_comp = qede_flow_spec_ipv4_cmp; + t->build_hdr = qede_flow_build_ipv4_hdr; + t->stringify = qede_flow_stringify_ipv4_hdr; + + return 0; +} + +static int qede_flow_spec_to_tuple_tcpv4(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + t->ip_proto = IPPROTO_TCP; + + if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs)) + return -EINVAL; + + return 0; +} + +static int qede_flow_spec_to_tuple_udpv4(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + t->ip_proto = IPPROTO_UDP; + + if (qede_flow_spec_to_tuple_ipv4_common(edev, t, fs)) + return -EINVAL; + + return 0; +} + +static int qede_flow_spec_to_tuple_ipv6_common(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + struct in6_addr zero_addr; + void *p; + + p = &zero_addr; + memset(p, 0, sizeof(zero_addr)); + + if ((fs->h_u.tcp_ip6_spec.psrc & + fs->m_u.tcp_ip6_spec.psrc) != fs->h_u.tcp_ip6_spec.psrc) { + DP_INFO(edev, 
"Don't support port-masks\n"); + return -EOPNOTSUPP; + } + + if ((fs->h_u.tcp_ip6_spec.pdst & + fs->m_u.tcp_ip6_spec.pdst) != fs->h_u.tcp_ip6_spec.pdst) { + DP_INFO(edev, "Don't support port-masks\n"); + return -EOPNOTSUPP; + } + + if (fs->h_u.tcp_ip6_spec.tclass) { + DP_INFO(edev, "Don't support tclass\n"); + return -EOPNOTSUPP; } + t->eth_proto = htons(ETH_P_IPV6); + memcpy(&t->src_ipv6, &fs->h_u.tcp_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&t->dst_ipv6, &fs->h_u.tcp_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + t->src_port = fs->h_u.tcp_ip6_spec.psrc; + t->dst_port = fs->h_u.tcp_ip6_spec.pdst; + + /* We must make sure we have a valid 4-tuple or only dest port + * or only src ip as an input + */ + if (t->src_port && t->dst_port && + memcmp(&t->src_ipv6, p, sizeof(struct in6_addr)) && + memcmp(&t->dst_ipv6, p, sizeof(struct in6_addr))) { + t->mode = QED_FILTER_CONFIG_MODE_5_TUPLE; + } else if (!t->src_port && t->dst_port && + !memcmp(&t->src_ipv6, p, sizeof(struct in6_addr)) && + !memcmp(&t->dst_ipv6, p, sizeof(struct in6_addr))) { + t->mode = QED_FILTER_CONFIG_MODE_L4_PORT; + } else if (!t->src_port && !t->dst_port && + !memcmp(&t->dst_ipv6, p, sizeof(struct in6_addr)) && + memcmp(&t->src_ipv6, p, sizeof(struct in6_addr))) { + t->mode = QED_FILTER_CONFIG_MODE_IP_SRC; + } else { + DP_INFO(edev, "Invalid N-tuple\n"); + return -EOPNOTSUPP; + } + + t->ip_comp = qede_flow_spec_ipv6_cmp; + t->build_hdr = qede_flow_build_ipv6_hdr; + return 0; } -static int -qede_poll_arfs_filter_config(struct qede_dev *edev, - struct qede_arfs_fltr_node *fltr) +static int qede_flow_spec_to_tuple_tcpv6(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) { - int count = QEDE_ARFS_POLL_COUNT; + t->ip_proto = IPPROTO_TCP; - while (fltr->used && count) { - msleep(20); - count--; + if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs)) + return -EINVAL; + + return 0; +} + +static int qede_flow_spec_to_tuple_udpv6(struct qede_dev *edev, + struct 
qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + t->ip_proto = IPPROTO_UDP; + + if (qede_flow_spec_to_tuple_ipv6_common(edev, t, fs)) + return -EINVAL; + + return 0; +} + +static int qede_flow_spec_to_tuple(struct qede_dev *edev, + struct qede_arfs_tuple *t, + struct ethtool_rx_flow_spec *fs) +{ + memset(t, 0, sizeof(*t)); + + if (qede_flow_spec_validate_unused(edev, fs)) + return -EOPNOTSUPP; + + switch ((fs->flow_type & ~FLOW_EXT)) { + case TCP_V4_FLOW: + return qede_flow_spec_to_tuple_tcpv4(edev, t, fs); + case UDP_V4_FLOW: + return qede_flow_spec_to_tuple_udpv4(edev, t, fs); + case TCP_V6_FLOW: + return qede_flow_spec_to_tuple_tcpv6(edev, t, fs); + case UDP_V6_FLOW: + return qede_flow_spec_to_tuple_udpv6(edev, t, fs); + default: + DP_VERBOSE(edev, NETIF_MSG_IFUP, + "Can't support flow of type %08x\n", fs->flow_type); + return -EOPNOTSUPP; } - if (count == 0 || fltr->fw_rc) { - qede_dequeue_fltr_and_config_searcher(edev, fltr); - return -EIO; + return 0; +} + +static int qede_flow_spec_validate(struct qede_dev *edev, + struct ethtool_rx_flow_spec *fs, + struct qede_arfs_tuple *t) +{ + if (fs->location >= QEDE_RFS_MAX_FLTR) { + DP_INFO(edev, "Location out-of-bounds\n"); + return -EINVAL; } - return fltr->fw_rc; + /* Check location isn't already in use */ + if (test_bit(fs->location, edev->arfs->arfs_fltr_bmap)) { + DP_INFO(edev, "Location already in use\n"); + return -EINVAL; + } + + /* Check if the filtering-mode could support the filter */ + if (edev->arfs->filter_count && + edev->arfs->mode != t->mode) { + DP_INFO(edev, + "flow_spec would require filtering mode %08x, but %08x is configured\n", + t->mode, edev->arfs->filter_count); + return -EINVAL; + } + + /* If drop requested then no need to validate other data */ + if (fs->ring_cookie == RX_CLS_FLOW_DISC) + return 0; + + if (ethtool_get_flow_spec_ring_vf(fs->ring_cookie)) + return 0; + + if (fs->ring_cookie >= QEDE_RSS_COUNT(edev)) { + DP_INFO(edev, "Queue out-of-bounds\n"); + return -EINVAL; + } 
+ + return 0; +} + +/* Must be called while qede lock is held */ +static struct qede_arfs_fltr_node * +qede_flow_find_fltr(struct qede_dev *edev, struct qede_arfs_tuple *t) +{ + struct qede_arfs_fltr_node *fltr; + struct hlist_node *temp; + struct hlist_head *head; + + head = QEDE_ARFS_BUCKET_HEAD(edev, 0); + + hlist_for_each_entry_safe(fltr, temp, head, node) { + if (fltr->tuple.ip_proto == t->ip_proto && + fltr->tuple.src_port == t->src_port && + fltr->tuple.dst_port == t->dst_port && + t->ip_comp(&fltr->tuple, t)) + return fltr; + } + + return NULL; +} + +static void qede_flow_set_destination(struct qede_dev *edev, + struct qede_arfs_fltr_node *n, + struct ethtool_rx_flow_spec *fs) +{ + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + n->b_is_drop = true; + return; + } + + n->vfid = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + n->rxq_id = ethtool_get_flow_spec_ring(fs->ring_cookie); + n->next_rxq_id = n->rxq_id; + + if (n->vfid) + DP_VERBOSE(edev, QED_MSG_SP, + "Configuring N-tuple for VF 0x%02x\n", n->vfid - 1); } int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) { struct ethtool_rx_flow_spec *fsp = &info->fs; struct qede_arfs_fltr_node *n; - int min_hlen = ETH_HLEN, rc; - struct ethhdr *eth; - struct iphdr *ip; - __be16 *ports; + struct qede_arfs_tuple t; + int min_hlen, rc; __qede_lock(edev); @@ -1559,16 +1889,28 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) goto unlock; } - rc = qede_validate_and_check_flow_exist(edev, fsp, &min_hlen); + /* Translate the flow specification into something fitting our DB */ + rc = qede_flow_spec_to_tuple(edev, &t, fsp); if (rc) goto unlock; + /* Make sure location is valid and filter isn't already set */ + rc = qede_flow_spec_validate(edev, fsp, &t); + if (rc) + goto unlock; + + if (qede_flow_find_fltr(edev, &t)) { + rc = -EINVAL; + goto unlock; + } + n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) { rc = -ENOMEM; goto unlock; } + min_hlen = qede_flow_get_min_header_size(&t); 
n->data = kzalloc(min_hlen, GFP_KERNEL); if (!n->data) { kfree(n); @@ -1579,68 +1921,13 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) n->sw_id = fsp->location; set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap); n->buf_len = min_hlen; - n->rxq_id = fsp->ring_cookie; - n->next_rxq_id = n->rxq_id; - eth = (struct ethhdr *)n->data; - if (info->fs.flow_type == TCP_V4_FLOW || - info->fs.flow_type == UDP_V4_FLOW) { - ports = (__be16 *)(n->data + ETH_HLEN + - sizeof(struct iphdr)); - eth->h_proto = htons(ETH_P_IP); - n->tuple.eth_proto = htons(ETH_P_IP); - n->tuple.src_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4src; - n->tuple.dst_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4dst; - n->tuple.src_port = info->fs.h_u.tcp_ip4_spec.psrc; - n->tuple.dst_port = info->fs.h_u.tcp_ip4_spec.pdst; - ports[0] = n->tuple.src_port; - ports[1] = n->tuple.dst_port; - ip = (struct iphdr *)(n->data + ETH_HLEN); - ip->saddr = info->fs.h_u.tcp_ip4_spec.ip4src; - ip->daddr = info->fs.h_u.tcp_ip4_spec.ip4dst; - ip->version = 0x4; - ip->ihl = 0x5; - - if (info->fs.flow_type == TCP_V4_FLOW) { - n->tuple.ip_proto = IPPROTO_TCP; - ip->protocol = IPPROTO_TCP; - } else { - n->tuple.ip_proto = IPPROTO_UDP; - ip->protocol = IPPROTO_UDP; - } - ip->tot_len = cpu_to_be16(min_hlen - ETH_HLEN); - } else { - struct ipv6hdr *ip6; - - ip6 = (struct ipv6hdr *)(n->data + ETH_HLEN); - ports = (__be16 *)(n->data + ETH_HLEN + - sizeof(struct ipv6hdr)); - eth->h_proto = htons(ETH_P_IPV6); - n->tuple.eth_proto = htons(ETH_P_IPV6); - memcpy(&n->tuple.src_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6src, - sizeof(struct in6_addr)); - memcpy(&n->tuple.dst_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6dst, - sizeof(struct in6_addr)); - n->tuple.src_port = info->fs.h_u.tcp_ip6_spec.psrc; - n->tuple.dst_port = info->fs.h_u.tcp_ip6_spec.pdst; - ports[0] = n->tuple.src_port; - ports[1] = n->tuple.dst_port; - memcpy(&ip6->saddr, &n->tuple.src_ipv6, - sizeof(struct in6_addr)); - memcpy(&ip6->daddr, &n->tuple.dst_ipv6, - sizeof(struct 
in6_addr)); - ip6->version = 0x6; + memcpy(&n->tuple, &t, sizeof(n->tuple)); - if (info->fs.flow_type == TCP_V6_FLOW) { - n->tuple.ip_proto = IPPROTO_TCP; - ip6->nexthdr = NEXTHDR_TCP; - ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr)); - } else { - n->tuple.ip_proto = IPPROTO_UDP; - ip6->nexthdr = NEXTHDR_UDP; - ip6->payload_len = cpu_to_be16(sizeof(struct udphdr)); - } - } + qede_flow_set_destination(edev, n, fsp); + + /* Build a minimal header according to the flow */ + n->tuple.build_hdr(&n->tuple, n->data); rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0); if (rc) @@ -1650,6 +1937,7 @@ int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info) rc = qede_poll_arfs_filter_config(edev, n); unlock: __qede_unlock(edev); + return rc; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 9e70f713c3c5..d118771e1a7b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -347,6 +347,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) p_common->rx_bcast_pkts = stats.common.rx_bcast_pkts; p_common->mftag_filter_discards = stats.common.mftag_filter_discards; p_common->mac_filter_discards = stats.common.mac_filter_discards; + p_common->gft_filter_drop = stats.common.gft_filter_drop; p_common->tx_ucast_bytes = stats.common.tx_ucast_bytes; p_common->tx_mcast_bytes = stats.common.tx_mcast_bytes; diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index d118da5a10a2..ffd68a7bc9e1 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1104,7 +1104,6 @@ static int rtl8139_init_one(struct pci_dev *pdev, return 0; err_out: - netif_napi_del(&tp->napi); __rtl8139_cleanup_dev (dev); pci_disable_device (pdev); return i; @@ -1119,7 +1118,6 @@ static void rtl8139_remove_one(struct pci_dev *pdev) assert (dev != NULL); cancel_delayed_work_sync(&tp->thread); - 
netif_napi_del(&tp->napi); unregister_netdev (dev); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index cece961f2e82..c3ad564ac4c0 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -435,17 +435,18 @@ static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, } while (1); } -/* Remove buffers put into a tx_queue. None of the buffers must have - * an skb attached. +/* Remove buffers put into a tx_queue for the current packet. + * None of the buffers must have an skb attached. */ -static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) +static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, + unsigned int insert_count) { struct efx_tx_buffer *buffer; unsigned int bytes_compl = 0; unsigned int pkts_compl = 0; /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != tx_queue->write_count) { + while (tx_queue->insert_count != insert_count) { --tx_queue->insert_count; buffer = __efx_tx_queue_get_insert_buffer(tx_queue); efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); @@ -504,6 +505,8 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { + unsigned int old_insert_count = tx_queue->insert_count; + bool xmit_more = skb->xmit_more; bool data_mapped = false; unsigned int segments; unsigned int skb_len; @@ -553,8 +556,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Update BQL */ netdev_tx_sent_queue(tx_queue->core_txq, skb_len); + efx_tx_maybe_stop_queue(tx_queue); + /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + if (!xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); /* There could be packets left on the partner queue if those @@ -577,14 +582,26 @@ netdev_tx_t 
efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) tx_queue->tx_packets++; } - efx_tx_maybe_stop_queue(tx_queue); - return NETDEV_TX_OK; err: - efx_enqueue_unwind(tx_queue); + efx_enqueue_unwind(tx_queue, old_insert_count); dev_kfree_skb_any(skb); + + /* If we're not expecting another transmit and we had something to push + * on this queue or a partner queue then we need to push here to get the + * previous packets out. + */ + if (!xmit_more) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + + efx_nic_push_buffers(tx_queue); + } + return NETDEV_TX_OK; } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 7f3dab4b4cbc..5428bb261102 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1237,7 +1237,10 @@ static void rndis_get_friendly_name(struct net_device *net, if (rndis_filter_query_device(rndis_device, net_device, RNDIS_OID_GEN_FRIENDLY_NAME, wname, &size) != 0) - return; + return; /* ignore if host does not support */ + + if (size == 0) + return; /* name not set */ /* Convert Windows Unicode string to UTF-8 */ len = ucs2_as_utf8(ifalias, wname, sizeof(ifalias)); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index d6ff881165d0..e6730a01d130 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1129,6 +1129,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port, int err; lag_upper_info.tx_type = team->mode->lag_tx_type; + lag_upper_info.hash_type = NETDEV_LAG_HASH_UNKNOWN; err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, &lag_upper_info, extack); if (err) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 99bf3cee1345..33a9c5661038 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -70,6 +70,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/sock.h> +#include <net/xdp.h> #include 
<linux/seq_file.h> #include <linux/uio.h> #include <linux/skb_array.h> @@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_get_stats64 = tun_net_get_stats64, }; -static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) +static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames) { struct tun_struct *tun = netdev_priv(dev); struct tun_file *tfile; u32 numqueues; - int ret = 0; + int drops = 0; + int cnt = n; + int i; rcu_read_lock(); numqueues = READ_ONCE(tun->numqueues); if (!numqueues) { - ret = -ENOSPC; - goto out; + rcu_read_unlock(); + return -ENXIO; /* Caller will free/return all frames */ } tfile = rcu_dereference(tun->tfiles[smp_processor_id() % numqueues]); - /* Encode the XDP flag into lowest bit for consumer to differ - * XDP buffer from sk_buff. - */ - if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) { - this_cpu_inc(tun->pcpu_stats->tx_dropped); - ret = -ENOSPC; + + spin_lock(&tfile->tx_ring.producer_lock); + for (i = 0; i < n; i++) { + struct xdp_frame *xdp = frames[i]; + /* Encode the XDP flag into lowest bit for consumer to differ + * XDP buffer from sk_buff. 
+ */ + void *frame = tun_xdp_to_ptr(xdp); + + if (__ptr_ring_produce(&tfile->tx_ring, frame)) { + this_cpu_inc(tun->pcpu_stats->tx_dropped); + xdp_return_frame_rx_napi(xdp); + drops++; + } } + spin_unlock(&tfile->tx_ring.producer_lock); -out: rcu_read_unlock(); - return ret; + return cnt - drops; } static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) @@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) if (unlikely(!frame)) return -EOVERFLOW; - return tun_xdp_xmit(dev, frame); + return tun_xdp_xmit(dev, 1, &frame); } static void tun_xdp_flush(struct net_device *dev) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f34794a76c4d..39a0783d1cde 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev) virtqueue_kick(sq->vq); } -static int __virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_frame *xdpf) +static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, + struct send_queue *sq, + struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; - struct xdp_frame *xdpf_sent; - struct send_queue *sq; - unsigned int len; - unsigned int qp; int err; - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - sq = &vi->sq[qp]; - - /* Free up any pending old buffers before queueing new ones. 
*/ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); - /* virtqueue want to use data area in-front of packet */ if (unlikely(xdpf->metasize > 0)) return -EOPNOTSUPP; @@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi, return 0; } -static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) +static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi, + struct xdp_frame *xdpf) +{ + struct xdp_frame *xdpf_sent; + struct send_queue *sq; + unsigned int len; + unsigned int qp; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; + + /* Free up any pending old buffers before queueing new ones. */ + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); + + return __virtnet_xdp_xmit_one(vi, sq, xdpf); +} + +static int virtnet_xdp_xmit(struct net_device *dev, + int n, struct xdp_frame **frames) { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; + struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; + struct send_queue *sq; + unsigned int len; + unsigned int qp; + int drops = 0; + int err; + int i; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. @@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) if (!xdp_prog) return -ENXIO; - return __virtnet_xdp_xmit(vi, xdpf); + /* Free up any pending old buffers before queueing new ones. 
*/ + while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) + xdp_return_frame(xdpf_sent); + + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + + err = __virtnet_xdp_xmit_one(vi, sq, xdpf); + if (err) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + return n - drops; } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) @@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; - err = __virtnet_xdp_xmit(vi, xdpf); + err = __virtnet_xdp_tx_xmit(vi, xdpf); if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; @@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; - err = __virtnet_xdp_xmit(vi, xdpf); + err = __virtnet_xdp_tx_xmit(vi, xdpf); if (unlikely(err)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) |