diff options
177 files changed, 1882 insertions, 1644 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ad3e0631877e..eccb3d1b6abb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2021,6 +2021,7 @@ static void bnx2x_set_rx_buf_size(struct bnx2x *bp) ETH_OVERHEAD + mtu + BNX2X_FW_RX_ALIGN_END; + fp->rx_buf_size = SKB_DATA_ALIGN(fp->rx_buf_size); /* Note : rx_buf_size doesn't take into account NET_SKB_PAD */ if (fp->rx_buf_size + NET_SKB_PAD <= PAGE_SIZE) fp->rx_frag_size = fp->rx_buf_size + NET_SKB_PAD; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index b209b7f6093e..7dd83d0ef0a0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6161,103 +6161,40 @@ static void bnx2x_link_int_ack(struct link_params *params, } } +static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len) +{ + str[0] = '\0'; + (*len)--; + return 0; +} + static int bnx2x_format_ver(u32 num, u8 *str, u16 *len) { - u8 *str_ptr = str; - u32 mask = 0xf0000000; - u8 shift = 8*4; - u8 digit; - u8 remove_leading_zeros = 1; + u16 ret; + if (*len < 10) { /* Need more than 10chars for this format */ - *str_ptr = '\0'; - (*len)--; + bnx2x_null_format_ver(num, str, len); return -EINVAL; } - while (shift > 0) { - shift -= 4; - digit = ((num & mask) >> shift); - if (digit == 0 && remove_leading_zeros) { - *str_ptr = '0'; - } else { - if (digit < 0xa) - *str_ptr = digit + '0'; - else - *str_ptr = digit - 0xa + 'a'; - - remove_leading_zeros = 0; - str_ptr++; - (*len)--; - } - mask = mask >> 4; - if (shift == 4*4) { - if (remove_leading_zeros) { - str_ptr++; - (*len)--; - } - *str_ptr = '.'; - str_ptr++; - (*len)--; - remove_leading_zeros = 1; - } - } - if (remove_leading_zeros) - (*len)--; + ret = scnprintf(str, *len, "%hx.%hx", num >> 16, num); + *len -= ret; return 0; } static int bnx2x_3_seq_format_ver(u32 num, u8 *str, u16 *len) { - u8 *str_ptr = str; - u32 mask = 0x00f00000; - u8 shift = 8*3; - u8 digit; - u8 remove_leading_zeros = 1; + u16 ret; if (*len < 10) { /* Need more than 10chars for this format */ - *str_ptr = '\0'; - (*len)--; + bnx2x_null_format_ver(num, str, len); return -EINVAL; } - while (shift > 0) { - shift -= 4; - digit = ((num & mask) >> shift); - if (digit == 0 && remove_leading_zeros) { - *str_ptr = '0'; - } else { - if (digit < 0xa) - *str_ptr = digit + '0'; - else - *str_ptr = digit - 0xa + 'a'; - - remove_leading_zeros = 0; - str_ptr++; - (*len)--; - } - mask = mask >> 4; - if ((shift == 4*4) || (shift == 4*2)) { - if (remove_leading_zeros) { - str_ptr++; - (*len)--; - } - *str_ptr = '.'; - str_ptr++; - (*len)--; - remove_leading_zeros = 1; - } - } - if (remove_leading_zeros) - (*len)--; - return 0; -} - -static int bnx2x_null_format_ver(u32 spirom_ver, u8 *str, u16 *len) -{ - str[0] = '\0'; - (*len)--; + ret = scnprintf(str, *len, "%hhx.%hhx.%hhx", num >> 16, num >> 8, num); + *len -= ret; return 0; } @@ -10681,22 +10618,19 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy, static int bnx2x_8485x_format_ver(u32 raw_ver, u8 *str, u16 *len) { - int status = 0; u32 num; num = ((raw_ver & 0xF80) >> 7) << 16 | ((raw_ver & 0x7F) << 8) | ((raw_ver & 0xF000) >> 12); - status = bnx2x_3_seq_format_ver(num, str, len); - return status; + return bnx2x_3_seq_format_ver(num, str, len); } static int bnx2x_848xx_format_ver(u32 raw_ver, u8 *str, u16 *len) { - int status = 0; u32 spirom_ver; + spirom_ver = ((raw_ver & 0xF80) >> 7) << 16 | (raw_ver & 0x7F); - status = bnx2x_format_ver(spirom_ver, str, len); - return status; + return bnx2x_format_ver(spirom_ver, str, len); } static void bnx2x_8481_hw_reset(struct bnx2x_phy *phy, @@ -12547,13 +12481,12 @@ static int bnx2x_populate_ext_phy(struct bnx2x *bp, static int bnx2x_populate_phy(struct bnx2x *bp, u8 phy_index, u32 shmem_base, u32 shmem2_base, u8 port, struct bnx2x_phy *phy) { - int status = 0; phy->type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN; if (phy_index == INT_PHY) return bnx2x_populate_int_phy(bp, shmem_base, port, phy); - status = bnx2x_populate_ext_phy(bp, phy_index, shmem_base, shmem2_base, + + return bnx2x_populate_ext_phy(bp, phy_index, shmem_base, shmem2_base, port, phy); - return status; } static void bnx2x_phy_def_cfg(struct link_params *params, diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 5cbd1e7a926a..91f7492623d3 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -432,15 +432,17 @@ static int macb_mii_probe(struct net_device *dev) } pdata = dev_get_platdata(&bp->pdev->dev); - if (pdata && gpio_is_valid(pdata->phy_irq_pin)) { - ret = devm_gpio_request(&bp->pdev->dev, pdata->phy_irq_pin, - "phy int"); - if (!ret) { - phy_irq = gpio_to_irq(pdata->phy_irq_pin); - phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq; + if (pdata) { + if (gpio_is_valid(pdata->phy_irq_pin)) { + ret = devm_gpio_request(&bp->pdev->dev, + pdata->phy_irq_pin, "phy int"); + if (!ret) { + phy_irq = gpio_to_irq(pdata->phy_irq_pin); + phydev->irq = (phy_irq < 0) ? PHY_POLL : phy_irq; + } + } else { + phydev->irq = PHY_POLL; } - } else { - phydev->irq = PHY_POLL; } /* attach the mac to the phy */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c index 201b9875f9bb..5cca73b8880b 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c @@ -313,6 +313,7 @@ int octeon_mbox_process_message(struct octeon_mbox *mbox) return 0; } + spin_unlock_irqrestore(&mbox->lock, flags); WARN_ON(1); return 0; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 8702661b99c0..f3a09ab55900 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5268,15 +5268,15 @@ static bool be_err_is_recoverable(struct be_adapter *adapter) dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n", ue_err_code); - if (jiffies - err_rec->probe_time <= initial_idle_time) { + if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) { dev_err(&adapter->pdev->dev, "Cannot recover within %lu sec from driver load\n", jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC); return false; } - if (err_rec->last_recovery_time && - (jiffies - err_rec->last_recovery_time <= recovery_interval)) { + if (err_rec->last_recovery_time && time_before_eq( + jiffies - err_rec->last_recovery_time, recovery_interval)) { dev_err(&adapter->pdev->dev, "Cannot recover within %lu sec from last recovery\n", jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 0c1f56e58074..8b5cdf490850 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -696,6 +696,8 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb, rc = phy_device_register(phy); if (rc) { phy_device_free(phy); + dev_err(&mdio->dev, "registered phy fail at address %i\n", + addr); return -ENODEV; } @@ -706,7 +708,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb, return 0; } -static void hns_mac_register_phy(struct hns_mac_cb *mac_cb) +static int hns_mac_register_phy(struct hns_mac_cb *mac_cb) { struct acpi_reference_args args; struct platform_device *pdev; @@ -716,24 +718,39 @@ static void hns_mac_register_phy(struct hns_mac_cb *mac_cb) /* Loop over the child nodes and register a phy_device for each one */ if (!to_acpi_device_node(mac_cb->fw_port)) - return; + return -ENODEV; rc = acpi_node_get_property_reference( mac_cb->fw_port, "mdio-node", 0, &args); if (rc) - return; + return rc; addr = hns_mac_phy_parse_addr(mac_cb->dev, mac_cb->fw_port); if (addr < 0) - return; + return addr; /* dev address in adev */ pdev = hns_dsaf_find_platform_device(acpi_fwnode_handle(args.adev)); + if (!pdev) { + dev_err(mac_cb->dev, "mac%d mdio pdev is NULL\n", + mac_cb->mac_id); + return -EINVAL; + } + mii_bus = platform_get_drvdata(pdev); + if (!mii_bus) { + dev_err(mac_cb->dev, + "mac%d mdio is NULL, dsaf will probe again later\n", + mac_cb->mac_id); + return -EPROBE_DEFER; + } + rc = hns_mac_register_phydev(mii_bus, mac_cb, addr); if (!rc) dev_dbg(mac_cb->dev, "mac%d register phy addr:%d\n", mac_cb->mac_id, addr); + + return rc; } #define MAC_MEDIA_TYPE_MAX_LEN 16 @@ -754,7 +771,7 @@ static const struct { *@np:device node * return: 0 --success, negative --fail */ -static int hns_mac_get_info(struct hns_mac_cb *mac_cb) +static int hns_mac_get_info(struct hns_mac_cb *mac_cb) { struct device_node *np; struct regmap *syscon; @@ -864,7 +881,15 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) } } } else if (is_acpi_node(mac_cb->fw_port)) { - hns_mac_register_phy(mac_cb); + ret = hns_mac_register_phy(mac_cb); + /* + * Mac can work well if there is phy or not.If the port don't + * connect with phy, the return value will be ignored. Only + * when there is phy but can't find mdio bus, the return value + * will be handled. + */ + if (ret == -EPROBE_DEFER) + return ret; } else { dev_err(mac_cb->dev, "mac%d cannot find phy node\n", mac_cb->mac_id); @@ -1026,6 +1051,7 @@ int hns_mac_init(struct dsaf_device *dsaf_dev) dsaf_dev->mac_cb[port_id] = mac_cb; } } + /* init mac_cb for all port */ for (port_id = 0; port_id < max_port_num; port_id++) { mac_cb = dsaf_dev->mac_cb[port_id]; @@ -1035,6 +1061,7 @@ int hns_mac_init(struct dsaf_device *dsaf_dev) ret = hns_mac_get_cfg(dsaf_dev, mac_cb); if (ret) return ret; + ret = hns_mac_init_ex(mac_cb); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index eba406bea52f..93e71e27401b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -510,7 +510,9 @@ int hns_ppe_init(struct dsaf_device *dsaf_dev) hns_ppe_get_cfg(dsaf_dev->ppe_common[i]); - hns_rcb_get_cfg(dsaf_dev->rcb_common[i]); + ret = hns_rcb_get_cfg(dsaf_dev->rcb_common[i]); + if (ret) + goto get_cfg_fail; } for (i = 0; i < HNS_PPE_COM_NUM; i++) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index c20a0f4f8f02..e2e28532e4dc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -492,7 +492,7 @@ static int hns_rcb_get_base_irq_idx(struct rcb_common_cb *rcb_common) *hns_rcb_get_cfg - get rcb config *@rcb_common: rcb common device */ -void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) +int hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) { struct ring_pair_cb *ring_pair_cb; u32 i; @@ -517,10 +517,16 @@ void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common) ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] = is_ver1 ? platform_get_irq(pdev, base_irq_idx + i * 2 + 1) : platform_get_irq(pdev, base_irq_idx + i * 3); + if ((ring_pair_cb->virq[HNS_RCB_IRQ_IDX_TX] == -EPROBE_DEFER) || + (ring_pair_cb->virq[HNS_RCB_IRQ_IDX_RX] == -EPROBE_DEFER)) + return -EPROBE_DEFER; + ring_pair_cb->q.phy_base = RCB_COMM_BASE_TO_RING_BASE(rcb_common->phy_base, i); hns_rcb_ring_pair_get_cfg(ring_pair_cb); } + + return 0; } /** diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index a664ee88ab45..602816498c8d 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -121,7 +121,7 @@ int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index); void hns_rcb_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index); int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common); void hns_rcb_start(struct hnae_queue *q, u32 val); -void hns_rcb_get_cfg(struct rcb_common_cb *rcb_common); +int hns_rcb_get_cfg(struct rcb_common_cb *rcb_common); void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, u16 *max_vfn, u16 *max_q_per_vf); diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index a29b12e80855..c7c994eb410e 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -135,7 +135,8 @@ enum e1000_boards { board_pchlan, board_pch2lan, board_pch_lpt, - board_pch_spt + board_pch_spt, + board_pch_cnp }; struct e1000_ps_page { @@ -378,18 +379,22 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n) * bits to count nanoseconds leaving the rest for fractional nonseconds. */ -#define INCVALUE_96MHz 125 -#define INCVALUE_SHIFT_96MHz 17 -#define INCPERIOD_SHIFT_96MHz 2 -#define INCPERIOD_96MHz (12 >> INCPERIOD_SHIFT_96MHz) +#define INCVALUE_96MHZ 125 +#define INCVALUE_SHIFT_96MHZ 17 +#define INCPERIOD_SHIFT_96MHZ 2 +#define INCPERIOD_96MHZ (12 >> INCPERIOD_SHIFT_96MHZ) -#define INCVALUE_25MHz 40 -#define INCVALUE_SHIFT_25MHz 18 -#define INCPERIOD_25MHz 1 +#define INCVALUE_25MHZ 40 +#define INCVALUE_SHIFT_25MHZ 18 +#define INCPERIOD_25MHZ 1 -#define INCVALUE_24MHz 125 -#define INCVALUE_SHIFT_24MHz 14 -#define INCPERIOD_24MHz 3 +#define INCVALUE_24MHZ 125 +#define INCVALUE_SHIFT_24MHZ 14 +#define INCPERIOD_24MHZ 3 + +#define INCVALUE_38400KHZ 26 +#define INCVALUE_SHIFT_38400KHZ 19 +#define INCPERIOD_38400KHZ 1 /* Another drawback of scaling the incvalue by a large factor is the * 64-bit SYSTIM register overflows more quickly. This is dealt with @@ -515,6 +520,7 @@ extern const struct e1000_info e1000_pch_info; extern const struct e1000_info e1000_pch2_info; extern const struct e1000_info e1000_pch_lpt_info; extern const struct e1000_info e1000_pch_spt_info; +extern const struct e1000_info e1000_pch_cnp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index e70b1ebff60d..e23dbd9190d6 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -911,19 +911,20 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: + /* fall through */ + case e1000_pch_cnp: mask |= BIT(18); break; default: break; } - if ((mac->type == e1000_pch_lpt) || (mac->type == e1000_pch_spt)) + if (mac->type >= e1000_pch_lpt) wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >> E1000_FWSM_WLOCK_MAC_SHIFT; for (i = 0; i < mac->rar_entry_count; i++) { - if ((mac->type == e1000_pch_lpt) || - (mac->type == e1000_pch_spt)) { + if (mac->type >= e1000_pch_lpt) { /* Cannot test write-protected SHRAL[n] registers */ if ((wlock_mac == 1) || (wlock_mac && (i > wlock_mac))) continue; @@ -1532,7 +1533,7 @@ static int e1000_setup_loopback_test(struct e1000_adapter *adapter) struct e1000_hw *hw = &adapter->hw; u32 rctl, fext_nvm11, tarc0; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { fext_nvm11 = er32(FEXTNVM11); fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); @@ -1576,6 +1577,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) switch (hw->mac.type) { case e1000_pch_spt: + case e1000_pch_cnp: fext_nvm11 = er32(FEXTNVM11); fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 4e733bf1a38e..66bd5060a65b 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -96,6 +96,10 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 #define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 #define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 +#define E1000_DEV_ID_PCH_CNP_I219_LM6 0x15BD +#define E1000_DEV_ID_PCH_CNP_I219_V6 0x15BE +#define E1000_DEV_ID_PCH_CNP_I219_LM7 0x15BB +#define E1000_DEV_ID_PCH_CNP_I219_V7 0x15BC #define E1000_REVISION_4 4 @@ -118,6 +122,7 @@ enum e1000_mac_type { e1000_pch2lan, e1000_pch_lpt, e1000_pch_spt, + e1000_pch_cnp, }; enum e1000_media_type { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index f3aaca743ea3..68ea8b4555ab 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -237,7 +237,7 @@ static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) if (ret_val) return false; out: - if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { /* Unforce SMBus mode in PHY */ @@ -333,6 +333,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) switch (hw->mac.type) { case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -474,6 +475,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ @@ -607,7 +609,7 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) nvm->type = e1000_nvm_flash_sw; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { /* in SPT, gfpreg doesn't exist. NVM size is taken from the * STRAP register. This is because in SPT the GbE Flash region * is no longer accessed through the flash registers. Instead, @@ -715,6 +717,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) /* fall-through */ case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -732,7 +735,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) break; } - if ((mac->type == e1000_pch_lpt) || (mac->type == e1000_pch_spt)) { + if (mac->type >= e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; mac->ops.setup_physical_interface = @@ -1399,9 +1402,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * aggressive resulting in many collisions. To avoid this, increase * the IPG and reduce Rx latency in the PHY. */ - if (((hw->mac.type == e1000_pch2lan) || - (hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) && link) { + if ((hw->mac.type >= e1000_pch2lan) && link) { u16 speed, duplex; e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex); @@ -1412,7 +1413,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ emi_val = 0; - } else if (hw->mac.type == e1000_pch_spt && + } else if (hw->mac.type >= e1000_pch_spt && duplex == FULL_DUPLEX && speed != SPEED_1000) { tipg_reg |= 0xC; emi_val = 1; @@ -1435,8 +1436,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); - if (hw->mac.type == e1000_pch_lpt || - hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_lpt) { u16 phy_reg; e1e_rphy_locked(hw, I217_PLL_CLOCK_GATE_REG, &phy_reg); @@ -1452,7 +1452,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (ret_val) return ret_val; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { u16 data; u16 ptr_gap; @@ -1502,7 +1502,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * on power up. * Set the Beacon Duration for I217 to 8 usec */ - if ((hw->mac.type == e1000_pch_lpt) || (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { u32 mac_reg; mac_reg = er32(FEXTNVM4); @@ -1520,8 +1520,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) if (ret_val) return ret_val; } - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { /* Set platform power management values for * Latency Tolerance Reporting (LTR) */ @@ -1533,15 +1532,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) /* Clear link partner's EEE ability */ hw->dev_spec.ich8lan.eee_lp_ability = 0; - /* FEXTNVM6 K1-off workaround */ - if (hw->mac.type == e1000_pch_spt) { - u32 pcieanacfg = er32(PCIEANACFG); + if (hw->mac.type >= e1000_pch_lpt) { u32 fextnvm6 = er32(FEXTNVM6); - if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) - fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; - else - fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + if (hw->mac.type == e1000_pch_spt) { + /* FEXTNVM6 K1-off workaround - for SPT only */ + u32 pcieanacfg = er32(PCIEANACFG); + + if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) + fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; + else + fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + } ew32(FEXTNVM6, fextnvm6); } @@ -1640,6 +1642,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: rc = e1000_init_phy_params_pchlan(hw); break; default: @@ -2091,6 +2094,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3125,6 +3129,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) switch (hw->mac.type) { case e1000_pch_spt: + case e1000_pch_cnp: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; @@ -3380,7 +3385,7 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) /* Clear FCERR and DAEL in hw status by writing 1 */ hsfsts.hsf_status.flcerr = 1; hsfsts.hsf_status.dael = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); @@ -3399,7 +3404,7 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * Begin by setting Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval); @@ -3423,7 +3428,7 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * now set the Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF); else @@ -3450,13 +3455,13 @@ static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) u32 i = 0; /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16; else hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcgo = 1; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16); else ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); @@ -3527,7 +3532,7 @@ static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, /* In SPT, only 32 bits access is supported, * so this function should not be called. */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) return -E1000_ERR_NVM; else ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); @@ -3634,8 +3639,7 @@ static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, s32 ret_val = -E1000_ERR_NVM; u8 count = 0; - if (offset > ICH_FLASH_LINEAR_ADDR_MASK || - hw->mac.type != e1000_pch_spt) + if (offset > ICH_FLASH_LINEAR_ADDR_MASK || hw->mac.type < e1000_pch_spt) return -E1000_ERR_NVM; flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); @@ -4068,6 +4072,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) switch (hw->mac.type) { case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; @@ -4153,7 +4158,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, s32 ret_val; u8 count = 0; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } else { @@ -4173,7 +4178,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, /* In SPT, This register is in Lan memory space, not * flash. Therefore, only 32 bit access is supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16; else hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); @@ -4185,7 +4190,7 @@ static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, * not flash. Therefore, only 32 bit access is * supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16); else ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); @@ -4243,7 +4248,7 @@ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, s32 ret_val; u8 count = 0; - if (hw->mac.type == e1000_pch_spt) { + if (hw->mac.type >= e1000_pch_spt) { if (offset > ICH_FLASH_LINEAR_ADDR_MASK) return -E1000_ERR_NVM; } @@ -4259,7 +4264,7 @@ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, /* In SPT, This register is in Lan memory space, not * flash. Therefore, only 32 bit access is supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16; else @@ -4272,7 +4277,7 @@ static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, * not flash. Therefore, only 32 bit access is * supported */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16); else ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval); @@ -4464,14 +4469,14 @@ static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) /* Write a value 11 (block Erase) in Flash * Cycle field in hw flash control */ - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16; else hsflctl.regval = er16flash(ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16); else @@ -4894,8 +4899,7 @@ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) ew32(RFCTL, reg); /* Enable ECC on Lynxpoint */ - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { reg = er32(PBECCSTS); reg |= E1000_PBECCSTS_ECC_ENABLE; ew32(PBECCSTS, reg); @@ -5299,7 +5303,7 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (device_id == E1000_DEV_ID_PCH_I218_LM3) || (device_id == E1000_DEV_ID_PCH_I218_V3) || - (hw->mac.type == e1000_pch_spt)) { + (hw->mac.type >= e1000_pch_spt)) { u32 fextnvm6 = er32(FEXTNVM6); ew32(FEXTNVM6, fextnvm6 & ~E1000_FEXTNVM6_REQ_PLL_CLK); @@ -5865,7 +5869,8 @@ const struct e1000_info e1000_pch2_info = { | FLAG_HAS_JUMBO_FRAMES | FLAG_APME_IN_WUC, .flags2 = FLAG2_HAS_PHY_STATS - | FLAG2_HAS_EEE, + | FLAG2_HAS_EEE + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 26, .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, @@ -5914,3 +5919,23 @@ const struct e1000_info e1000_pch_spt_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_cnp_info = { + .mac = e1000_pch_cnp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 667fc45ce906..b3679728caac 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -71,6 +71,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch2lan] = &e1000_pch2_info, [board_pch_lpt] = &e1000_pch_lpt_info, [board_pch_spt] = &e1000_pch_spt_info, + [board_pch_cnp] = &e1000_pch_cnp_info, }; struct e1000_reg_info { @@ -1791,8 +1792,7 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) } /* Reset on uncorrectable ECC error */ - if ((icr & E1000_ICR_ECCER) && ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt))) { + if ((icr & E1000_ICR_ECCER) && (hw->mac.type >= e1000_pch_lpt)) { u32 pbeccsts = er32(PBECCSTS); adapter->corr_errors += @@ -1872,8 +1872,7 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) } /* Reset on uncorrectable ECC error */ - if ((icr & E1000_ICR_ECCER) && ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt))) { + if ((icr & E1000_ICR_ECCER) && (hw->mac.type >= e1000_pch_lpt)) { u32 pbeccsts = er32(PBECCSTS); adapter->corr_errors += @@ -2241,8 +2240,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter) if (adapter->msix_entries) { ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574); ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC); - } else if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + } else if (hw->mac.type >= e1000_pch_lpt) { ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER); } else { ew32(IMS, IMS_ENABLE_MASK); @@ -3000,8 +2998,8 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) hw->mac.ops.config_collision_dist(hw); - /* SPT Si errata workaround to avoid data corruption */ - if (hw->mac.type == e1000_pch_spt) { + /* SPT and CNP Si errata workaround to avoid data corruption */ + if (hw->mac.type >= e1000_pch_spt) { u32 reg_val; reg_val = er32(IOSFPC); @@ -3497,8 +3495,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) /* Make sure clock is enabled on I217/I218/I219 before checking * the frequency */ - if (((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) && + if ((hw->mac.type >= e1000_pch_lpt) && !(er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_ENABLED) && !(er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_ENABLED)) { u32 fextnvm7 = er32(FEXTNVM7); @@ -3512,42 +3509,57 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) switch (hw->mac.type) { case e1000_pch2lan: /* Stable 96MHz frequency */ - incperiod = INCPERIOD_96MHz; - incvalue = INCVALUE_96MHz; - shift = INCVALUE_SHIFT_96MHz; - adapter->cc.shift = shift + INCPERIOD_SHIFT_96MHz; + incperiod = INCPERIOD_96MHZ; + incvalue = INCVALUE_96MHZ; + shift = INCVALUE_SHIFT_96MHZ; + adapter->cc.shift = shift + INCPERIOD_SHIFT_96MHZ; break; case e1000_pch_lpt: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 96MHz frequency */ - incperiod = INCPERIOD_96MHz; - incvalue = INCVALUE_96MHz; - shift = INCVALUE_SHIFT_96MHz; - adapter->cc.shift = shift + INCPERIOD_SHIFT_96MHz; + incperiod = INCPERIOD_96MHZ; + incvalue = INCVALUE_96MHZ; + shift = INCVALUE_SHIFT_96MHZ; + adapter->cc.shift = shift + INCPERIOD_SHIFT_96MHZ; } else { /* Stable 25MHz frequency */ - incperiod = INCPERIOD_25MHz; - incvalue = INCVALUE_25MHz; - shift = INCVALUE_SHIFT_25MHz; + incperiod = INCPERIOD_25MHZ; + incvalue = INCVALUE_25MHZ; + shift = INCVALUE_SHIFT_25MHZ; adapter->cc.shift = shift; } break; case e1000_pch_spt: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ - incperiod = INCPERIOD_24MHz; - incvalue = INCVALUE_24MHz; - shift = INCVALUE_SHIFT_24MHz; + incperiod = INCPERIOD_24MHZ; + incvalue = INCVALUE_24MHZ; + shift = INCVALUE_SHIFT_24MHZ; adapter->cc.shift = shift; break; } return -EINVAL; + case e1000_pch_cnp: + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { + /* Stable 24MHz frequency */ + incperiod = INCPERIOD_24MHZ; + incvalue = INCVALUE_24MHZ; + shift = INCVALUE_SHIFT_24MHZ; + adapter->cc.shift = shift; + } else { + /* Stable 38400KHz frequency */ + incperiod = INCPERIOD_38400KHZ; + incvalue = INCVALUE_38400KHZ; + shift = INCVALUE_SHIFT_38400KHZ; + adapter->cc.shift = shift; + } + break; case e1000_82574: case e1000_82583: /* Stable 25MHz frequency */ - incperiod = INCPERIOD_25MHz; - incvalue = INCVALUE_25MHz; - shift = INCVALUE_SHIFT_25MHz; + incperiod = INCPERIOD_25MHZ; + incvalue = INCVALUE_25MHZ; + shift = INCVALUE_SHIFT_25MHZ; adapter->cc.shift = shift; break; default: @@ -4038,6 +4050,7 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: + case e1000_pch_cnp: fc->refresh_time = 0x0400; if (adapter->netdev->mtu <= ETH_DATA_LEN) { @@ -4082,7 +4095,7 @@ void e1000e_reset(struct e1000_adapter *adapter) } } - if (hw->mac.type == e1000_pch_spt) + if (hw->mac.type >= e1000_pch_spt) e1000_flush_desc_rings(adapter); /* Allow time for pending master requests to run */ mac->ops.reset_hw(hw); @@ -4157,7 +4170,7 @@ void e1000e_reset(struct e1000_adapter *adapter) phy_data &= ~IGP02E1000_PM_SPD; e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, phy_data); } - if (hw->mac.type == e1000_pch_spt && adapter->int_mode == 0) { + if (hw->mac.type >= e1000_pch_spt && adapter->int_mode == 0) { u32 reg; /* Fextnvm7 @ 0xe4[2] = 1 */ @@ -4291,7 +4304,7 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset) if (!pci_channel_offline(adapter->pdev)) { if (reset) e1000e_reset(adapter); - else if (hw->mac.type == e1000_pch_spt) + else if (hw->mac.type >= e1000_pch_spt) e1000_flush_desc_rings(adapter); } e1000_clean_tx_ring(adapter->tx_ring); @@ -4979,8 +4992,7 @@ static void e1000e_update_stats(struct e1000_adapter *adapter) adapter->stats.mgpdc += er32(MGTPDC); /* Correctable ECC Errors */ - if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + if (hw->mac.type >= e1000_pch_lpt) { u32 pbeccsts = er32(PBECCSTS); adapter->corr_errors += @@ -6354,8 +6366,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (adapter->hw.phy.type == e1000_phy_igp_3) { e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); - } else if ((hw->mac.type == e1000_pch_lpt) || - (hw->mac.type == e1000_pch_spt)) { + } else if (hw->mac.type >= e1000_pch_lpt) { if (!(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) /* ULP does not support wake from unicast, multicast * or broadcast. @@ -7514,6 +7525,10 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V4), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM5), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V5), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_LM6), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_V6), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_LM7), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_V7), board_pch_cnp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 34cc3be0df8e..b366885487a8 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -301,8 +301,8 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: - if (((hw->mac.type != e1000_pch_lpt) && - (hw->mac.type != e1000_pch_spt)) || + case e1000_pch_cnp: + if ((hw->mac.type < e1000_pch_lpt) || (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { adapter->ptp_clock_info.max_adj = 24000000 - 1; break; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 70f9458f7a01..cdde3cc28fb5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -125,7 +125,6 @@ enum i40e_state_t { __I40E_CONFIG_BUSY, __I40E_CONFIG_DONE, __I40E_DOWN, - __I40E_NEEDS_RESTART, __I40E_SERVICE_SCHED, __I40E_ADMINQ_EVENT_PENDING, __I40E_MDD_EVENT_PENDING, @@ -138,7 +137,6 @@ enum i40e_state_t { __I40E_GLOBAL_RESET_REQUESTED, __I40E_EMP_RESET_REQUESTED, __I40E_EMP_RESET_INTR_RECEIVED, - __I40E_FILTER_OVERFLOW_PROMISC, __I40E_SUSPENDED, __I40E_PTP_TX_IN_PROGRESS, __I40E_BAD_EEPROM, @@ -147,6 +145,20 @@ enum i40e_state_t { __I40E_RESET_FAILED, __I40E_PORT_SUSPENDED, __I40E_VF_DISABLE, + /* This must be last as it determines the size of the BITMAP */ + __I40E_STATE_SIZE__, +}; + +/* VSI state flags */ +enum i40e_vsi_state_t { + __I40E_VSI_DOWN, + __I40E_VSI_NEEDS_RESTART, + __I40E_VSI_SYNCING_FILTERS, + __I40E_VSI_OVERFLOW_PROMISC, + __I40E_VSI_REINIT_REQUESTED, + __I40E_VSI_DOWN_REQUESTED, + /* This must be last as it determines the size of the BITMAP */ + __I40E_VSI_STATE_SIZE__, }; enum i40e_interrupt_policy { @@ -245,7 +257,7 @@ struct i40e_tc_configuration { struct i40e_udp_port_config { /* AdminQ command interface expects port number in Host byte order */ - u16 index; + u16 port; u8 type; }; @@ -322,7 +334,7 @@ struct i40e_flex_pit { struct i40e_pf { struct pci_dev *pdev; struct i40e_hw hw; - unsigned long state; + DECLARE_BITMAP(state, __I40E_STATE_SIZE__); struct msix_entry *msix_entries; bool fc_autoneg_status; @@ -396,6 +408,8 @@ struct i40e_pf { #define I40E_FLAG_DCB_ENABLED BIT_ULL(20) #define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21) #define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) +#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT_ULL(23) +#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT_ULL(24) #define I40E_FLAG_PTP BIT_ULL(25) #define I40E_FLAG_MFP_ENABLED BIT_ULL(26) #define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27) @@ -428,13 +442,6 @@ struct i40e_pf { #define I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(57) #define I40E_FLAG_LEGACY_RX BIT_ULL(58) - /* Tracks features that are disabled due to hw limitations. - * If a bit is set here, it means that the corresponding - * bit in the 'flags' field is cleared i.e that feature - * is disabled - */ - u64 hw_disabled_flags; - struct i40e_client_instance *cinst; bool stat_offsets_loaded; struct i40e_hw_port_stats stats; @@ -593,7 +600,7 @@ struct i40e_vsi { bool stat_offsets_loaded; u32 current_netdev_flags; - unsigned long state; + DECLARE_BITMAP(state, __I40E_VSI_STATE_SIZE__); #define I40E_VSI_FLAG_FILTER_CHANGED BIT(0) #define I40E_VSI_FLAG_VEB_OWNER BIT(1) unsigned long flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index eb2896fd52a6..c3b81a97558e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -371,8 +371,8 @@ void i40e_client_subtask(struct i40e_pf *pf) cdev = pf->cinst; /* If we're down or resetting, just bail */ - if (test_bit(__I40E_DOWN, &pf->state) || - test_bit(__I40E_CONFIG_BUSY, &pf->state)) + if (test_bit(__I40E_DOWN, pf->state) || + test_bit(__I40E_CONFIG_BUSY, pf->state)) return; if (!client || !cdev) @@ -382,7 +382,7 @@ void i40e_client_subtask(struct i40e_pf *pf) * the netdev is up, then open the client. */ if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { - if (!test_bit(__I40E_DOWN, &vsi->state) && + if (!test_bit(__I40E_VSI_DOWN, vsi->state) && client->ops && client->ops->open) { set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); ret = client->ops->open(&cdev->lan_info, client); @@ -397,7 +397,7 @@ void i40e_client_subtask(struct i40e_pf *pf) /* Likewise for client close. If the client is up, but the netdev * is down, then close the client. */ - if (test_bit(__I40E_DOWN, &vsi->state) && + if (test_bit(__I40E_VSI_DOWN, vsi->state) && client->ops && client->ops->close) { clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); client->ops->close(&cdev->lan_info, client, false); @@ -503,7 +503,7 @@ static void i40e_client_release(struct i40e_client *client) continue; while (test_and_set_bit(__I40E_SERVICE_SCHED, - &pf->state)) + pf->state)) usleep_range(500, 1000); if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { @@ -521,7 +521,7 @@ static void i40e_client_release(struct i40e_client *client) i40e_client_del_instance(pf); dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n", client->name); - clear_bit(__I40E_SERVICE_SCHED, &pf->state); + clear_bit(__I40E_SERVICE_SCHED, pf->state); } mutex_unlock(&i40e_device_mutex); } @@ -661,10 +661,10 @@ static void i40e_client_request_reset(struct i40e_info *ldev, switch (reset_level) { case I40E_CLIENT_RESET_LEVEL_PF: - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); break; case I40E_CLIENT_RESET_LEVEL_CORE: - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); break; default: dev_warn(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index a3d7ec62b76c..8f326f87a815 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -158,9 +158,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " vlgrp: & = %p\n", vsi->active_vlans); dev_info(&pf->pdev->dev, - " state = %li flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", - vsi->state, vsi->flags, - vsi->netdev_registered, vsi->current_netdev_flags); + " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", + vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags); + for (i = 0; i < BITS_TO_LONGS(__I40E_VSI_STATE_SIZE__); i++) + dev_info(&pf->pdev->dev, + " state[%d] = %08lx\n", + i, vsi->state[i]); if (vsi == pf->vsi[pf->lan_vsi]) dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", pf->hw.mac.addr, @@ -174,7 +177,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) } dev_info(&pf->pdev->dev, " active_filters %u, promisc_threshold %u, overflow promisc %s\n", vsi->active_filters, vsi->promisc_threshold, - (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state) ? + (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) ? "ON" : "OFF")); nstat = i40e_get_vsi_stats_struct(vsi); dev_info(&pf->pdev->dev, @@ -1706,7 +1709,7 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, } else if (!vsi->netdev) { dev_info(&pf->pdev->dev, "tx_timeout: no netdev for VSI %d\n", vsi_seid); - } else if (test_bit(__I40E_DOWN, &vsi->state)) { + } else if (test_bit(__I40E_VSI_DOWN, vsi->state)) { dev_info(&pf->pdev->dev, "tx_timeout: VSI %d not UP\n", vsi_seid); } else if (rtnl_trylock()) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 08035c4389cd..7a8eb486b9ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -757,7 +757,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, if (memcmp(©_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings))) return -EOPNOTSUPP; - while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state)) { + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -891,7 +891,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, } done: - clear_bit(__I40E_CONFIG_BUSY, &pf->state); + clear_bit(__I40E_CONFIG_BUSY, pf->state); return err; } @@ -987,7 +987,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, } /* If we have link and don't have autoneg */ - if (!test_bit(__I40E_DOWN, &pf->state) && + if (!test_bit(__I40E_DOWN, pf->state) && !(hw_link_info->an_info & I40E_AQ_AN_COMPLETED)) { /* Send message that it might not necessarily work*/ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); @@ -1039,10 +1039,10 @@ static int i40e_set_pauseparam(struct net_device *netdev, err = -EAGAIN; } - if (!test_bit(__I40E_DOWN, &pf->state)) { + if (!test_bit(__I40E_DOWN, pf->state)) { /* Give it a little more time to try to come back */ msleep(75); - if (!test_bit(__I40E_DOWN, &pf->state)) + if (!test_bit(__I40E_DOWN, pf->state)) return i40e_nway_reset(netdev); } @@ -1139,8 +1139,8 @@ static int i40e_get_eeprom(struct net_device *netdev, /* make sure it is the right magic for NVMUpdate */ if ((eeprom->magic >> 16) != hw->device_id) errno = -EINVAL; - else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) errno = -EBUSY; else ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); @@ -1246,8 +1246,8 @@ static int i40e_set_eeprom(struct net_device *netdev, /* check for NVMUpdate access method */ else if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) errno = -EINVAL; - else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) errno = -EBUSY; else ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); @@ -1332,7 +1332,7 @@ static int i40e_set_ringparam(struct net_device *netdev, (new_rx_count == vsi->rx_rings[0]->count)) return 0; - while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state)) { + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -1485,7 +1485,7 @@ free_tx: } done: - clear_bit(__I40E_CONFIG_BUSY, &pf->state); + clear_bit(__I40E_CONFIG_BUSY, pf->state); return err; } @@ -1826,7 +1826,7 @@ static inline bool i40e_active_vfs(struct i40e_pf *pf) int i; for (i = 0; i < pf->num_alloc_vfs; i++) - if (test_bit(I40E_VF_STAT_ACTIVE, &vfs[i].vf_states)) + if (test_bit(I40E_VF_STATE_ACTIVE, &vfs[i].vf_states)) return true; return false; } @@ -1847,7 +1847,7 @@ static void i40e_diag_test(struct net_device *netdev, /* Offline tests */ netif_info(pf, drv, netdev, "offline testing starting\n"); - set_bit(__I40E_TESTING, &pf->state); + set_bit(__I40E_TESTING, pf->state); if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) { dev_warn(&pf->pdev->dev, @@ -1857,7 +1857,7 @@ static void i40e_diag_test(struct net_device *netdev, data[I40E_ETH_TEST_INTR] = 1; data[I40E_ETH_TEST_LINK] = 1; eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__I40E_TESTING, &pf->state); + clear_bit(__I40E_TESTING, pf->state); goto skip_ol_tests; } @@ -1886,7 +1886,7 @@ static void i40e_diag_test(struct net_device *netdev, if (i40e_reg_test(netdev, &data[I40E_ETH_TEST_REG])) eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__I40E_TESTING, &pf->state); + clear_bit(__I40E_TESTING, pf->state); i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); if (if_running) @@ -2924,11 +2924,11 @@ static int i40e_del_fdir_entry(struct i40e_vsi *vsi, struct i40e_pf *pf = vsi->back; int ret = 0; - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) return -EBUSY; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) + if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) return -EBUSY; ret = i40e_update_ethtool_fdir_entry(vsi, NULL, fsp->location, cmd); @@ -3643,14 +3643,14 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) return -EOPNOTSUPP; - if (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED) + if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) return -ENOSPC; - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) return -EBUSY; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) + if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) return -EBUSY; fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; @@ -4086,12 +4086,12 @@ flags_complete: /* Flush current ATR settings if ATR was disabled */ if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) && !(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) { - pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED; - set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); + pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED; + set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); } /* Only allow ATR evict on hardware that is capable of handling it */ - if (pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) + if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE; if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c001562f19b2..d5c9c9e06ff5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -47,7 +47,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 2 #define DRV_VERSION_MINOR 1 -#define DRV_VERSION_BUILD 7 +#define DRV_VERSION_BUILD 14 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -295,8 +295,8 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id) **/ void i40e_service_event_schedule(struct i40e_pf *pf) { - if (!test_bit(__I40E_DOWN, &pf->state) && - !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) + if (!test_bit(__I40E_VSI_DOWN, pf->state) && + !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) queue_work(i40e_wq, &pf->service_task); } @@ -377,13 +377,13 @@ static void i40e_tx_timeout(struct net_device *netdev) switch (pf->tx_timeout_recovery_level) { case 1: - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); break; case 2: - set_bit(__I40E_CORE_RESET_REQUESTED, &pf->state); + set_bit(__I40E_CORE_RESET_REQUESTED, pf->state); break; case 3: - set_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state); + set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); break; default: netdev_err(netdev, "tx_timeout recovery unsuccessful\n"); @@ -422,7 +422,7 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi); int i; - if (test_bit(__I40E_DOWN, &vsi->state)) + if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; if (!vsi->tx_rings) @@ -753,8 +753,8 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) u64 tx_p, tx_b; u16 q; - if (test_bit(__I40E_DOWN, &vsi->state) || - test_bit(__I40E_CONFIG_BUSY, &pf->state)) + if (test_bit(__I40E_VSI_DOWN, vsi->state) || + test_bit(__I40E_CONFIG_BUSY, pf->state)) return; ns = i40e_get_vsi_stats_struct(vsi); @@ -1050,13 +1050,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) &osd->rx_lpi_count, &nsd->rx_lpi_count); if (pf->flags & I40E_FLAG_FD_SB_ENABLED && - !(pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)) + !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)) nsd->fd_sb_status = true; else nsd->fd_sb_status = false; if (pf->flags & I40E_FLAG_FD_ATR_ENABLED && - !(pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) + !(pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED)) nsd->fd_atr_status = true; else nsd->fd_atr_status = false; @@ -1346,7 +1346,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, * to failed, so we don't bother to try sending the filter * to the hardware. */ - if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) + if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state)) f->state = I40E_FILTER_FAILED; else f->state = I40E_FILTER_NEW; @@ -1525,8 +1525,8 @@ static int i40e_set_mac(struct net_device *netdev, void *p) return 0; } - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) return -EADDRNOTAVAIL; if (ether_addr_equal(hw->mac.addr, addr->sa_data)) @@ -1920,7 +1920,7 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, if (fcnt != num_add) { *promisc_changed = true; - set_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", i40e_aq_str(hw, aq_err), @@ -2003,7 +2003,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) struct i40e_aqc_add_macvlan_element_data *add_list; struct i40e_aqc_remove_macvlan_element_data *del_list; - while (test_and_set_bit(__I40E_CONFIG_BUSY, &vsi->state)) + while (test_and_set_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state)) usleep_range(1000, 2000); pf = vsi->back; @@ -2139,8 +2139,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) num_add = 0; hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { - if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, - &vsi->state)) { + if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, + vsi->state)) { new->state = I40E_FILTER_FAILED; continue; } @@ -2227,20 +2227,20 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value. */ - if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state) && + if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) && !promisc_changed && !failed_filters && (vsi->active_filters < vsi->promisc_threshold)) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", vsi_name); - clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); + clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); promisc_changed = true; vsi->promisc_threshold = 0; } /* if the VF is not trusted do not do promisc */ if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { - clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); + clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out; } @@ -2265,12 +2265,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } if ((changed_flags & IFF_PROMISC) || (promisc_changed && - test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state))) { + test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state))) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || - test_bit(__I40E_FILTER_OVERFLOW_PROMISC, - &vsi->state)); + test_bit(__I40E_VSI_OVERFLOW_PROMISC, + vsi->state)); if ((vsi->type == I40E_VSI_MAIN) && (pf->lan_veb != I40E_NO_VEB) && !(pf->flags & I40E_FLAG_MFP_ENABLED)) { @@ -2353,7 +2353,7 @@ out: if (retval) vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - clear_bit(__I40E_CONFIG_BUSY, &vsi->state); + clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state); return retval; err_no_memory: @@ -2365,7 +2365,7 @@ err_no_memory_locked: spin_unlock_bh(&vsi->mac_filter_hash_lock); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; - clear_bit(__I40E_CONFIG_BUSY, &vsi->state); + clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state); return -ENOMEM; } @@ -3611,29 +3611,29 @@ static irqreturn_t i40e_intr(int irq, void *data) * this is not a performance path and napi_schedule() * can deal with rescheduling. */ - if (!test_bit(__I40E_DOWN, &pf->state)) + if (!test_bit(__I40E_VSI_DOWN, pf->state)) napi_schedule_irqoff(&q_vector->napi); } if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; - set_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state); + set_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state); i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n"); } if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; - set_bit(__I40E_MDD_EVENT_PENDING, &pf->state); + set_bit(__I40E_MDD_EVENT_PENDING, pf->state); } if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; - set_bit(__I40E_VFLR_EVENT_PENDING, &pf->state); + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); } if (icr0 & I40E_PFINT_ICR0_GRST_MASK) { - if (!test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) - set_bit(__I40E_RESET_INTR_RECEIVED, &pf->state); + if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) + set_bit(__I40E_RESET_INTR_RECEIVED, pf->state); ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK; val = rd32(hw, I40E_GLGEN_RSTAT); val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) @@ -3644,7 +3644,7 @@ static irqreturn_t i40e_intr(int irq, void *data) pf->globr_count++; } else if (val == I40E_RESET_EMPR) { pf->empr_count++; - set_bit(__I40E_EMP_RESET_INTR_RECEIVED, &pf->state); + set_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state); } } @@ -3677,7 +3677,7 @@ static irqreturn_t i40e_intr(int irq, void *data) (icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) || (icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK)) { dev_info(&pf->pdev->dev, "device will be reset\n"); - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } ena_mask &= ~icr0_remaining; @@ -3687,7 +3687,7 @@ static irqreturn_t i40e_intr(int irq, void *data) enable_intr: /* re-enable interrupt causes */ wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); - if (!test_bit(__I40E_DOWN, &pf->state)) { + if (!test_bit(__I40E_VSI_DOWN, pf->state)) { i40e_service_event_schedule(pf); i40e_irq_dynamic_enable_icr0(pf, false); } @@ -3907,7 +3907,7 @@ static void i40e_netpoll(struct net_device *netdev) int i; /* if interface is down do nothing */ - if (test_bit(__I40E_DOWN, &vsi->state)) + if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; if (pf->flags & I40E_FLAG_MSIX_ENABLED) { @@ -4144,7 +4144,7 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { /* When port TX is suspended, don't wait */ - if (test_bit(__I40E_PORT_SUSPENDED, &vsi->back->state)) + if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); /* do rx first for enable and last for disable @@ -4436,14 +4436,14 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) static void i40e_vsi_close(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - if (!test_and_set_bit(__I40E_DOWN, &vsi->state)) + if (!test_and_set_bit(__I40E_VSI_DOWN, vsi->state)) i40e_down(vsi); i40e_vsi_free_irq(vsi); i40e_vsi_free_tx_resources(vsi); i40e_vsi_free_rx_resources(vsi); vsi->current_netdev_flags = 0; pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED; - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) pf->flags |= I40E_FLAG_CLIENT_RESET; } @@ -4453,10 +4453,10 @@ static void i40e_vsi_close(struct i40e_vsi *vsi) **/ static void i40e_quiesce_vsi(struct i40e_vsi *vsi) { - if (test_bit(__I40E_DOWN, &vsi->state)) + if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; - set_bit(__I40E_NEEDS_RESTART, &vsi->state); + set_bit(__I40E_VSI_NEEDS_RESTART, vsi->state); if (vsi->netdev && netif_running(vsi->netdev)) vsi->netdev->netdev_ops->ndo_stop(vsi->netdev); else @@ -4469,10 +4469,9 @@ static void i40e_quiesce_vsi(struct i40e_vsi *vsi) **/ static void i40e_unquiesce_vsi(struct i40e_vsi *vsi) { - if (!test_bit(__I40E_NEEDS_RESTART, &vsi->state)) + if (!test_and_clear_bit(__I40E_VSI_NEEDS_RESTART, vsi->state)) return; - clear_bit(__I40E_NEEDS_RESTART, &vsi->state); if (vsi->netdev && netif_running(vsi->netdev)) vsi->netdev->netdev_ops->ndo_open(vsi->netdev); else @@ -4638,8 +4637,8 @@ static void i40e_detect_recover_hung(struct i40e_pf *pf) return; /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */ - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) + if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) return; /* Make sure type is MAIN VSI */ @@ -5186,7 +5185,7 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); /* Schedule PF reset to recover */ - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } @@ -5354,7 +5353,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) if (err) return err; - clear_bit(__I40E_DOWN, &vsi->state); + clear_bit(__I40E_VSI_DOWN, vsi->state); i40e_napi_enable_all(vsi); i40e_vsi_enable_irq(vsi); @@ -5403,12 +5402,12 @@ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; WARN_ON(in_interrupt()); - while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state)) + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) usleep_range(1000, 2000); i40e_down(vsi); i40e_up(vsi); - clear_bit(__I40E_CONFIG_BUSY, &pf->state); + clear_bit(__I40E_CONFIG_BUSY, pf->state); } /** @@ -5435,7 +5434,7 @@ void i40e_down(struct i40e_vsi *vsi) int i; /* It is assumed that the caller of this function - * sets the vsi->state __I40E_DOWN bit. + * sets the vsi->state __I40E_VSI_DOWN bit. */ if (vsi->netdev) { netif_carrier_off(vsi->netdev); @@ -5541,8 +5540,8 @@ int i40e_open(struct net_device *netdev) int err; /* disallow open during test or if eeprom is broken */ - if (test_bit(__I40E_TESTING, &pf->state) || - test_bit(__I40E_BAD_EEPROM, &pf->state)) + if (test_bit(__I40E_TESTING, pf->state) || + test_bit(__I40E_BAD_EEPROM, pf->state)) return -EBUSY; netif_carrier_off(netdev); @@ -5787,10 +5786,9 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) struct i40e_vsi *vsi = pf->vsi[v]; if (vsi != NULL && - test_bit(__I40E_REINIT_REQUESTED, &vsi->state)) { + test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED, + vsi->state)) i40e_vsi_reinit_locked(pf->vsi[v]); - clear_bit(__I40E_REINIT_REQUESTED, &vsi->state); - } } } else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) { int v; @@ -5801,10 +5799,10 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) struct i40e_vsi *vsi = pf->vsi[v]; if (vsi != NULL && - test_bit(__I40E_DOWN_REQUESTED, &vsi->state)) { - set_bit(__I40E_DOWN, &vsi->state); + test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED, + vsi->state)) { + set_bit(__I40E_VSI_DOWN, vsi->state); i40e_down(vsi); - clear_bit(__I40E_DOWN_REQUESTED, &vsi->state); } } } else { @@ -5944,7 +5942,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, else pf->flags &= ~I40E_FLAG_DCB_ENABLED; - set_bit(__I40E_PORT_SUSPENDED, &pf->state); + set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ i40e_pf_quiesce_all_vsi(pf); @@ -5953,7 +5951,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, ret = i40e_resume_port_tx(pf); - clear_bit(__I40E_PORT_SUSPENDED, &pf->state); + clear_bit(__I40E_PORT_SUSPENDED, pf->state); /* In case of error no point in resuming VSIs */ if (ret) goto exit; @@ -5962,7 +5960,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */ - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } else { i40e_pf_unquiesce_all_vsi(pf); @@ -6077,34 +6075,33 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) u32 fcnt_prog, fcnt_avail; struct hlist_node *node; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) + if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) return; - /* Check if, FD SB or ATR was auto disabled and if there is enough room - * to re-enable - */ + /* Check if we have enough room to re-enable FDir SB capability. */ fcnt_prog = i40e_get_global_fd_count(pf); fcnt_avail = pf->fdir_pf_filter_count; if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) || (pf->fd_add_err == 0) || (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) { - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && - (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)) { - pf->hw_disabled_flags &= ~I40E_FLAG_FD_SB_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if (pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) { + pf->flags &= ~I40E_FLAG_FD_SB_AUTO_DISABLED; + if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } } - /* Wait for some more space to be available to turn on ATR. We also - * must check that no existing ntuple rules for TCP are in effect + /* We should wait for even more space before re-enabling ATR. + * Additionally, we cannot enable ATR as long as we still have TCP SB + * rules active. */ - if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) { - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->fd_tcp4_filter_cnt == 0)) { - pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) && + (pf->fd_tcp4_filter_cnt == 0)) { + if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) { + pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } } @@ -6155,7 +6152,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } pf->fd_flush_timestamp = jiffies; - pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED; + pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED; /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); @@ -6175,8 +6172,8 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) /* replay sideband filters */ i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); if (!disable_atr && !pf->fd_tcp4_filter_cnt) - pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED; - clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); + pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; + clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); } @@ -6206,10 +6203,10 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) { /* if interface is down do nothing */ - if (test_bit(__I40E_DOWN, &pf->state)) + if (test_bit(__I40E_VSI_DOWN, pf->state)) return; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) + if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) i40e_fdir_flush_and_replay(pf); i40e_fdir_check_and_reenable(pf); @@ -6223,7 +6220,7 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) **/ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up) { - if (!vsi || test_bit(__I40E_DOWN, &vsi->state)) + if (!vsi || test_bit(__I40E_VSI_DOWN, vsi->state)) return; switch (vsi->type) { @@ -6316,11 +6313,11 @@ static void i40e_link_event(struct i40e_pf *pf) if (new_link == old_link && new_link_speed == old_link_speed && - (test_bit(__I40E_DOWN, &vsi->state) || + (test_bit(__I40E_VSI_DOWN, vsi->state) || new_link == netif_carrier_ok(vsi->netdev))) return; - if (!test_bit(__I40E_DOWN, &vsi->state)) + if (!test_bit(__I40E_VSI_DOWN, vsi->state)) i40e_print_link_message(vsi, new_link); /* Notify the base of the switch tree connected to @@ -6347,8 +6344,8 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf) int i; /* if interface is down do nothing */ - if (test_bit(__I40E_DOWN, &pf->state) || - test_bit(__I40E_CONFIG_BUSY, &pf->state)) + if (test_bit(__I40E_VSI_DOWN, pf->state) || + test_bit(__I40E_CONFIG_BUSY, pf->state)) return; /* make sure we don't do these things too often */ @@ -6386,31 +6383,31 @@ static void i40e_reset_subtask(struct i40e_pf *pf) { u32 reset_flags = 0; - if (test_bit(__I40E_REINIT_REQUESTED, &pf->state)) { + if (test_bit(__I40E_REINIT_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_REINIT_REQUESTED); - clear_bit(__I40E_REINIT_REQUESTED, &pf->state); + clear_bit(__I40E_REINIT_REQUESTED, pf->state); } - if (test_bit(__I40E_PF_RESET_REQUESTED, &pf->state)) { + if (test_bit(__I40E_PF_RESET_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_PF_RESET_REQUESTED); - clear_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + clear_bit(__I40E_PF_RESET_REQUESTED, pf->state); } - if (test_bit(__I40E_CORE_RESET_REQUESTED, &pf->state)) { + if (test_bit(__I40E_CORE_RESET_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED); - clear_bit(__I40E_CORE_RESET_REQUESTED, &pf->state); + clear_bit(__I40E_CORE_RESET_REQUESTED, pf->state); } - if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state)) { + if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state)) { reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED); - clear_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state); + clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state); } - if (test_bit(__I40E_DOWN_REQUESTED, &pf->state)) { - reset_flags |= BIT(__I40E_DOWN_REQUESTED); - clear_bit(__I40E_DOWN_REQUESTED, &pf->state); + if (test_bit(__I40E_VSI_DOWN_REQUESTED, pf->state)) { + reset_flags |= BIT(__I40E_VSI_DOWN_REQUESTED); + clear_bit(__I40E_VSI_DOWN_REQUESTED, pf->state); } /* If there's a recovery already waiting, it takes * precedence before starting a new reset sequence. */ - if (test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) { + if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { i40e_prep_for_reset(pf, false); i40e_reset(pf); i40e_rebuild(pf, false, false); @@ -6418,8 +6415,8 @@ static void i40e_reset_subtask(struct i40e_pf *pf) /* If we're already down or resetting, just bail */ if (reset_flags && - !test_bit(__I40E_DOWN, &pf->state) && - !test_bit(__I40E_CONFIG_BUSY, &pf->state)) { + !test_bit(__I40E_VSI_DOWN, pf->state) && + !test_bit(__I40E_CONFIG_BUSY, pf->state)) { rtnl_lock(); i40e_do_reset(pf, reset_flags, true); rtnl_unlock(); @@ -6468,7 +6465,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) u32 val; /* Do not run clean AQ when PF reset fails */ - if (test_bit(__I40E_RESET_FAILED, &pf->state)) + if (test_bit(__I40E_RESET_FAILED, pf->state)) return; /* check for error indications */ @@ -6572,7 +6569,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) } while (i++ < pf->adminq_work_limit); if (i < pf->adminq_work_limit) - clear_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state); + clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state); /* re-enable Admin queue interrupt cause */ val = rd32(hw, I40E_PFINT_ICR0_ENA); @@ -6598,13 +6595,13 @@ static void i40e_verify_eeprom(struct i40e_pf *pf) if (err) { dev_info(&pf->pdev->dev, "eeprom check failed (%d), Tx/Rx traffic disabled\n", err); - set_bit(__I40E_BAD_EEPROM, &pf->state); + set_bit(__I40E_BAD_EEPROM, pf->state); } } - if (!err && test_bit(__I40E_BAD_EEPROM, &pf->state)) { + if (!err && test_bit(__I40E_BAD_EEPROM, pf->state)) { dev_info(&pf->pdev->dev, "eeprom check passed, Tx/Rx traffic enabled\n"); - clear_bit(__I40E_BAD_EEPROM, &pf->state); + clear_bit(__I40E_BAD_EEPROM, pf->state); } } @@ -6922,8 +6919,8 @@ static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) i40e_status ret = 0; u32 v; - clear_bit(__I40E_RESET_INTR_RECEIVED, &pf->state); - if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) + clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state); + if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return; if (i40e_check_asq_alive(&pf->hw)) i40e_vc_notify_reset(pf); @@ -6982,8 +6979,8 @@ static int i40e_reset(struct i40e_pf *pf) ret = i40e_pf_reset(hw); if (ret) { dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret); - set_bit(__I40E_RESET_FAILED, &pf->state); - clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state); + set_bit(__I40E_RESET_FAILED, pf->state); + clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state); } else { pf->pfr_count++; } @@ -7005,7 +7002,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) u32 val; int v; - if (test_bit(__I40E_DOWN, &pf->state)) + if (test_bit(__I40E_VSI_DOWN, pf->state)) goto clear_recovery; dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n"); @@ -7019,7 +7016,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } /* re-verify the eeprom if we just had an EMP reset */ - if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, &pf->state)) + if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); i40e_clear_pxe_mode(hw); @@ -7182,9 +7179,9 @@ end_unlock: if (!lock_acquired) rtnl_unlock(); end_core_reset: - clear_bit(__I40E_RESET_FAILED, &pf->state); + clear_bit(__I40E_RESET_FAILED, pf->state); clear_recovery: - clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state); + clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state); } /** @@ -7237,7 +7234,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) u32 reg; int i; - if (!test_bit(__I40E_MDD_EVENT_PENDING, &pf->state)) + if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state)) return; /* find what triggered the MDD event */ @@ -7289,7 +7286,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) } /* Queue belongs to the PF, initiate a reset */ if (pf_mdd_detected) { - set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); i40e_service_event_schedule(pf); } } @@ -7318,12 +7315,12 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) "Too many MDD events on VF %d, disabled\n", i); dev_info(&pf->pdev->dev, "Use PF Control I/F to re-enable the VF\n"); - set_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); + set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); } } /* re-enable mdd interrupt cause */ - clear_bit(__I40E_MDD_EVENT_PENDING, &pf->state); + clear_bit(__I40E_MDD_EVENT_PENDING, pf->state); reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); @@ -7331,6 +7328,23 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) } /** + * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters + * @pf: board private structure + **/ +static void i40e_sync_udp_filters(struct i40e_pf *pf) +{ + int i; + + /* loop through and set pending bit for all active UDP filters */ + for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { + if (pf->udp_ports[i].port) + pf->pending_udp_bitmap |= BIT_ULL(i); + } + + pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; +} + +/** * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW * @pf: board private structure **/ @@ -7349,7 +7363,7 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { if (pf->pending_udp_bitmap & BIT_ULL(i)) { pf->pending_udp_bitmap &= ~BIT_ULL(i); - port = pf->udp_ports[i].index; + port = pf->udp_ports[i].port; if (port) ret = i40e_aq_add_udp_tunnel(hw, port, pf->udp_ports[i].type, @@ -7366,7 +7380,7 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - pf->udp_ports[i].index = 0; + pf->udp_ports[i].port = 0; } } } @@ -7384,11 +7398,10 @@ static void i40e_service_task(struct work_struct *work) unsigned long start_time = jiffies; /* don't bother with service tasks if a reset is in progress */ - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) { + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return; - } - if (test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state)) + if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state)) return; i40e_detect_recover_hung(pf); @@ -7416,16 +7429,16 @@ static void i40e_service_task(struct work_struct *work) /* flush memory to make sure state is correct before next watchdog */ smp_mb__before_atomic(); - clear_bit(__I40E_SERVICE_SCHED, &pf->state); + clear_bit(__I40E_SERVICE_SCHED, pf->state); /* If the tasks have taken longer than one timer cycle or there * is more work to be done, reschedule the service task now * rather than wait for the timer to tick again. */ if (time_after(jiffies, (start_time + pf->service_timer_period)) || - test_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state) || - test_bit(__I40E_MDD_EVENT_PENDING, &pf->state) || - test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state)) + test_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state) || + test_bit(__I40E_MDD_EVENT_PENDING, pf->state) || + test_bit(__I40E_VFLR_EVENT_PENDING, pf->state)) i40e_service_event_schedule(pf); } @@ -7574,7 +7587,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) } vsi->type = type; vsi->back = pf; - set_bit(__I40E_DOWN, &vsi->state); + set_bit(__I40E_VSI_DOWN, vsi->state); vsi->flags = 0; vsi->idx = vsi_idx; vsi->int_rate_limit = 0; @@ -8156,7 +8169,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) /* Only request the irq if this is the first time through, and * not when we're rebuilding after a Reset */ - if (!test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) { + if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0, pf->int_name, pf); if (err) { @@ -8808,9 +8821,9 @@ static int i40e_sw_init(struct i40e_pf *pf) (pf->hw.aq.api_min_ver > 4))) { /* Supported in FW API version higher than 1.4 */ pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; - pf->hw_disabled_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; + pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; } else { - pf->hw_disabled_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; + pf->flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; } pf->eeprom_version = 0xDEAD; @@ -8870,16 +8883,16 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) need_reset = true; i40e_fdir_filter_exit(pf); } - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->hw_disabled_flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED | + I40E_FLAG_FD_SB_AUTO_DISABLED); /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; /* if ATR was auto disabled it can be re-enabled. */ - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) { - pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) { + pf->flags &= ~I40E_FLAG_FD_ATR_AUTO_DISABLED; + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); } } @@ -8953,7 +8966,7 @@ static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) u8 i; for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { - if (pf->udp_ports[i].index == port) + if (pf->udp_ports[i].port == port) return i; } @@ -9006,7 +9019,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev, } /* New port: add it and mark its index in the bitmap */ - pf->udp_ports[next_idx].index = port; + pf->udp_ports[next_idx].port = port; pf->pending_udp_bitmap |= BIT_ULL(next_idx); pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; } @@ -9047,7 +9060,7 @@ static void i40e_udp_tunnel_del(struct net_device *netdev, /* if port exists, set it to 0 (mark for deletion) * and make it pending */ - pf->udp_ports[idx].index = 0; + pf->udp_ports[idx].port = 0; pf->pending_udp_bitmap |= BIT_ULL(idx); pf->flags |= I40E_FLAG_UDP_FILTER_SYNC; @@ -9701,7 +9714,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) } vsi->active_filters = 0; - clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state); + clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); spin_lock_bh(&vsi->mac_filter_hash_lock); /* If macvlan filters already exist, force them to get loaded */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { @@ -9754,7 +9767,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) return -ENODEV; } if (vsi == pf->vsi[pf->lan_vsi] && - !test_bit(__I40E_DOWN, &pf->state)) { + !test_bit(__I40E_VSI_DOWN, pf->state)) { dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } @@ -10738,6 +10751,9 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) i40e_ptp_init(pf); + /* repopulate tunnel port filters */ + i40e_sync_udp_filters(pf); + return ret; } @@ -10987,7 +11003,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } pf->next_vsi = 0; pf->pdev = pdev; - set_bit(__I40E_DOWN, &pf->state); + set_bit(__I40E_VSI_DOWN, pf->state); hw = &pf->hw; hw->back = pf; @@ -11166,7 +11182,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->service_timer_period = HZ; INIT_WORK(&pf->service_task, i40e_service_task); - clear_bit(__I40E_SERVICE_SCHED, &pf->state); + clear_bit(__I40E_SERVICE_SCHED, pf->state); /* NVM bit on means WoL disabled for the port */ i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits); @@ -11204,7 +11220,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* prep for VF support */ if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && (pf->flags & I40E_FLAG_MSIX_ENABLED) && - !test_bit(__I40E_BAD_EEPROM, &pf->state)) { + !test_bit(__I40E_BAD_EEPROM, pf->state)) { if (pci_num_vf(pdev)) pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; } @@ -11277,7 +11293,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * before setting up the misc vector or we get a race and the vector * ends up disabled forever. */ - clear_bit(__I40E_DOWN, &pf->state); + clear_bit(__I40E_VSI_DOWN, pf->state); /* In case of MSIX we are going to setup the misc vector right here * to handle admin queue events etc. In case of legacy and MSI @@ -11297,7 +11313,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* prep for VF support */ if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && (pf->flags & I40E_FLAG_MSIX_ENABLED) && - !test_bit(__I40E_BAD_EEPROM, &pf->state)) { + !test_bit(__I40E_BAD_EEPROM, pf->state)) { /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK; @@ -11432,7 +11448,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Unwind what we've done if something failed in the setup */ err_vsis: - set_bit(__I40E_DOWN, &pf->state); + set_bit(__I40E_VSI_DOWN, pf->state); i40e_clear_interrupt_scheme(pf); kfree(pf->vsi); err_switch_setup: @@ -11483,8 +11499,8 @@ static void i40e_remove(struct pci_dev *pdev) i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); /* no more scheduling of any task */ - set_bit(__I40E_SUSPENDED, &pf->state); - set_bit(__I40E_DOWN, &pf->state); + set_bit(__I40E_SUSPENDED, pf->state); + set_bit(__I40E_VSI_DOWN, pf->state); if (pf->service_timer.data) del_timer_sync(&pf->service_timer); if (pf->service_task.func) @@ -11592,7 +11608,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, } /* shutdown all operations */ - if (!test_bit(__I40E_SUSPENDED, &pf->state)) { + if (!test_bit(__I40E_SUSPENDED, pf->state)) { rtnl_lock(); i40e_prep_for_reset(pf, true); rtnl_unlock(); @@ -11659,7 +11675,7 @@ static void i40e_pci_error_resume(struct pci_dev *pdev) struct i40e_pf *pf = pci_get_drvdata(pdev); dev_dbg(&pdev->dev, "%s\n", __func__); - if (test_bit(__I40E_SUSPENDED, &pf->state)) + if (test_bit(__I40E_SUSPENDED, pf->state)) return; rtnl_lock(); @@ -11723,8 +11739,8 @@ static void i40e_shutdown(struct pci_dev *pdev) struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; - set_bit(__I40E_SUSPENDED, &pf->state); - set_bit(__I40E_DOWN, &pf->state); + set_bit(__I40E_SUSPENDED, pf->state); + set_bit(__I40E_VSI_DOWN, pf->state); rtnl_lock(); i40e_prep_for_reset(pf, true); rtnl_unlock(); @@ -11772,8 +11788,8 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) struct i40e_hw *hw = &pf->hw; int retval = 0; - set_bit(__I40E_SUSPENDED, &pf->state); - set_bit(__I40E_DOWN, &pf->state); + set_bit(__I40E_SUSPENDED, pf->state); + set_bit(__I40E_VSI_DOWN, pf->state); if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); @@ -11824,8 +11840,8 @@ static int i40e_resume(struct pci_dev *pdev) pci_wake_from_d3(pdev, false); /* handling the reset will rebuild the device state */ - if (test_and_clear_bit(__I40E_SUSPENDED, &pf->state)) { - clear_bit(__I40E_DOWN, &pf->state); + if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { + clear_bit(__I40E_VSI_DOWN, pf->state); rtnl_lock(); i40e_reset_and_rebuild(pf, false, true); rtnl_unlock(); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 2caee35528fa..18c1cc08da97 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -358,7 +358,7 @@ void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf) skb_tstamp_tx(pf->ptp_tx_skb, &shhwtstamps); dev_kfree_skb_any(pf->ptp_tx_skb); pf->ptp_tx_skb = NULL; - clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, &pf->state); + clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state); } /** @@ -768,7 +768,7 @@ void i40e_ptp_stop(struct i40e_pf *pf) if (pf->ptp_tx_skb) { dev_kfree_skb_any(pf->ptp_tx_skb); pf->ptp_tx_skb = NULL; - clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, &pf->state); + clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state); } if (pf->ptp_clock) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1531a0f9fcc6..29321a6167a6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -333,15 +333,9 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); - pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED; + pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED; } else { pf->fd_tcp4_filter_cnt--; - if (pf->fd_tcp4_filter_cnt == 0) { - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); - pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED; - } } return 0; @@ -589,7 +583,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, * progress do nothing, once flush is complete the state will * be cleared. */ - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) + if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) return; pf->fd_add_err++; @@ -597,9 +591,9 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf); if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) && - (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)) { - pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED; - set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); + pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED) { + pf->flags |= I40E_FLAG_FD_ATR_AUTO_DISABLED; + set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); } /* filter programming failed most likely due to table full */ @@ -611,12 +605,10 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, */ if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) { if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && - !(pf->hw_disabled_flags & - I40E_FLAG_FD_SB_ENABLED)) { + !(pf->flags & I40E_FLAG_FD_SB_AUTO_DISABLED)) { + pf->flags |= I40E_FLAG_FD_SB_AUTO_DISABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n"); - pf->hw_disabled_flags |= - I40E_FLAG_FD_SB_ENABLED; } } } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { @@ -850,7 +842,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, if (budget && ((j / WB_STRIDE) == 0) && (j > 0) && - !test_bit(__I40E_DOWN, &vsi->state) && + !test_bit(__I40E_VSI_DOWN, vsi->state) && (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) tx_ring->arm_wb = true; } @@ -868,7 +860,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, smp_mb(); if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - !test_bit(__I40E_DOWN, &vsi->state)) { + !test_bit(__I40E_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); ++tx_ring->tx_stats.restart_queue; @@ -2179,7 +2171,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, } enable_int: - if (!test_bit(__I40E_DOWN, &vsi->state)) + if (!test_bit(__I40E_VSI_DOWN, vsi->state)) wr32(hw, INTREG(vector - 1), txval); if (q_vector->itr_countdown) @@ -2208,7 +2200,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) int budget_per_ring; int work_done = 0; - if (test_bit(__I40E_DOWN, &vsi->state)) { + if (test_bit(__I40E_VSI_DOWN, vsi->state)) { napi_complete(napi); return 0; } @@ -2312,7 +2304,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) return; - if ((pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) + if (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED) return; /* if sampling is disabled do nothing */ @@ -2346,7 +2338,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, th = (struct tcphdr *)(hdr.network + hlen); /* Due to lack of space, no more new filters can be programmed */ - if (th->syn && (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) + if (th->syn && (pf->flags & I40E_FLAG_FD_ATR_AUTO_DISABLED)) return; if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { /* HW ATR eviction will take care of removing filters on FIN @@ -2634,7 +2626,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, return 0; if (pf->ptp_tx && - !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) { + !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, pf->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; pf->ptp_tx_skb = skb_get(skb); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 350cba70490c..95c23fbaa211 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -50,8 +50,8 @@ static void i40e_vc_vf_broadcast(struct i40e_pf *pf, for (i = 0; i < pf->num_alloc_vfs; i++, vf++) { int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id; /* Not all vfs are enabled so skip the ones that are not */ - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && - !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states) && + !test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) continue; /* Ignore return value on purpose - a given VF may fail, but @@ -137,8 +137,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) return; /* verify if the VF is in either init or active before proceeding */ - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states) && - !test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states) && + !test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) return; abs_vf_id = vf->vf_id + (int)vf->pf->hw.func_caps.vf_base_id; @@ -812,7 +812,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf) /* Start by disabling VF's configuration API to prevent the OS from * accessing the VF's VSI after it's freed / invalidated. */ - clear_bit(I40E_VF_STAT_INIT, &vf->vf_states); + clear_bit(I40E_VF_STATE_INIT, &vf->vf_states); /* free vsi & disconnect it from the parent uplink */ if (vf->lan_vsi_idx) { @@ -884,7 +884,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) vf->num_queue_pairs = total_queue_pairs; /* VF is now completely initialized */ - set_bit(I40E_VF_STAT_INIT, &vf->vf_states); + set_bit(I40E_VF_STATE_INIT, &vf->vf_states); error_alloc: if (ret) @@ -938,7 +938,7 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) u32 reg, reg_idx, bit_idx; /* warn the VF */ - clear_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); + clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); /* Disable VF's configuration API during reset. The flag is re-enabled * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI. @@ -946,7 +946,7 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) * to do it earlier to give some time to finish to any VF config * functions that may still be running at this point. */ - clear_bit(I40E_VF_STAT_INIT, &vf->vf_states); + clear_bit(I40E_VF_STATE_INIT, &vf->vf_states); /* In the case of a VFLR, the HW has already reset the VF and we * just need to clean up, so don't hit the VFRTRIG register. @@ -1004,10 +1004,11 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) if (!i40e_alloc_vf_res(vf)) { int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; i40e_enable_vf_mappings(vf); - set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); - clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); + set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); + clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); /* Do not notify the client during VF init */ - if (vf->pf->num_alloc_vfs) + if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, + &vf->vf_states)) i40e_notify_client_of_vf_reset(pf, abs_vf_id); vf->num_vlan = 0; } @@ -1035,7 +1036,7 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) int i; /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(__I40E_VF_DISABLE, &pf->state)) + if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) return; i40e_trigger_vf_reset(vf, flr); @@ -1072,7 +1073,7 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_cleanup_reset_vf(vf); i40e_flush(hw); - clear_bit(__I40E_VF_DISABLE, &pf->state); + clear_bit(__I40E_VF_DISABLE, pf->state); } /** @@ -1097,7 +1098,7 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) return; /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(__I40E_VF_DISABLE, &pf->state)) + if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) return; /* Begin reset on all VFs at once */ @@ -1172,7 +1173,7 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) i40e_cleanup_reset_vf(&pf->vf[v]); i40e_flush(hw); - clear_bit(__I40E_VF_DISABLE, &pf->state); + clear_bit(__I40E_VF_DISABLE, pf->state); } /** @@ -1189,13 +1190,25 @@ void i40e_free_vfs(struct i40e_pf *pf) if (!pf->vf) return; - while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state)) + while (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) usleep_range(1000, 2000); i40e_notify_client_of_vf_enable(pf, 0); - for (i = 0; i < pf->num_alloc_vfs; i++) - if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states)) - i40e_vsi_stop_rings(pf->vsi[pf->vf[i].lan_vsi_idx]); + + /* Amortize wait time by stopping all VFs at the same time */ + for (i = 0; i < pf->num_alloc_vfs; i++) { + if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states)) + continue; + + i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[i].lan_vsi_idx]); + } + + for (i = 0; i < pf->num_alloc_vfs; i++) { + if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states)) + continue; + + i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[i].lan_vsi_idx]); + } /* Disable IOV before freeing resources. This lets any VF drivers * running in the host get themselves cleaned up before we yank @@ -1206,13 +1219,11 @@ void i40e_free_vfs(struct i40e_pf *pf) else dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); - msleep(20); /* let any messages in transit get finished up */ - /* free up VF resources */ tmp = pf->num_alloc_vfs; pf->num_alloc_vfs = 0; for (i = 0; i < tmp; i++) { - if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states)) + if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states)) i40e_free_vf_res(&pf->vf[i]); /* disable qp mappings */ i40e_disable_vf_mappings(&pf->vf[i]); @@ -1235,7 +1246,7 @@ void i40e_free_vfs(struct i40e_pf *pf) wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } } - clear_bit(__I40E_VF_DISABLE, &pf->state); + clear_bit(__I40E_VF_DISABLE, pf->state); } #ifdef CONFIG_PCI_IOV @@ -1280,12 +1291,15 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) /* assign default capabilities */ set_bit(I40E_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps); vfs[i].spoofchk = true; - /* VF resources get allocated during reset */ - i40e_reset_vf(&vfs[i], false); + + set_bit(I40E_VF_STATE_PRE_ENABLE, &vfs[i].vf_states); } pf->num_alloc_vfs = num_alloc_vfs; + /* VF resources get allocated during reset */ + i40e_reset_all_vfs(pf, false); + i40e_notify_client_of_vf_enable(pf, num_alloc_vfs); err_alloc: @@ -1312,7 +1326,7 @@ static int i40e_pci_sriov_enable(struct pci_dev *pdev, int num_vfs) int pre_existing_vfs = pci_num_vf(pdev); int err = 0; - if (test_bit(__I40E_TESTING, &pf->state)) { + if (test_bit(__I40E_TESTING, pf->state)) { dev_warn(&pdev->dev, "Cannot enable SR-IOV virtual functions while the device is undergoing diagnostic testing\n"); err = -EPERM; @@ -1418,7 +1432,7 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, "Number of invalid messages exceeded for VF %d\n", vf->vf_id); dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n"); - set_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); + set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); } } else { vf->num_valid_msgs++; @@ -1493,7 +1507,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) int len = 0; int ret; - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -1522,7 +1536,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (i40e_vf_client_capable(pf, vf->vf_id) && (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) { vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP; - set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states); + set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); } if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { @@ -1583,7 +1597,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) ether_addr_copy(vfres->vsi_res[0].default_mac_addr, vf->default_lan_addr.addr); } - set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); + set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); err: /* send the response back to the VF */ @@ -1606,7 +1620,7 @@ err: **/ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf) { - if (test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) + if (test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) i40e_reset_vf(vf, false); } @@ -1654,7 +1668,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, int bkt; vsi = i40e_find_vsi_from_id(pf, info->vsi_id); - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || !vsi) { aq_ret = I40E_ERR_PARAM; @@ -1715,9 +1729,9 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, "VF %d successfully set multicast promiscuous mode\n", vf->vf_id); if (allmulti) - set_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states); + set_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); else - clear_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states); + clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); } if (info->flags & I40E_FLAG_VF_UNICAST_PROMISC) @@ -1766,9 +1780,9 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, "VF %d successfully set unicast promiscuous mode\n", vf->vf_id); if (alluni) - set_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states); + set_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); else - clear_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states); + clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); } error_param: @@ -1797,7 +1811,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1854,7 +1868,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) unsigned long tempmap; int i; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1914,7 +1928,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) u16 vsi_id = vqs->vsi_id; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1953,7 +1967,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1995,7 +2009,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) memset(&stats, 0, sizeof(struct i40e_eth_stats)); - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2082,7 +2096,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2151,7 +2165,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2217,7 +2231,7 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) "VF is not trusted, switch the VF to trusted to add more VLAN addresses\n"); goto error_param; } - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -2244,12 +2258,12 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (!ret) vf->num_vlan++; - if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states)) + if (test_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states)) i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid, true, vfl->vlan_id[i], NULL); - if (test_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states)) + if (test_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states)) i40e_aq_set_vsi_mc_promisc_on_vlan(&pf->hw, vsi->seid, true, vfl->vlan_id[i], @@ -2284,7 +2298,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -2307,12 +2321,12 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]); vf->num_vlan--; - if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states)) + if (test_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states)) i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid, false, vfl->vlan_id[i], NULL); - if (test_bit(I40E_VF_STAT_MC_PROMISC, &vf->vf_states)) + if (test_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states)) i40e_aq_set_vsi_mc_promisc_on_vlan(&pf->hw, vsi->seid, false, vfl->vlan_id[i], @@ -2338,8 +2352,8 @@ static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + !test_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2369,8 +2383,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, (struct i40e_virtchnl_iwarp_qvlist_info *)msg; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + !test_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2407,7 +2421,7 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg, u16 msglen) u16 vsi_id = vrk->vsi_id; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, vsi_id) || (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { aq_ret = I40E_ERR_PARAM; @@ -2439,7 +2453,7 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg, u16 msglen) u16 vsi_id = vrl->vsi_id; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || !i40e_vc_isvalid_vsi_id(vf, vsi_id) || (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { aq_ret = I40E_ERR_PARAM; @@ -2469,7 +2483,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen) i40e_status aq_ret = 0; int len = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2506,7 +2520,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_hw *hw = &pf->hw; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2536,7 +2550,7 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode, int valid_len = 0; /* Check if VF is disabled. */ - if (test_bit(I40E_VF_STAT_DISABLED, &vf->vf_states)) + if (test_bit(I40E_VF_STATE_DISABLED, &vf->vf_states)) return I40E_ERR_PARAM; /* Validate message length. */ @@ -2804,7 +2818,7 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) struct i40e_vf *vf; int vf_id; - if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state)) + if (!test_bit(__I40E_VFLR_EVENT_PENDING, pf->state)) return 0; /* Re-enable the VFLR interrupt cause here, before looking for which @@ -2817,7 +2831,7 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) wr32(hw, I40E_PFINT_ICR0_ENA, reg); i40e_flush(hw); - clear_bit(__I40E_VFLR_EVENT_PENDING, &pf->state); + clear_bit(__I40E_VFLR_EVENT_PENDING, pf->state); for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) { reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32; bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; @@ -2860,7 +2874,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", vf_id); ret = -EAGAIN; @@ -2949,7 +2963,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", vf_id); ret = -EAGAIN; @@ -3081,7 +3095,7 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", vf_id); ret = -EAGAIN; @@ -3162,7 +3176,7 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, vf = &(pf->vf[vf_id]); /* first vsi is always the LAN vsi */ vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", vf_id); ret = -EAGAIN; @@ -3281,7 +3295,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable) } vf = &(pf->vf[vf_id]); - if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", vf_id); ret = -EAGAIN; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 9495f1422122..20d7c8160e9e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -56,13 +56,14 @@ enum i40e_queue_ctrl { /* VF states */ enum i40e_vf_states { - I40E_VF_STAT_INIT = 0, - I40E_VF_STAT_ACTIVE, - I40E_VF_STAT_IWARPENA, - I40E_VF_STAT_FCOEENA, - I40E_VF_STAT_DISABLED, - I40E_VF_STAT_MC_PROMISC, - I40E_VF_STAT_UC_PROMISC, + I40E_VF_STATE_INIT = 0, + I40E_VF_STATE_ACTIVE, + I40E_VF_STATE_IWARPENA, + I40E_VF_STATE_FCOEENA, + I40E_VF_STATE_DISABLED, + I40E_VF_STATE_MC_PROMISC, + I40E_VF_STATE_UC_PROMISC, + I40E_VF_STATE_PRE_ENABLE, }; /* VF capabilities */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 34e96d98251a..dfe241a12ad0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -266,7 +266,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, if (budget && ((j / WB_STRIDE) == 0) && (j > 0) && - !test_bit(__I40E_DOWN, &vsi->state) && + !test_bit(__I40E_VSI_DOWN, vsi->state) && (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) tx_ring->arm_wb = true; } @@ -284,7 +284,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, smp_mb(); if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - !test_bit(__I40E_DOWN, &vsi->state)) { + !test_bit(__I40E_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); ++tx_ring->tx_stats.restart_queue; @@ -1508,7 +1508,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, } enable_int: - if (!test_bit(__I40E_DOWN, &vsi->state)) + if (!test_bit(__I40E_VSI_DOWN, vsi->state)) wr32(hw, INTREG(vector - 1), txval); if (q_vector->itr_countdown) @@ -1537,7 +1537,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) int budget_per_ring; int work_done = 0; - if (test_bit(__I40E_DOWN, &vsi->state)) { + if (test_bit(__I40E_VSI_DOWN, vsi->state)) { napi_complete(napi); return 0; } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 40f56e2335df..b8ada6d8d890 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -49,6 +49,13 @@ #define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define PFX "i40evf: " +/* VSI state flags shared with common code */ +enum i40evf_vsi_state_t { + __I40E_VSI_DOWN, + /* This must be last as it determines the size of the BITMAP */ + __I40E_VSI_STATE_SIZE__, +}; + /* dummy struct to make common code less painful */ struct i40e_vsi { struct i40evf_adapter *back; @@ -56,7 +63,7 @@ struct i40e_vsi { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; u16 seid; u16 id; - unsigned long state; + DECLARE_BITMAP(state, __I40E_VSI_STATE_SIZE__); int base_vector; u16 work_limit; u16 qs_handle; @@ -168,8 +175,6 @@ enum i40evf_critical_section_t { __I40EVF_IN_CRITICAL_TASK, /* cannot be interrupted */ __I40EVF_IN_CLIENT_TASK, }; -/* make common code happy */ -#define __I40E_DOWN __I40EVF_DOWN /* board specific private data structure */ struct i40evf_adapter { @@ -218,7 +223,6 @@ struct i40evf_adapter { #define I40EVF_FLAG_ALLMULTI_ON BIT(19) #define I40EVF_FLAG_LEGACY_RX BIT(20) /* duplicates for common code */ -#define I40E_FLAG_FDIR_ATR_ENABLED 0 #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED #define I40E_FLAG_WB_ON_ITR_CAPABLE I40EVF_FLAG_WB_ON_ITR_CAPABLE diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 5915273c372f..ea110a730e16 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -46,7 +46,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 2 #define DRV_VERSION_MINOR 1 -#define DRV_VERSION_BUILD 7 +#define DRV_VERSION_BUILD 14 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -497,7 +497,7 @@ static void i40evf_netpoll(struct net_device *netdev) int i; /* if interface is down do nothing */ - if (test_bit(__I40E_DOWN, &adapter->vsi.state)) + if (test_bit(__I40E_VSI_DOWN, adapter->vsi.state)) return; for (i = 0; i < q_vectors; i++) @@ -694,13 +694,14 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter) static void i40evf_configure_rx(struct i40evf_adapter *adapter) { unsigned int rx_buf_len = I40E_RXBUFFER_2048; - struct net_device *netdev = adapter->netdev; struct i40e_hw *hw = &adapter->hw; int i; /* Legacy Rx will always default to a 2048 buffer size. */ #if (PAGE_SIZE < 8192) if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX)) { + struct net_device *netdev = adapter->netdev; + /* For jumbo frames on systems with 4K pages we have to use * an order 1 page, so we might as well increase the size * of our Rx buffer to make better use of the available space @@ -1087,7 +1088,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) static void i40evf_up_complete(struct i40evf_adapter *adapter) { adapter->state = __I40EVF_RUNNING; - clear_bit(__I40E_DOWN, &adapter->vsi.state); + clear_bit(__I40E_VSI_DOWN, adapter->vsi.state); i40evf_napi_enable_all(adapter); @@ -1271,13 +1272,13 @@ static int i40evf_set_interrupt_capability(struct i40evf_adapter *adapter) } pairs = adapter->num_active_queues; - /* It's easy to be greedy for MSI-X vectors, but it really - * doesn't do us much good if we have a lot more vectors - * than CPU's. So let's be conservative and only ask for - * (roughly) twice the number of vectors as there are CPU's. + /* It's easy to be greedy for MSI-X vectors, but it really doesn't do + * us much good if we have more vectors than CPUs. However, we already + * limit the total number of queues by the number of CPUs so we do not + * need any further limiting here. */ - v_budget = min_t(int, pairs, (int)(num_online_cpus() * 2)) + NONQ_VECS; - v_budget = min_t(int, v_budget, (int)adapter->vf_res->max_vectors); + v_budget = min_t(int, pairs + NONQ_VECS, + (int)adapter->vf_res->max_vectors); adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); @@ -1508,6 +1509,13 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) { int err; + err = i40evf_alloc_queues(adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "Unable to allocate memory for queues\n"); + goto err_alloc_queues; + } + rtnl_lock(); err = i40evf_set_interrupt_capability(adapter); rtnl_unlock(); @@ -1524,23 +1532,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) goto err_alloc_q_vectors; } - err = i40evf_alloc_queues(adapter); - if (err) { - dev_err(&adapter->pdev->dev, - "Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u", (adapter->num_active_queues > 1) ? "Enabled" : "Disabled", adapter->num_active_queues); return 0; -err_alloc_queues: - i40evf_free_q_vectors(adapter); err_alloc_q_vectors: i40evf_reset_interrupt_capability(adapter); err_set_interrupt: + i40evf_free_queues(adapter); +err_alloc_queues: return err; } @@ -1753,7 +1754,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED; if (netif_running(adapter->netdev)) { - set_bit(__I40E_DOWN, &adapter->vsi.state); + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); netif_carrier_off(adapter->netdev); netif_tx_disable(adapter->netdev); adapter->link_up = false; @@ -2233,7 +2234,7 @@ static int i40evf_close(struct net_device *netdev) return 0; - set_bit(__I40E_DOWN, &adapter->vsi.state); + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); if (CLIENT_ENABLED(adapter)) adapter->flags |= I40EVF_FLAG_CLIENT_NEEDS_CLOSE; @@ -2674,7 +2675,7 @@ static void i40evf_init_task(struct work_struct *work) dev_info(&pdev->dev, "GRO is enabled\n"); adapter->state = __I40EVF_DOWN; - set_bit(__I40E_DOWN, &adapter->vsi.state); + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); i40evf_misc_irq_enable(adapter); adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c index 335beb8b8b45..97a8f00674d0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -798,7 +798,7 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, const struct bpf_insn *insn = &meta->insn; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, insn->src_reg * 2, br_mask, insn->off); @@ -818,7 +818,7 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u32 tmp_reg; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); if (!swap) @@ -847,7 +847,7 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; if (swap) { areg ^= breg; @@ -1132,7 +1132,7 @@ static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); else - return -ENOTSUPP; + return -EOPNOTSUPP; return 0; } @@ -1143,7 +1143,7 @@ static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) if (meta->insn.off != offsetof(struct xdp_md, data) && meta->insn.off != offsetof(struct xdp_md, data_end)) - return -ENOTSUPP; + return -EOPNOTSUPP; emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); @@ -1174,12 +1174,12 @@ static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) if (meta->insn.off == offsetof(struct sk_buff, mark)) return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); - return -ENOTSUPP; + return -EOPNOTSUPP; } static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1192,7 +1192,7 @@ static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { if (meta->insn.off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); return 0; @@ -1206,7 +1206,7 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u32 tmp_reg; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; if (imm & ~0U) { tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); @@ -1245,7 +1245,7 @@ static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u32 tmp_reg; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; if (!imm) { meta->skip = true; @@ -1276,7 +1276,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u32 tmp_reg; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; if (!imm) { emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), @@ -1302,7 +1302,7 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; if (insn->off < 0) /* TODO */ - return -ENOTSUPP; + return -EOPNOTSUPP; emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), ALU_OP_XOR, reg_b(insn->src_reg * 2)); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 8f20fdef0754..fcf81b3be830 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -201,6 +201,7 @@ struct nfp_net_tx_buf { * @txds: Virtual address of TX ring in host memory * @dma: DMA address of the TX ring * @size: Size, in bytes, of the TX ring (needed to free) + * @is_xdp: Is this a XDP TX ring? */ struct nfp_net_tx_ring { struct nfp_net_r_vector *r_vec; @@ -221,6 +222,7 @@ struct nfp_net_tx_ring { dma_addr_t dma; unsigned int size; + bool is_xdp; } ____cacheline_aligned; /* RX and freelist descriptor format */ @@ -468,10 +470,10 @@ struct nfp_net_dp { u8 chained_metadata_format:1; u8 rx_dma_dir; - u8 rx_dma_off; - u8 rx_offset; + u32 rx_dma_off; + u32 ctrl; u32 fl_bufsz; @@ -816,7 +818,8 @@ nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, unsigned int n); struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); -int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new); +int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, + struct netlink_ext_ack *extack); bool nfp_net_link_changed_read_clear(struct nfp_net *nn); int nfp_net_refresh_eth_port(struct nfp_net *nn); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 8a9b74305493..db20376260f5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -478,15 +478,18 @@ static irqreturn_t nfp_net_irq_exn(int irq, void *data) * @tx_ring: TX ring structure * @r_vec: IRQ vector servicing this ring * @idx: Ring index + * @is_xdp: Is this an XDP TX ring? */ static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, - struct nfp_net_r_vector *r_vec, unsigned int idx) + struct nfp_net_r_vector *r_vec, unsigned int idx, + bool is_xdp) { struct nfp_net *nn = r_vec->nfp_net; tx_ring->idx = idx; tx_ring->r_vec = r_vec; + tx_ring->is_xdp = is_xdp; tx_ring->qcidx = tx_ring->idx * nn->stride_tx; tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); @@ -924,6 +927,9 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) int fidx; int idx; + if (tx_ring->wr_p == tx_ring->rd_p) + return; + /* Work out how many descriptors have been transmitted */ qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); @@ -994,11 +1000,13 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; - struct nfp_net_dp *dp = &r_vec->nfp_net->dp; u32 done_pkts = 0, done_bytes = 0; int idx, todo; u32 qcp_rd_p; + if (tx_ring->wr_p == tx_ring->rd_p) + return; + /* Work out how many descriptors have been transmitted */ qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); @@ -1010,22 +1018,12 @@ static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) else todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; + done_pkts = todo; while (todo--) { idx = tx_ring->rd_p & (tx_ring->cnt - 1); tx_ring->rd_p++; - if (!tx_ring->txbufs[idx].frag) - continue; - - nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[idx].dma_addr); - __free_page(virt_to_page(tx_ring->txbufs[idx].frag)); - - done_pkts++; done_bytes += tx_ring->txbufs[idx].real_len; - - tx_ring->txbufs[idx].dma_addr = 0; - tx_ring->txbufs[idx].frag = NULL; - tx_ring->txbufs[idx].fidx = -2; } tx_ring->qcp_rd_p = qcp_rd_p; @@ -1050,42 +1048,35 @@ static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) static void nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) { - struct nfp_net_r_vector *r_vec = tx_ring->r_vec; const struct skb_frag_struct *frag; struct netdev_queue *nd_q; - while (tx_ring->rd_p != tx_ring->wr_p) { + while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) { struct nfp_net_tx_buf *tx_buf; - int idx; + struct sk_buff *skb; + int idx, nr_frags; idx = tx_ring->rd_p & (tx_ring->cnt - 1); tx_buf = &tx_ring->txbufs[idx]; - if (tx_ring == r_vec->xdp_ring) { - nfp_net_dma_unmap_rx(dp, tx_buf->dma_addr); - __free_page(virt_to_page(tx_ring->txbufs[idx].frag)); - } else { - struct sk_buff *skb = tx_ring->txbufs[idx].skb; - int nr_frags = skb_shinfo(skb)->nr_frags; - - if (tx_buf->fidx == -1) { - /* unmap head */ - dma_unmap_single(dp->dev, tx_buf->dma_addr, - skb_headlen(skb), - DMA_TO_DEVICE); - } else { - /* unmap fragment */ - frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; - dma_unmap_page(dp->dev, tx_buf->dma_addr, - skb_frag_size(frag), - DMA_TO_DEVICE); - } + skb = tx_ring->txbufs[idx].skb; + nr_frags = skb_shinfo(skb)->nr_frags; - /* check for last gather fragment */ - if (tx_buf->fidx == nr_frags - 1) - dev_kfree_skb_any(skb); + if (tx_buf->fidx == -1) { + /* unmap head */ + dma_unmap_single(dp->dev, tx_buf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; + dma_unmap_page(dp->dev, tx_buf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); } + /* check for last gather fragment */ + if (tx_buf->fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + tx_buf->dma_addr = 0; tx_buf->skb = NULL; tx_buf->fidx = -2; @@ -1100,7 +1091,7 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) tx_ring->qcp_rd_p = 0; tx_ring->wr_ptr_add = 0; - if (tx_ring == r_vec->xdp_ring) + if (tx_ring->is_xdp) return; nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); @@ -1153,16 +1144,13 @@ nfp_net_free_frag(void *frag, bool xdp) /** * nfp_net_rx_alloc_one() - Allocate and map page frag for RX * @dp: NFP Net data path struct - * @rx_ring: RX ring structure of the skb * @dma_addr: Pointer to storage for DMA address (output param) * * This function will allcate a new page frag, map it for DMA. * * Return: allocated page frag or NULL on failure. */ -static void * -nfp_net_rx_alloc_one(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, - dma_addr_t *dma_addr) +static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) { void *frag; @@ -1317,8 +1305,7 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp, rxbufs = rx_ring->rxbufs; for (i = 0; i < rx_ring->cnt - 1; i++) { - rxbufs[i].frag = - nfp_net_rx_alloc_one(dp, rx_ring, &rxbufs[i].dma_addr); + rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr); if (!rxbufs[i].frag) { nfp_net_rx_ring_bufs_free(dp, rx_ring); return -ENOMEM; @@ -1496,8 +1483,6 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, { struct nfp_net_tx_buf *txbuf; struct nfp_net_tx_desc *txd; - dma_addr_t new_dma_addr; - void *new_frag; int wr_idx; if (unlikely(nfp_net_tx_full(tx_ring, 1))) { @@ -1505,17 +1490,13 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, return false; } - new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr); - if (unlikely(!new_frag)) { - nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL); - return false; - } - nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); - wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1); /* Stash the soft descriptor of the head then initialize it */ txbuf = &tx_ring->txbufs[wr_idx]; + + nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr); + txbuf->frag = rxbuf->frag; txbuf->dma_addr = rxbuf->dma_addr; txbuf->fidx = -1; @@ -1800,13 +1781,11 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring * @dp: NFP Net data path struct * @tx_ring: TX Ring structure to allocate - * @is_xdp: True if ring will be used for XDP * * Return: 0 on success, negative errno otherwise. */ static int -nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring, - bool is_xdp) +nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) { struct nfp_net_r_vector *r_vec = tx_ring->r_vec; int sz; @@ -1824,7 +1803,7 @@ nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring, if (!tx_ring->txbufs) goto err_alloc; - if (!is_xdp) + if (!tx_ring->is_xdp) netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask, tx_ring->idx); @@ -1835,6 +1814,45 @@ err_alloc: return -ENOMEM; } +static void +nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + unsigned int i; + + if (!tx_ring->is_xdp) + return; + + for (i = 0; i < tx_ring->cnt; i++) { + if (!tx_ring->txbufs[i].frag) + return; + + nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr); + __free_page(virt_to_page(tx_ring->txbufs[i].frag)); + } +} + +static int +nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_tx_buf *txbufs = tx_ring->txbufs; + unsigned int i; + + if (!tx_ring->is_xdp) + return 0; + + for (i = 0; i < tx_ring->cnt; i++) { + txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr); + if (!txbufs[i].frag) { + nfp_net_tx_ring_bufs_free(dp, tx_ring); + return -ENOMEM; + } + } + + return 0; +} + static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) { unsigned int r; @@ -1851,17 +1869,23 @@ static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) bias = dp->num_stack_tx_rings; nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias], - r); + r, bias); - if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r], bias)) + if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r])) goto err_free_prev; + + if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r])) + goto err_free_ring; } return 0; err_free_prev: - while (r--) + while (r--) { + nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); +err_free_ring: nfp_net_tx_ring_free(&dp->tx_rings[r]); + } kfree(dp->tx_rings); return -ENOMEM; } @@ -1870,8 +1894,10 @@ static void nfp_net_tx_rings_free(struct nfp_net_dp *dp) { unsigned int r; - for (r = 0; r < dp->num_tx_rings; r++) + for (r = 0; r < dp->num_tx_rings; r++) { + nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); nfp_net_tx_ring_free(&dp->tx_rings[r]); + } kfree(dp->tx_rings); } @@ -2369,8 +2395,10 @@ static void nfp_net_close_free_all(struct nfp_net *nn) nfp_net_rx_ring_bufs_free(&nn->dp, &nn->dp.rx_rings[r]); nfp_net_rx_ring_free(&nn->dp.rx_rings[r]); } - for (r = 0; r < nn->dp.num_tx_rings; r++) + for (r = 0; r < nn->dp.num_tx_rings; r++) { + nfp_net_tx_ring_bufs_free(&nn->dp, &nn->dp.tx_rings[r]); nfp_net_tx_ring_free(&nn->dp.tx_rings[r]); + } for (r = 0; r < nn->dp.num_r_vecs; r++) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); @@ -2496,24 +2524,27 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) return new; } -static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp) +static int +nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, + struct netlink_ext_ack *extack) { /* XDP-enabled tests */ if (!dp->xdp_prog) return 0; if (dp->fl_bufsz > PAGE_SIZE) { - nn_warn(nn, "MTU too large w/ XDP enabled\n"); + NL_MOD_TRY_SET_ERR_MSG(extack, "MTU too large w/ XDP enabled"); return -EINVAL; } if (dp->num_tx_rings > nn->max_tx_rings) { - nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n"); + NL_MOD_TRY_SET_ERR_MSG(extack, "Insufficient number of TX rings w/ XDP enabled"); return -EINVAL; } return 0; } -int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp) +int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp, + struct netlink_ext_ack *extack) { int r, err; @@ -2525,7 +2556,7 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp) dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings); - err = nfp_net_check_config(nn, dp); + err = nfp_net_check_config(nn, dp, extack); if (err) goto exit_free_dp; @@ -2600,7 +2631,7 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) dp->mtu = new_mtu; - return nfp_net_ring_reconfig(nn, dp); + return nfp_net_ring_reconfig(nn, dp, NULL); } static void nfp_net_stat64(struct net_device *netdev, @@ -2651,9 +2682,9 @@ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, struct nfp_net *nn = netdev_priv(netdev); if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (proto != htons(ETH_P_ALL)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) { if (!nn->dp.bpf_offload_xdp) @@ -2916,9 +2947,10 @@ static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog) return ret; } -static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog) +static int nfp_net_xdp_setup(struct nfp_net *nn, struct netdev_xdp *xdp) { struct bpf_prog *old_prog = nn->dp.xdp_prog; + struct bpf_prog *prog = xdp->prog; struct nfp_net_dp *dp; int err; @@ -2938,14 +2970,10 @@ static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog) dp->xdp_prog = prog; dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings; dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - if (prog) - dp->rx_dma_off = XDP_PACKET_HEADROOM - - (nn->dp.rx_offset ?: NFP_NET_MAX_PREPEND); - else - dp->rx_dma_off = 0; + dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0; /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */ - err = nfp_net_ring_reconfig(nn, dp); + err = nfp_net_ring_reconfig(nn, dp, xdp->extack); if (err) return err; @@ -2963,7 +2991,7 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) switch (xdp->command) { case XDP_SETUP_PROG: - return nfp_net_xdp_setup(nn, xdp->prog); + return nfp_net_xdp_setup(nn, xdp); case XDP_QUERY_PROG: xdp->prog_attached = !!nn->dp.xdp_prog; return 0; @@ -3170,13 +3198,6 @@ int nfp_net_netdev_init(struct net_device *netdev) struct nfp_net *nn = netdev_priv(netdev); int err; - /* XDP calls for 256 byte packet headroom which wouldn't fit in a u8. - * We, however, reuse the metadata prepend space for XDP buffers which - * is at least 1 byte long and as long as XDP headroom doesn't increase - * above 256 the *extra* XDP headroom will fit on 8 bits. - */ - BUILD_BUG_ON(XDP_PACKET_HEADROOM > 256); - nn->dp.chained_metadata_format = nn->fw_ver.major > 3; nn->dp.rx_dma_dir = DMA_FROM_DEVICE; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 6e27d1281425..abbb47e60cc3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -309,7 +309,7 @@ static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) dp->rxd_cnt = rxd_cnt; dp->txd_cnt = txd_cnt; - return nfp_net_ring_reconfig(nn, dp); + return nfp_net_ring_reconfig(nn, dp, NULL); } static int nfp_net_set_ringparam(struct net_device *netdev, @@ -791,7 +791,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, ec->tx_coalesce_usecs_high || ec->tx_max_coalesced_frames_high || ec->rate_sample_interval) - return -ENOTSUPP; + return -EOPNOTSUPP; /* Compute factor used to convert coalesce '_usecs' parameters to * ME timestamp ticks. There are 16 ME clock cycles for each timestamp @@ -880,7 +880,7 @@ static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx, if (dp->xdp_prog) dp->num_tx_rings += total_rx; - return nfp_net_ring_reconfig(nn, dp); + return nfp_net_ring_reconfig(nn, dp, NULL); } static int nfp_net_set_channels(struct net_device *netdev, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index b5b6f69d1e0f..cc823df12c8a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -119,12 +119,12 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) if (tc_no_actions(cls_bpf->exts)) return NN_ACT_DIRECT; - return -ENOTSUPP; + return -EOPNOTSUPP; } /* TC legacy mode */ if (!tc_single_action(cls_bpf->exts)) - return -ENOTSUPP; + return -EOPNOTSUPP; tcf_exts_to_list(cls_bpf->exts, &actions); list_for_each_entry(a, &actions, list) { @@ -136,7 +136,7 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) return NN_ACT_TC_REDIR; } - return -ENOTSUPP; + return -EOPNOTSUPP; } static int @@ -152,7 +152,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, int ret; if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = nfp_net_bpf_get_act(nn, cls_bpf); if (ret < 0) @@ -162,7 +162,7 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; if (max_mtu < nn->dp.netdev->mtu) { nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); @@ -289,6 +289,6 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) return nfp_net_bpf_stats_update(nn, cls_bpf); default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 61797c98f5fe..2fa9247bb23d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -78,7 +78,7 @@ #define NSP_MAGIC 0xab10 #define NSP_MAJOR 0 -#define NSP_MINOR (__MAX_SPCODE - 1) +#define NSP_MINOR 8 #define NSP_CODE_MAJOR GENMASK(15, 12) #define NSP_CODE_MINOR GENMASK(11, 0) @@ -94,8 +94,6 @@ enum nfp_nsp_cmd { SPCODE_ETH_RESCAN = 7, /* Rescan ETHs, write ETH_TABLE to buf */ SPCODE_ETH_CONTROL = 8, /* Update media config from buffer */ SPCODE_NSP_IDENTIFY = 13, /* Read NSP version */ - - __MAX_SPCODE, }; static const struct { diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index c07191cb7631..2ab1aab7c3fe 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -72,6 +72,7 @@ extern const struct qed_common_ops qed_common_ops_pass; #define QED_WFQ_UNIT 100 #define QED_WID_SIZE (1024) +#define QED_MIN_WIDS (4) #define QED_PF_DEMS_SIZE (4) /* cau states */ @@ -525,6 +526,7 @@ struct qed_hwfn { struct dbg_tools_data dbg_info; /* PWM region specific data */ + u16 wid_count; u32 dpi_size; u32 dpi_count; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index aa1a4d5c864c..5f31140d0b77 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1318,17 +1318,15 @@ static int qed_hw_init_dpi_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus) { - u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size; - u32 dpi_bit_shift, dpi_count; + u32 dpi_bit_shift, dpi_count, dpi_page_size; u32 min_dpis; + u32 n_wids; /* Calculate DPI size */ - dpi_page_size_1 = QED_WID_SIZE * n_cpus; - dpi_page_size_2 = max_t(u32, QED_WID_SIZE, PAGE_SIZE); - dpi_page_size = max_t(u32, dpi_page_size_1, dpi_page_size_2); - dpi_page_size = roundup_pow_of_two(dpi_page_size); + n_wids = max_t(u32, QED_MIN_WIDS, n_cpus); + dpi_page_size = QED_WID_SIZE * roundup_pow_of_two(n_wids); + dpi_page_size = (dpi_page_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); dpi_bit_shift = ilog2(dpi_page_size / 4096); - dpi_count = pwm_region_size / dpi_page_size; min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis; @@ -1356,7 +1354,7 @@ qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 pwm_regsize, norm_regsize; u32 non_pwm_conn, min_addr_reg1; - u32 db_bar_size, n_cpus; + u32 db_bar_size, n_cpus = 1; u32 roce_edpm_mode; u32 pf_dems_shift; int rc = 0; @@ -1417,6 +1415,8 @@ qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_rdma_dpm_bar(p_hwfn, p_ptt); } + p_hwfn->wid_count = (u16) n_cpus; + DP_INFO(p_hwfn, "doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n", norm_regsize, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 8a5a0649fc4a..59992cf20d42 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -928,7 +928,9 @@ static int qed_slowpath_start(struct qed_dev *cdev, struct qed_tunnel_info tunn_info; const u8 *data = NULL; struct qed_hwfn *hwfn; +#ifdef CONFIG_RFS_ACCEL struct qed_ptt *p_ptt; +#endif int rc = -EINVAL; if (qed_iov_wq_start(cdev)) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index b8c811f95205..56289d7cd306 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -90,7 +90,7 @@ void qed_roce_async_event(struct qed_hwfn *p_hwfn, } static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn, - struct qed_bmap *bmap, u32 max_count) + struct qed_bmap *bmap, u32 max_count, char *name) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "max_count = %08x\n", max_count); @@ -104,26 +104,24 @@ static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn, return -ENOMEM; } - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocated bitmap %p\n", - bmap->bitmap); + snprintf(bmap->name, QED_RDMA_MAX_BMAP_NAME, "%s", name); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "0\n"); return 0; } static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap, u32 *id_num) { - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "bmap = %p\n", bmap); - *id_num = find_first_zero_bit(bmap->bitmap, bmap->max_count); - - if (*id_num >= bmap->max_count) { - DP_NOTICE(p_hwfn, "no id available max_count=%d\n", - bmap->max_count); + if (*id_num >= bmap->max_count) return -EINVAL; - } __set_bit(*id_num, bmap->bitmap); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "%s bitmap: allocated id %d\n", + bmap->name, *id_num); + return 0; } @@ -141,15 +139,18 @@ static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, { bool b_acquired; - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "id_num = %08x", id_num); if (id_num >= bmap->max_count) return; b_acquired = test_and_clear_bit(id_num, bmap->bitmap); if (!b_acquired) { - DP_NOTICE(p_hwfn, "ID %d already released\n", id_num); + DP_NOTICE(p_hwfn, "%s bitmap: id %d already released\n", + bmap->name, id_num); return; } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "%s bitmap: released id %d\n", + bmap->name, id_num); } static int qed_bmap_test_id(struct qed_hwfn *p_hwfn, @@ -224,7 +225,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, } /* Allocate bit map for pd's */ - rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS); + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS, + "PD"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate pd_map, rc = %d\n", @@ -234,7 +236,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Allocate DPI bitmap */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, - p_hwfn->dpi_count); + p_hwfn->dpi_count, "DPI"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate DPI bitmap, rc = %d\n", rc); @@ -245,7 +247,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, * twice the number of QPs. */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, - p_rdma_info->num_qps * 2); + p_rdma_info->num_qps * 2, "CQ"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate cq bitmap, rc = %d\n", rc); @@ -257,7 +259,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, * The maximum number of CQs is bounded to twice the number of QPs. */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, - p_rdma_info->num_qps * 2); + p_rdma_info->num_qps * 2, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate toogle bits, rc = %d\n", rc); @@ -266,7 +268,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Allocate bitmap for itids */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map, - p_rdma_info->num_mrs); + p_rdma_info->num_mrs, "MR"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate itids bitmaps, rc = %d\n", rc); @@ -274,7 +276,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, } /* Allocate bitmap for cids used for qps. */ - rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons); + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons, + "CID"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate cid bitmap, rc = %d\n", rc); @@ -282,7 +285,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, } /* Allocate bitmap for cids used for responders/requesters. */ - rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->real_cid_map, num_cons); + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->real_cid_map, num_cons, + "REAL_CID"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate real cid bitmap, rc = %d\n", rc); @@ -313,6 +317,54 @@ free_rdma_info: return rc; } +static void qed_rdma_bmap_free(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, bool check) +{ + int weight = bitmap_weight(bmap->bitmap, bmap->max_count); + int last_line = bmap->max_count / (64 * 8); + int last_item = last_line * 8 + + DIV_ROUND_UP(bmap->max_count % (64 * 8), 64); + u64 *pmap = (u64 *)bmap->bitmap; + int line, item, offset; + u8 str_last_line[200] = { 0 }; + + if (!weight || !check) + goto end; + + DP_NOTICE(p_hwfn, + "%s bitmap not free - size=%d, weight=%d, 512 bits per line\n", + bmap->name, bmap->max_count, weight); + + /* print aligned non-zero lines, if any */ + for (item = 0, line = 0; line < last_line; line++, item += 8) + if (bitmap_weight((unsigned long *)&pmap[item], 64 * 8)) + DP_NOTICE(p_hwfn, + "line 0x%04x: 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + line, + pmap[item], + pmap[item + 1], + pmap[item + 2], + pmap[item + 3], + pmap[item + 4], + pmap[item + 5], + pmap[item + 6], pmap[item + 7]); + + /* print last unaligned non-zero line, if any */ + if ((bmap->max_count % (64 * 8)) && + (bitmap_weight((unsigned long *)&pmap[item], + bmap->max_count - item * 64))) { + offset = sprintf(str_last_line, "line 0x%04x: ", line); + for (; item < last_item; item++) + offset += sprintf(str_last_line + offset, + "0x%016llx ", pmap[item]); + DP_NOTICE(p_hwfn, "%s\n", str_last_line); + } + +end: + kfree(bmap->bitmap); + bmap->bitmap = NULL; +} + static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) { struct qed_bmap *rcid_map = &p_hwfn->p_rdma_info->real_cid_map; @@ -332,12 +384,12 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) } } - kfree(p_rdma_info->cid_map.bitmap); - kfree(p_rdma_info->tid_map.bitmap); - kfree(p_rdma_info->toggle_bits.bitmap); - kfree(p_rdma_info->cq_map.bitmap); - kfree(p_rdma_info->dpi_map.bitmap); - kfree(p_rdma_info->pd_map.bitmap); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cq_map, 1); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->toggle_bits, 0); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tid_map, 1); kfree(p_rdma_info->port); kfree(p_rdma_info->dev); @@ -732,6 +784,7 @@ static int qed_rdma_add_user(void *rdma_cxt, ((out_params->dpi) * p_hwfn->dpi_size); out_params->dpi_size = p_hwfn->dpi_size; + out_params->wid_count = p_hwfn->wid_count; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding user - done, rc = %d\n", rc); return rc; @@ -750,6 +803,8 @@ static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) p_port->link_speed = p_hwfn->mcp_info->link_output.speed; + p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE; + return p_port; } @@ -802,9 +857,12 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) static int qed_fill_rdma_dev_info(struct qed_dev *cdev, struct qed_dev_rdma_info *info) { + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + memset(info, 0, sizeof(*info)); info->rdma_type = QED_RDMA_TYPE_ROCE; + info->user_dpm_enabled = (p_hwfn->db_bar_no_edpm == 0); qed_fill_dev_info(cdev, &info->common); @@ -952,8 +1010,7 @@ static int qed_rdma_create_cq(void *rdma_cxt, /* Allocate icid */ spin_lock_bh(&p_info->lock); - rc = qed_rdma_bmap_alloc_id(p_hwfn, - &p_info->cq_map, &returned_id); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_info->cq_map, &returned_id); spin_unlock_bh(&p_info->lock); if (rc) { @@ -2189,8 +2246,7 @@ static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, params->modify_flags); return rc; - } else if (qp->cur_state == QED_ROCE_QP_STATE_ERR || - qp->cur_state == QED_ROCE_QP_STATE_SQE) { + } else if (qp->cur_state == QED_ROCE_QP_STATE_ERR) { /* ->ERR */ rc = qed_roce_sp_modify_responder(p_hwfn, qp, true, params->modify_flags); @@ -2456,6 +2512,8 @@ qed_rdma_register_tid(void *rdma_cxt, } rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) + return rc; if (fw_return_code != RDMA_RETURN_OK) { DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 3ccc08a7c995..9742af516183 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -67,9 +67,11 @@ enum qed_rdma_toggle_bit { QED_RDMA_TOGGLE_BIT_SET = 1 }; +#define QED_RDMA_MAX_BMAP_NAME (10) struct qed_bmap { unsigned long *bitmap; u32 max_count; + char name[QED_RDMA_MAX_BMAP_NAME]; }; struct qed_rdma_info { diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index c4c4a408b40b..11d71e5eea14 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -234,7 +234,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) /* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; /* copy acquire response from buffer to p_hwfn */ memcpy(&p_iov->acquire_resp, resp, sizeof(p_iov->acquire_resp)); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index a6cc9a2d41c1..9e983e1d8249 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1857,7 +1857,7 @@ static int happy_meal_is_not_so_happy(struct happy_meal *hp, u32 status) if (status & GREG_STAT_TXLERR) printk("LateError "); if (status & GREG_STAT_TXPERR) - printk("ParityErro "); + printk("ParityError "); if (status & GREG_STAT_TXTERR) printk("TagBotch "); printk("]\n"); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 7074b40ebd7f..dec5d563ab19 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1244,7 +1244,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, metadata = true; if (data[IFLA_GENEVE_UDP_CSUM] && - !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) + nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) info.key.tun_flags |= TUNNEL_CSUM; if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9cd8b27d1292..a32dc5d11e89 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -74,27 +74,40 @@ static int bcm54612e_config_init(struct phy_device *phydev) return 0; } -static int bcm54810_config(struct phy_device *phydev) +static int bcm5481x_config(struct phy_device *phydev) { int rc, val; - val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL); - val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN; - rc = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL, - val); - if (rc < 0) - return rc; - + /* handling PHY's internal RX clock delay */ val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); - val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; val |= MII_BCM54XX_AUXCTL_MISC_WREN; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { + /* Disable RGMII RXC-RXD skew */ + val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + } + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { + /* Enable RGMII RXC-RXD skew */ + val |= MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + } rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, val); if (rc < 0) return rc; + /* handling PHY's internal TX clock delay */ val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); - val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { + /* Disable internal TX clock delay */ + val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; + } + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { + /* Enable internal TX clock delay */ + val |= BCM54810_SHD_CLK_CTL_GTXCLK_EN; + } rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); if (rc < 0) return rc; @@ -244,7 +257,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) static int bcm54xx_config_init(struct phy_device *phydev) { - int reg, err; + int reg, err, val; reg = phy_read(phydev, MII_BCM54XX_ECR); if (reg < 0) @@ -283,8 +296,14 @@ static int bcm54xx_config_init(struct phy_device *phydev) if (err) return err; } else if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) { - err = bcm54810_config(phydev); - if (err) + /* For BCM54810, we need to disable BroadR-Reach function */ + val = bcm_phy_read_exp(phydev, + BCM54810_EXP_BROADREACH_LRE_MISC_CTL); + val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN; + err = bcm_phy_write_exp(phydev, + BCM54810_EXP_BROADREACH_LRE_MISC_CTL, + val); + if (err < 0) return err; } @@ -392,29 +411,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) ret = genphy_config_aneg(phydev); /* Then we can set up the delay. */ - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - u16 reg; - - /* - * There is no BCM5481 specification available, so down - * here is everything we know about "register 0x18". This - * at least helps BCM5481 to successfully receive packets - * on MPC8360E-RDK board. Peter Barada <peterb@logicpd.com> - * says: "This sets delay between the RXD and RXC signals - * instead of using trace lengths to achieve timing". - */ - - /* Set RDX clk delay. */ - reg = 0x7 | (0x7 << 12); - phy_write(phydev, 0x18, reg); - - reg = phy_read(phydev, 0x18); - /* Set RDX-RXC skew. */ - reg |= (1 << 8); - /* Write bits 14:0. */ - reg |= (1 << 15); - phy_write(phydev, 0x18, reg); - } + bcm5481x_config(phydev); if (of_property_read_bool(np, "enet-phy-lane-swap")) { /* Lane Swap - Undocumented register...magic! */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 82f1c3a73345..3d0bc484b3d7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1878,7 +1878,8 @@ err: return ret; } -static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) +static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr); struct virtnet_info *vi = netdev_priv(dev); @@ -1890,16 +1891,17 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) { - netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n"); + NL_SET_ERR_MSG(extack, "can't set XDP while host is implementing LRO, disable LRO first"); return -EOPNOTSUPP; } if (vi->mergeable_rx_bufs && !vi->any_header_sg) { - netdev_warn(dev, "XDP expects header/data in single page, any_header_sg required\n"); + NL_SET_ERR_MSG(extack, "XDP expects header/data in single page, any_header_sg required"); return -EINVAL; } if (dev->mtu > max_sz) { + NL_SET_ERR_MSG(extack, "MTU too large to enable XDP"); netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz); return -EINVAL; } @@ -1910,6 +1912,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog) /* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { + NL_SET_ERR_MSG(extack, "Too few free TX rings available"); netdev_warn(dev, "request %i queues but max is %i\n", curr_qp + xdp_qp, vi->max_queue_pairs); return -ENOMEM; @@ -1971,7 +1974,7 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: - return virtnet_xdp_set(dev, xdp->prog); + return virtnet_xdp_set(dev, xdp->prog, xdp->extack); case XDP_QUERY_PROG: xdp->prog_attached = virtnet_xdp_query(dev); return 0; @@ -2228,8 +2231,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) vi->rq[i].pages = NULL; netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, napi_weight); - netif_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, - napi_tx ? napi_weight : 0); + netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, + napi_tx ? napi_weight : 0); sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ebc98bb17a51..328b4712683c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2758,8 +2758,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, sock = vxlan_create_sock(net, ipv6, port, flags); if (IS_ERR(sock)) { - pr_info("Cannot bind port %d, err=%ld\n", ntohs(port), - PTR_ERR(sock)); kfree(vs); return ERR_CAST(sock); } @@ -2822,17 +2820,21 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) static int vxlan_sock_add(struct vxlan_dev *vxlan) { - bool ipv6 = vxlan->flags & VXLAN_F_IPV6; bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA; + bool ipv6 = vxlan->flags & VXLAN_F_IPV6 || metadata; + bool ipv4 = !ipv6 || metadata; int ret = 0; RCU_INIT_POINTER(vxlan->vn4_sock, NULL); #if IS_ENABLED(CONFIG_IPV6) RCU_INIT_POINTER(vxlan->vn6_sock, NULL); - if (ipv6 || metadata) + if (ipv6) { ret = __vxlan_sock_add(vxlan, true); + if (ret < 0 && ret != -EAFNOSUPPORT) + ipv4 = false; + } #endif - if (!ret && (!ipv6 || metadata)) + if (ipv4) ret = __vxlan_sock_add(vxlan, false); if (ret < 0) vxlan_sock_release(vxlan); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6847714a5ae3..9c23bd2efb56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -813,11 +813,16 @@ enum xdp_netdev_command { XDP_QUERY_PROG, }; +struct netlink_ext_ack; + struct netdev_xdp { enum xdp_netdev_command command; union { /* XDP_SETUP_PROG */ - struct bpf_prog *prog; + struct { + struct bpf_prog *prog; + struct netlink_ext_ack *extack; + }; /* XDP_QUERY_PROG */ bool prog_attached; }; @@ -3291,7 +3296,8 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1b49209dd5c7..996711d8a7b4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,6 +41,11 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, int flags); +static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) +{ + return subsys << 8 | msg_type; +} + void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 984b2112c77b..a30efb437e6d 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -109,8 +109,10 @@ struct ebt_table { #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ ~(__alignof__(struct _xt_align)-1)) extern struct ebt_table *ebt_register_table(struct net *net, - const struct ebt_table *table); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table); + const struct ebt_table *table, + const struct nf_hook_ops *); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table, + const struct nf_hook_ops *); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 8d2a8924705c..c20395edf2de 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -92,6 +92,14 @@ struct netlink_ext_ack { (extack)->_msg = _msg; \ } while (0) +#define NL_MOD_TRY_SET_ERR_MSG(extack, msg) do { \ + static const char _msg[] = KBUILD_MODNAME ": " msg; \ + struct netlink_ext_ack *_extack = (extack); \ + \ + if (_extack) \ + _extack->_msg = _msg; \ +} while (0) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index f742d4312c9d..cbb2ff0ce4bc 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -240,6 +240,7 @@ struct qed_rdma_add_user_out_params { u64 dpi_addr; u64 dpi_phys_addr; u32 dpi_size; + u16 wid_count; }; enum roce_mode { @@ -533,6 +534,7 @@ enum qed_rdma_type { struct qed_dev_rdma_info { struct qed_dev_info common; enum qed_rdma_type rdma_type; + u8 user_dpm_enabled; }; struct qed_rdma_ops { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8a4a57b887fb..4f4f786255ef 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1349,8 +1349,6 @@ int ip_vs_protocol_init(void); void ip_vs_protocol_cleanup(void); void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); int *ip_vs_create_timeout_table(int *table, int size); -int ip_vs_set_state_timeout(int *table, int num, const char *const *names, - const char *name, int to); void ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -1555,13 +1553,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (!ct || !nf_ct_is_untracked(ct)) { - struct nf_conn *untracked; - + if (ct) { nf_conntrack_put(&ct->ct_general); - untracked = nf_ct_untracked_get(); - nf_conntrack_get(&untracked->ct_general); - nf_ct_set(skb, untracked, IP_CT_NEW); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } #endif } @@ -1620,7 +1614,7 @@ static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, if (!(cp->flags & IP_VS_CONN_F_NFCT)) return false; ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) + if (ct) return true; #endif return false; diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 6ff32815641b..919e4e8af327 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -14,7 +14,6 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index c59b82456f89..eaea968f8657 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -5,7 +5,6 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; -extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 19605878da47..8ece3612d0cd 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -50,25 +50,6 @@ union nf_conntrack_expect_proto { #define NF_CT_ASSERT(x) #endif -struct nf_conntrack_helper; - -/* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 4 - -/* nf_conn feature for connections that have a helper */ -struct nf_conn_help { - /* Helper. if any */ - struct nf_conntrack_helper __rcu *helper; - - struct hlist_head expectations; - - /* Current number of expected connections */ - u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; - - /* private helper information. */ - char data[]; -}; - #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> @@ -243,14 +224,6 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); -/* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); -static inline struct nf_conn *nf_ct_untracked_get(void) -{ - return raw_cpu_ptr(&nf_conntrack_untracked); -} -void nf_ct_untracked_status_or(unsigned long bits); - /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), @@ -281,11 +254,6 @@ static inline int nf_ct_is_dying(const struct nf_conn *ct) return test_bit(IPS_DYING_BIT, &ct->status); } -static inline int nf_ct_is_untracked(const struct nf_conn *ct) -{ - return test_bit(IPS_UNTRACKED_BIT, &ct->status); -} - /* Packet is received from loopback */ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) { diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 84ec7ca5f195..81d7f8a30945 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -65,7 +65,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); int ret = NF_ACCEPT; - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { if (!nf_ct_is_confirmed(ct)) ret = __nf_conntrack_confirm(skb); if (likely(ret == NF_ACCEPT)) diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 12d967b58726..2a10c6570fcc 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -20,11 +20,11 @@ enum nf_ct_ecache_state { struct nf_conntrack_ecache { unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ + u16 missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ + enum nf_ct_ecache_state state:8;/* ecache state */ u32 portid; /* netlink portid of destroyer */ - enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 65cc2cb005d9..2ba54feaccd8 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -73,6 +73,7 @@ struct nf_conntrack_expect_policy { }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 +#define NF_CT_EXPECT_MAX_CNT 255 int nf_conntrack_expect_pernet_init(struct net *net); void nf_conntrack_expect_pernet_fini(struct net *net); @@ -104,6 +105,7 @@ static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 1c3035dda31f..4944bc9153cf 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -43,8 +43,8 @@ enum nf_ct_ext_id { /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { struct rcu_head rcu; - u16 offset[NF_CT_EXT_NUM]; - u16 len; + u8 offset[NF_CT_EXT_NUM]; + u8 len; char data[0]; }; @@ -69,12 +69,7 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) /* Destroy all relationships */ -void __nf_ct_ext_destroy(struct nf_conn *ct); -static inline void nf_ct_ext_destroy(struct nf_conn *ct) -{ - if (ct->ext) - __nf_ct_ext_destroy(ct); -} +void nf_ct_ext_destroy(struct nf_conn *ct); /* Free operation. If you want to free a object referred from private area, * please implement __nf_ct_ext_free() and call it. @@ -86,15 +81,7 @@ static inline void nf_ct_ext_free(struct nf_conn *ct) } /* Add this type, returns pointer to data or NULL. */ -void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp); - -#define nf_ct_ext_add(ct, id, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp))) -#define nf_ct_ext_add_length(ct, id, len, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp))) - -#define NF_CT_EXT_F_PREALLOC 0x0001 +void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ @@ -102,15 +89,11 @@ struct nf_ct_ext_type { enum nf_ct_ext_id id; - unsigned int flags; - /* Length and min alignment. */ u8 len; u8 align; - /* initial size of nf_ct_ext. */ - u8 alloc_size; }; -int nf_ct_extend_register(struct nf_ct_ext_type *type); -void nf_ct_extend_unregister(struct nf_ct_ext_type *type); +int nf_ct_extend_register(const struct nf_ct_ext_type *type); +void nf_ct_extend_unregister(const struct nf_ct_ext_type *type); #endif /* _NF_CONNTRACK_EXTEND_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1eaac1f4cd6a..e04fa7691e5d 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -29,9 +29,6 @@ struct nf_conntrack_helper { struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; - /* length of internal data, ie. sizeof(struct nf_ct_*_master) */ - size_t data_len; - /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -49,9 +46,33 @@ struct nf_conntrack_helper { unsigned int expect_class_max; unsigned int flags; - unsigned int queue_num; /* For user-space helpers. */ + + /* For user-space helpers: */ + unsigned int queue_num; + /* length of userspace private data stored in nf_conn_help->data */ + u16 data_len; }; +/* Must be kept in sync with the classes defined by helpers */ +#define NF_CT_MAX_EXPECT_CLASSES 4 + +/* nf_conn feature for connections that have a helper */ +struct nf_conn_help { + /* Helper. if any */ + struct nf_conntrack_helper __rcu *helper; + + struct hlist_head expectations; + + /* Current number of expected connections */ + u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[32] __aligned(8); +}; + +#define NF_CT_HELPER_BUILD_BUG_ON(structsize) \ + BUILD_BUG_ON((structsize) > FIELD_SIZEOF(struct nf_conn_help, data)) + struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); @@ -62,7 +83,7 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, - u32 expect_class_max, u32 data_len, + u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 85e993e278d5..7032e044bbe2 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -58,6 +58,9 @@ struct nf_conntrack_l4proto { unsigned int dataoff, u_int8_t pf, unsigned int hooknum); + /* called by gc worker if table is full */ + bool (*can_early_drop)(const struct nf_conn *ct); + /* Print out the per-protocol part of the tuple. Return like seq_* */ void (*print_tuple)(struct seq_file *s, const struct nf_conntrack_tuple *); diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index b0ca402c1f72..a2fcb5271726 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -52,6 +52,8 @@ struct synproxy_stats { struct synproxy_net { struct nf_conn *tmpl; struct synproxy_stats __percpu *stats; + unsigned int hook_ref4; + unsigned int hook_ref6; }; extern unsigned int synproxy_net_id; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index c327a431a6f3..05c82a1a4267 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -67,7 +67,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, { #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) - return nat->masq_index && hooknum == NF_INET_POST_ROUTING && + return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL && nat->masq_index != out->ifindex; #else diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 01bcc6bfbcc9..fbfa5acf4f14 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,31 +7,31 @@ struct sk_buff; /* These return true or false. */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned int match_offset, - unsigned int match_len, const char *rep_buffer, - unsigned int rep_len, bool adjust); +bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len, bool adjust); -static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +static inline bool nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) { return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, match_offset, match_len, rep_buffer, rep_len, true); } -int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, unsigned int match_offset, - unsigned int match_len, const char *rep_buffer, - unsigned int rep_len); +bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned int match_offset, + unsigned int match_len, const char *rep_buffer, + unsigned int rep_len); /* Setup NAT on this expected conntrack so it follows master, but goes * to port ct->master->saved_proto. */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 09948d10e38e..4454719ff849 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -24,8 +24,7 @@ struct nf_queue_entry { struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - const struct nf_hook_entry *hooks); + unsigned int (*nf_hook_drop)(struct net *net); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f713a053f89d..028faec8fc27 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -911,6 +911,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } +static inline bool nft_is_base_chain(const struct nft_chain *chain) +{ + return chain->flags & NFT_BASE_CHAIN; +} + int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e553529929f6..945a1f5f63c5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -481,8 +481,7 @@ union bpf_attr { * u32 bpf_get_socket_uid(skb) * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb - * Return: uid of the socket owner on success or 0 if the socket pointer - * inside sk_buff is NULL + * Return: uid of the socket owner on success or overflowuid if failed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6a8e33dd4ecb..a8072cc7fa0b 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -28,12 +28,14 @@ enum ip_conntrack_info { /* only for userspace compatibility */ #ifndef __KERNEL__ IP_CT_NEW_REPLY = IP_CT_NUMBER, +#else + IP_CT_UNTRACKED = 7, #endif }; #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) /* Bitset representing status of connection. */ enum ip_conntrack_status { @@ -94,7 +96,7 @@ enum ip_conntrack_status { IPS_TEMPLATE_BIT = 11, IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), - /* Conntrack is a fake untracked entry */ + /* Conntrack is a fake untracked entry. Obsolete and not used anymore */ IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), @@ -117,6 +119,9 @@ enum ip_conntrack_events { IPCT_NATSEQADJ = IPCT_SEQADJ, IPCT_SECMARK, /* new security mark has been set */ IPCT_LABEL, /* new connlabel has been set */ +#ifdef __KERNEL__ + __IPCT_MAX +#endif }; enum ip_conntrack_expect_events { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 8f3842690d17..683f6f88fcac 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -901,6 +901,7 @@ enum nft_rt_attributes { * @NFT_CT_BYTES: conntrack bytes * @NFT_CT_AVGPKT: conntrack average bytes per packet * @NFT_CT_ZONE: conntrack zone + * @NFT_CT_EVENTMASK: ctnetlink events to be generated for this conntrack */ enum nft_ct_keys { NFT_CT_STATE, @@ -921,6 +922,7 @@ enum nft_ct_keys { NFT_CT_BYTES, NFT_CT_AVGPKT, NFT_CT_ZONE, + NFT_CT_EVENTMASK, }; /** diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6f8b6ed690be..c2ff608c1984 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1924,6 +1924,17 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && + dst_reg->type == PTR_TO_STACK && + ((BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == CONST_IMM) || + BPF_SRC(insn->code) == BPF_K)) { + if (BPF_SRC(insn->code) == BPF_X) + dst_reg->imm += regs[insn->src_reg].imm; + else + dst_reg->imm += insn->imm; + return 0; + } else if (opcode == BPF_ADD && + BPF_CLASS(insn->code) == BPF_ALU64 && (dst_reg->type == PTR_TO_PACKET || (BPF_SRC(insn->code) == BPF_X && regs[insn->src_reg].type == PTR_TO_PACKET))) { diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index de7988b0349e..ab0c7cc8448f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -589,16 +589,14 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (unlikely(source != fdb->dst)) { fdb->dst = source; fdb_modified = true; + /* Take over HW learned entry */ + if (unlikely(fdb->added_by_external_learn)) + fdb->added_by_external_learn = 0; } if (now != fdb->updated) fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; - /* Take over HW learned entry */ - if (unlikely(fdb->added_by_external_learn)) { - fdb->added_by_external_learn = 0; - fdb_modified = true; - } if (unlikely(fdb_modified)) fdb_notify(br, fdb, RTM_NEWNEIGH); } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8fe36dc3aab2..2585b100ebbb 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -65,13 +65,13 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { - net->xt.broute_table = ebt_register_table(net, &broute_table); + net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); return PTR_ERR_OR_ZERO(net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.broute_table); + ebt_unregister_table(net, net->xt.broute_table, NULL); } static struct pernet_operations broute_net_ops = { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 593a1bdc079e..f22ef7c21913 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __net_init frame_filter_net_init(struct net *net) { - net->xt.frame_filter = ebt_register_table(net, &frame_filter); + net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); return PTR_ERR_OR_ZERO(net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_filter); + ebt_unregister_table(net, net->xt.frame_filter, ebt_ops_filter); } static struct pernet_operations frame_filter_net_ops = { @@ -109,20 +109,11 @@ static struct pernet_operations frame_filter_net_ops = { static int __init ebtable_filter_init(void) { - int ret; - - ret = register_pernet_subsys(&frame_filter_net_ops); - if (ret < 0) - return ret; - ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); - if (ret < 0) - unregister_pernet_subsys(&frame_filter_net_ops); - return ret; + return register_pernet_subsys(&frame_filter_net_ops); } static void __exit ebtable_filter_fini(void) { - nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); unregister_pernet_subsys(&frame_filter_net_ops); } diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index eb33919821ee..2f7a4f314406 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,13 +93,13 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __net_init frame_nat_net_init(struct net *net) { - net->xt.frame_nat = ebt_register_table(net, &frame_nat); + net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); return PTR_ERR_OR_ZERO(net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net, net->xt.frame_nat); + ebt_unregister_table(net, net->xt.frame_nat, ebt_ops_nat); } static struct pernet_operations frame_nat_net_ops = { @@ -109,20 +109,11 @@ static struct pernet_operations frame_nat_net_ops = { static int __init ebtable_nat_init(void) { - int ret; - - ret = register_pernet_subsys(&frame_nat_net_ops); - if (ret < 0) - return ret; - ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); - if (ret < 0) - unregister_pernet_subsys(&frame_nat_net_ops); - return ret; + return register_pernet_subsys(&frame_nat_net_ops); } static void __exit ebtable_nat_fini(void) { - nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); unregister_pernet_subsys(&frame_nat_net_ops); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 79b69917f521..9ec0c9f908fa 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1157,8 +1157,30 @@ free_newinfo: return ret; } +static void __ebt_unregister_table(struct net *net, struct ebt_table *table) +{ + int i; + + mutex_lock(&ebt_mutex); + list_del(&table->list); + mutex_unlock(&ebt_mutex); + EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, + ebt_cleanup_entry, net, NULL); + if (table->private->nentries) + module_put(table->me); + vfree(table->private->entries); + if (table->private->chainstack) { + for_each_possible_cpu(i) + vfree(table->private->chainstack[i]); + vfree(table->private->chainstack); + } + vfree(table->private); + kfree(table); +} + struct ebt_table * -ebt_register_table(struct net *net, const struct ebt_table *input_table) +ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops) { struct ebt_table_info *newinfo; struct ebt_table *t, *table; @@ -1238,6 +1260,16 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) } list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); + + if (!ops) + return table; + + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); + if (ret) { + __ebt_unregister_table(net, table); + return ERR_PTR(ret); + } + return table; free_unlock: mutex_unlock(&ebt_mutex); @@ -1256,29 +1288,12 @@ out: return ERR_PTR(ret); } -void ebt_unregister_table(struct net *net, struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table, + const struct nf_hook_ops *ops) { - int i; - - if (!table) { - BUGPRINT("Request to unregister NULL table!!!\n"); - return; - } - mutex_lock(&ebt_mutex); - list_del(&table->list); - mutex_unlock(&ebt_mutex); - EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, - ebt_cleanup_entry, net, NULL); - if (table->private->nentries) - module_put(table->me); - vfree(table->private->entries); - if (table->private->chainstack) { - for_each_possible_cpu(i) - vfree(table->private->chainstack[i]); - vfree(table->private->chainstack); - } - vfree(table->private); - kfree(table); + if (ops) + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); + __ebt_unregister_table(net, table); } /* userspace just supplied us with counters */ @@ -1713,7 +1728,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, if (*size < sizeof(*ce)) return -EINVAL; - ce = (struct ebt_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(*ce))) return -EFAULT; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 5974dbc1ea24..bb63c9aed55d 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -111,7 +111,7 @@ nft_meta_bridge_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_meta_bridge_type __read_mostly = { .family = NFPROTO_BRIDGE, .name = "meta", - .select_ops = &nft_meta_bridge_select_ops, + .select_ops = nft_meta_bridge_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/can/bcm.c b/net/can/bcm.c index 0e855917b7e1..65432633a250 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -147,6 +147,7 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) /* * procfs functions */ +#if IS_ENABLED(CONFIG_PROC_FS) static char *bcm_proc_getifname(struct net *net, char *result, int ifindex) { struct net_device *dev; @@ -251,6 +252,7 @@ static const struct file_operations bcm_proc_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif /* CONFIG_PROC_FS */ /* * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface @@ -1537,9 +1539,11 @@ static int bcm_release(struct socket *sock) bcm_remove_op(op); } +#if IS_ENABLED(CONFIG_PROC_FS) /* remove procfs entry */ if (net->can.bcmproc_dir && bo->bcm_proc_read) remove_proc_entry(bo->procname, net->can.bcmproc_dir); +#endif /* CONFIG_PROC_FS */ /* remove device reference */ if (bo->bound) { @@ -1598,6 +1602,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, bo->ifindex = 0; } +#if IS_ENABLED(CONFIG_PROC_FS) if (net->can.bcmproc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); @@ -1609,6 +1614,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, goto fail; } } +#endif /* CONFIG_PROC_FS */ bo->bound = 1; @@ -1691,22 +1697,21 @@ static const struct can_proto bcm_can_proto = { static int canbcm_pernet_init(struct net *net) { +#if IS_ENABLED(CONFIG_PROC_FS) /* create /proc/net/can-bcm directory */ - if (IS_ENABLED(CONFIG_PROC_FS)) { - net->can.bcmproc_dir = - proc_net_mkdir(net, "can-bcm", net->proc_net); - } + net->can.bcmproc_dir = proc_net_mkdir(net, "can-bcm", net->proc_net); +#endif /* CONFIG_PROC_FS */ return 0; } static void canbcm_pernet_exit(struct net *net) { +#if IS_ENABLED(CONFIG_PROC_FS) /* remove /proc/net/can-bcm directory */ - if (IS_ENABLED(CONFIG_PROC_FS)) { - if (net->can.bcmproc_dir) - remove_proc_entry("can-bcm", net->proc_net); - } + if (net->can.bcmproc_dir) + remove_proc_entry("can-bcm", net->proc_net); +#endif /* CONFIG_PROC_FS */ } static struct pernet_operations canbcm_pernet_ops __read_mostly = { diff --git a/net/core/dev.c b/net/core/dev.c index 8371a01eee87..35a06cebb282 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6854,12 +6854,14 @@ EXPORT_SYMBOL(dev_change_proto_down); /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device + * @extact: netlink extended ack * @fd: new program fd or negative value to clear * @flags: xdp-related flags * * Set or clear a bpf program for a device */ -int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags) { int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp); const struct net_device_ops *ops = dev->netdev_ops; @@ -6892,6 +6894,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) memset(&xdp, 0, sizeof(xdp)); xdp.command = XDP_SETUP_PROG; + xdp.extack = extack; xdp.prog = prog; err = xdp_op(dev, &xdp); diff --git a/net/core/filter.c b/net/core/filter.c index 9a37860a80fc..a253a6197e6b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -98,8 +98,8 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) skb->sk = sk; pkt_len = bpf_prog_run_save_cb(filter->prog, skb); - err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; skb->sk = save_sk; + err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM; } rcu_read_unlock(); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 5cbed3816229..cfae3d5fe11f 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -203,7 +203,7 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { const struct lwtunnel_encap_ops *ops; struct nlattr *nest; - int ret = -EINVAL; + int ret; if (!lwtstate) return 0; @@ -212,10 +212,11 @@ int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) lwtstate->type > LWTUNNEL_ENCAP_MAX) return 0; - ret = -EOPNOTSUPP; nest = nla_nest_start(skb, RTA_ENCAP); if (!nest) - goto nla_put_failure; + return -EMSGSIZE; + + ret = -EOPNOTSUPP; rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->fill_encap)) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index c1d8aed8e5a8..1934efd4a9d4 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -35,7 +35,8 @@ LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net = { - .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), + .count = ATOMIC_INIT(1), + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), }; EXPORT_SYMBOL(init_net); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9031a6c8bfa7..6e67315ec368 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1919,6 +1919,7 @@ static int do_set_master(struct net_device *dev, int ifindex) #define DO_SETLINK_NOTIFY 0x03 static int do_setlink(const struct sk_buff *skb, struct net_device *dev, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, struct nlattr **tb, char *ifname, int status) { const struct net_device_ops *ops = dev->netdev_ops; @@ -2201,7 +2202,7 @@ static int do_setlink(const struct sk_buff *skb, } if (xdp[IFLA_XDP_FD]) { - err = dev_change_xdp_fd(dev, + err = dev_change_xdp_fd(dev, extack, nla_get_s32(xdp[IFLA_XDP_FD]), xdp_flags); if (err) @@ -2261,7 +2262,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; - err = do_setlink(skb, dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0); errout: return err; } @@ -2423,6 +2424,7 @@ EXPORT_SYMBOL(rtnl_create_link); static int rtnl_group_changelink(const struct sk_buff *skb, struct net *net, int group, struct ifinfomsg *ifm, + struct netlink_ext_ack *extack, struct nlattr **tb) { struct net_device *dev, *aux; @@ -2430,7 +2432,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb, for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { - err = do_setlink(skb, dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0); if (err < 0) return err; } @@ -2576,14 +2578,15 @@ replay: status |= DO_SETLINK_NOTIFY; } - return do_setlink(skb, dev, ifm, tb, ifname, status); + return do_setlink(skb, dev, ifm, extack, tb, ifname, + status); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), - ifm, tb); + ifm, extack, tb); return -ENODEV; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index c8bf5136a72b..1ed81ac6dd1a 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -134,7 +134,7 @@ static int __init dn_rtmsg_init(void) return -ENOMEM; } - rv = nf_register_hook(&dnrmg_ops); + rv = nf_register_net_hook(&init_net, &dnrmg_ops); if (rv) { netlink_kernel_release(dnrmg); } @@ -144,7 +144,7 @@ static int __init dn_rtmsg_init(void) static void __exit dn_rtmsg_fini(void) { - nf_unregister_hook(&dnrmg_ops); + nf_unregister_net_hook(&init_net, &dnrmg_ops); netlink_kernel_release(dnrmg); } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 1d46d05efb0f..ec4fe3d4b5c9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -330,7 +330,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, sent to multicast group to reach destination designated router. */ struct ip_ra_chain __rcu *ip_ra_chain; -static DEFINE_SPINLOCK(ip_ra_lock); static void ip_ra_destroy_rcu(struct rcu_head *head) @@ -352,21 +351,17 @@ int ip_ra_control(struct sock *sk, unsigned char on, new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; - (ra = rcu_dereference_protected(*rap, - lockdep_is_held(&ip_ra_lock))) != NULL; + (ra = rtnl_dereference(*rap)) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { - spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } /* dont let ip_call_ra_chain() use sk again */ ra->sk = NULL; RCU_INIT_POINTER(*rap, ra->next); - spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); @@ -380,17 +375,14 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (!new_ra) { - spin_unlock_bh(&ip_ra_lock); + if (!new_ra) return -ENOBUFS; - } new_ra->sk = sk; new_ra->destructor = destructor; RCU_INIT_POINTER(new_ra->next, ra); rcu_assign_pointer(*rap, new_ra); sock_hold(sk); - spin_unlock_bh(&ip_ra_lock); return 0; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f17dab1dee6e..0bc3c3d73e61 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -309,8 +309,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, */ for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct arpt_entry *e - = (struct arpt_entry *)(entry0 + pos); + struct arpt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; @@ -354,14 +353,12 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (pos == oldpos) goto next; - e = (struct arpt_entry *) - (entry0 + pos); + e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; - e = (struct arpt_entry *) - (entry0 + pos + size); + e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; @@ -376,16 +373,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = (struct arpt_entry *) - (entry0 + newpos); + e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } - e = (struct arpt_entry *) - (entry0 + newpos); + e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } @@ -681,7 +676,7 @@ static int copy_entries_to_user(unsigned int total_size, for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ const struct xt_entry_target *t; - e = (struct arpt_entry *)(loc_cpu_entry + off); + e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; @@ -1128,7 +1123,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, int h; origsize = *size; - de = (struct arpt_entry *)*dstptr; + de = *dstptr; memcpy(de, e, sizeof(struct arpt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); @@ -1322,7 +1317,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, int ret; origsize = *size; - ce = (struct compat_arpt_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 384b85713e06..2a55a40211cb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -382,7 +382,7 @@ mark_source_chains(const struct xt_table_info *newinfo, to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos); + struct ipt_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; @@ -424,14 +424,12 @@ mark_source_chains(const struct xt_table_info *newinfo, if (pos == oldpos) goto next; - e = (struct ipt_entry *) - (entry0 + pos); + e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; - e = (struct ipt_entry *) - (entry0 + pos + size); + e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; @@ -446,16 +444,14 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = (struct ipt_entry *) - (entry0 + newpos); + e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } - e = (struct ipt_entry *) - (entry0 + newpos); + e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } @@ -834,7 +830,7 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_match *m; const struct xt_entry_target *t; - e = (struct ipt_entry *)(loc_cpu_entry + off); + e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; @@ -1229,7 +1225,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, int ret = 0; origsize = *size; - ce = (struct compat_ipt_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) @@ -1366,7 +1362,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_entry_match *ematch; origsize = *size; - de = (struct ipt_entry *)*dstptr; + de = *dstptr; memcpy(de, e, sizeof(struct ipt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 3240a2614e82..af2b69b6895f 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -293,12 +293,16 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_ECN); synproxy_send_client_synack(net, skb, th, &opts); - return NF_DROP; - + consume_skb(skb); + return NF_STOLEN; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); - return NF_DROP; + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) { + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } return XT_CONTINUE; @@ -367,10 +371,13 @@ static unsigned int ipv4_synproxy_hook(void *priv, * number match the one of first SYN. */ if (synproxy_recv_client_ack(net, skb, th, &opts, - ntohl(th->seq) + 1)) + ntohl(th->seq) + 1)) { this_cpu_inc(snet->stats->cookie_retrans); - - return NF_DROP; + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } synproxy->isn = ntohl(th->ack_seq); @@ -409,19 +416,56 @@ static unsigned int ipv4_synproxy_hook(void *priv, return NF_ACCEPT; } +static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { + { + .hook = ipv4_synproxy_hook, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv4_synproxy_hook, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + static int synproxy_tg4_check(const struct xt_tgchk_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); const struct ipt_entry *e = par->entryinfo; + int err; if (e->ip.proto != IPPROTO_TCP || e->ip.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_netns_get(par->net, par->family); + err = nf_ct_netns_get(par->net, par->family); + if (err) + return err; + + if (snet->hook_ref4 == 0) { + err = nf_register_net_hooks(par->net, ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); + if (err) { + nf_ct_netns_put(par->net, par->family); + return err; + } + } + + snet->hook_ref4++; + return err; } static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); + + snet->hook_ref4--; + if (snet->hook_ref4 == 0) + nf_unregister_net_hooks(par->net, ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); nf_ct_netns_put(par->net, par->family); } @@ -436,46 +480,14 @@ static struct xt_target synproxy_tg4_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { - { - .hook = ipv4_synproxy_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, - { - .hook = ipv4_synproxy_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, -}; - static int __init synproxy_tg4_init(void) { - int err; - - err = nf_register_hooks(ipv4_synproxy_ops, - ARRAY_SIZE(ipv4_synproxy_ops)); - if (err < 0) - goto err1; - - err = xt_register_target(&synproxy_tg4_reg); - if (err < 0) - goto err2; - - return 0; - -err2: - nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); -err1: - return err; + return xt_register_target(&synproxy_tg4_reg); } static void __exit synproxy_tg4_exit(void) { xt_unregister_target(&synproxy_tg4_reg); - nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); } module_init(synproxy_tg4_init); diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index f0dbff05fc28..39895b9ddeb9 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -69,8 +69,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ nf_reset(skb); - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif /* * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 6f5e8d01b876..feedd759ca80 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -264,13 +264,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, if (!ct) return NF_ACCEPT; - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; + nat = nfct_nat(ct); switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index ea91058b5f6f..dc1dea15c1b4 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -37,7 +37,6 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); - nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY)); @@ -56,7 +55,9 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, return NF_DROP; } - nat->masq_index = out->ifindex; + nat = nf_ct_nat_ext_add(ct); + if (nat) + nat->masq_index = out->ifindex; /* Transfer from original range. */ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index b3ca21b2ba9b..8a69363b4884 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -49,9 +49,14 @@ static void pptp_nat_expected(struct nf_conn *ct, const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_range range; + struct nf_conn_nat *nat; + nat = nf_ct_nat_ext_add(ct); + if (WARN_ON_ONCE(!nat)) + return; + + nat_pptp_info = &nat->help.nat_pptp_info; ct_pptp_info = nfct_help_data(master); - nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; /* And here goes the grand finale of corrosion... */ if (exp->dir == IP_CT_DIR_ORIGINAL) { @@ -120,13 +125,17 @@ pptp_outbound_pkt(struct sk_buff *skb, { struct nf_ct_pptp_master *ct_pptp_info; + struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_pptp *nat_pptp_info; u_int16_t msg; __be16 new_callid; unsigned int cid_off; + if (WARN_ON_ONCE(!nat)) + return NF_DROP; + + nat_pptp_info = &nat->help.nat_pptp_info; ct_pptp_info = nfct_help_data(ct); - nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; new_callid = ct_pptp_info->pns_call_id; @@ -177,11 +186,11 @@ pptp_outbound_pkt(struct sk_buff *skb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_callid), (char *)&new_callid, - sizeof(new_callid)) == 0) + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid))) return NF_DROP; return NF_ACCEPT; } @@ -191,11 +200,15 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_conntrack_expect *expect_reply) { const struct nf_conn *ct = expect_orig->master; + struct nf_conn_nat *nat = nfct_nat(ct); struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; + if (WARN_ON_ONCE(!nat)) + return; + + nat_pptp_info = &nat->help.nat_pptp_info; ct_pptp_info = nfct_help_data(ct); - nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; @@ -223,11 +236,15 @@ pptp_inbound_pkt(struct sk_buff *skb, union pptp_ctrl_union *pptpReq) { const struct nf_nat_pptp *nat_pptp_info; + struct nf_conn_nat *nat = nfct_nat(ct); u_int16_t msg; __be16 new_pcid; unsigned int pcid_off; - nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + if (WARN_ON_ONCE(!nat)) + return NF_DROP; + + nat_pptp_info = &nat->help.nat_pptp_info; new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { @@ -271,11 +288,11 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, - pcid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_pcid), (char *)&new_pcid, - sizeof(new_pcid)) == 0) + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + pcid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid))) return NF_DROP; return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index da04b9c33ef3..d5b1e0b3f687 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -827,8 +827,8 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, return 1; } -static unsigned char snmp_request_decode(struct asn1_ctx *ctx, - struct snmp_request *request) +static unsigned char noinline_for_stack +snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request) { unsigned int cls, con, tag; unsigned char *end; @@ -920,10 +920,10 @@ static inline void mangle_address(unsigned char *begin, } } -static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, - struct snmp_v1_trap *trap, - const struct oct1_map *map, - __sum16 *check) +static unsigned char noinline_for_stack +snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap, + const struct oct1_map *map, + __sum16 *check) { unsigned int cls, con, tag, len; unsigned char *end; diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index a83d558e1aae..e9293bdebba0 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -139,7 +139,7 @@ struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, * SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && + if (ct && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index f4e4462cb5bb..de3681df2ce7 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -212,7 +212,7 @@ nft_fib4_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_fib4_type __read_mostly = { .name = "fib", - .select_ops = &nft_fib4_select_ops, + .select_ops = nft_fib4_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV4, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 059dad7deefe..1e4c76d2b827 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -533,7 +533,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; - } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* Active TCP fastopen socket with defer_connect * Return POLLOUT so application can call write() * in order for kernel to generate SYN+data diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1e15c54fd5e2..1f90644056ac 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -51,15 +51,6 @@ void *ip6t_alloc_initial_table(const struct xt_table *info) } EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); -/* - We keep a set of rules for each CPU, so we can avoid write-locking - them in the softirq when updating the counters and therefore - only need to read-lock in the softirq; doing a write_lock_bh() in user - context stops packets coming through and allows user context to read - the counters or update the rules. - - Hence the start of any table is given by get_table() below. */ - /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -411,7 +402,7 @@ mark_source_chains(const struct xt_table_info *newinfo, to 0 as we leave), and comefrom to save source hook bitmask */ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); + struct ip6t_entry *e = entry0 + pos; if (!(valid_hooks & (1 << hook))) continue; @@ -453,14 +444,12 @@ mark_source_chains(const struct xt_table_info *newinfo, if (pos == oldpos) goto next; - e = (struct ip6t_entry *) - (entry0 + pos); + e = entry0 + pos; } while (oldpos == pos + e->next_offset); /* Move along one */ size = e->next_offset; - e = (struct ip6t_entry *) - (entry0 + pos + size); + e = entry0 + pos + size; if (pos + size >= newinfo->size) return 0; e->counters.pcnt = pos; @@ -475,16 +464,14 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = (struct ip6t_entry *) - (entry0 + newpos); + e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; if (newpos >= newinfo->size) return 0; } - e = (struct ip6t_entry *) - (entry0 + newpos); + e = entry0 + newpos; e->counters.pcnt = pos; pos = newpos; } @@ -863,7 +850,7 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_match *m; const struct xt_entry_target *t; - e = (struct ip6t_entry *)(loc_cpu_entry + off); + e = loc_cpu_entry + off; if (copy_to_user(userptr + off, e, sizeof(*e))) { ret = -EFAULT; goto free_counters; @@ -1258,7 +1245,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, int ret = 0; origsize = *size; - ce = (struct compat_ip6t_entry __user *)*dstptr; + ce = *dstptr; if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || copy_to_user(&ce->counters, &counters[i], sizeof(counters[i])) != 0) @@ -1394,7 +1381,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, struct xt_entry_match *ematch; origsize = *size; - de = (struct ip6t_entry *)*dstptr; + de = *dstptr; memcpy(de, e, sizeof(struct ip6t_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 4ef1ddd4bbbd..d3c4daa708b9 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -307,12 +307,17 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_ECN); synproxy_send_client_synack(net, skb, th, &opts); - return NF_DROP; + consume_skb(skb); + return NF_STOLEN; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); - return NF_DROP; + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq))) { + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } return XT_CONTINUE; @@ -388,10 +393,13 @@ static unsigned int ipv6_synproxy_hook(void *priv, * number match the one of first SYN. */ if (synproxy_recv_client_ack(net, skb, th, &opts, - ntohl(th->seq) + 1)) + ntohl(th->seq) + 1)) { this_cpu_inc(snet->stats->cookie_retrans); - - return NF_DROP; + consume_skb(skb); + return NF_STOLEN; + } else { + return NF_DROP; + } } synproxy->isn = ntohl(th->ack_seq); @@ -430,20 +438,57 @@ static unsigned int ipv6_synproxy_hook(void *priv, return NF_ACCEPT; } +static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { + { + .hook = ipv6_synproxy_hook, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv6_synproxy_hook, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + static int synproxy_tg6_check(const struct xt_tgchk_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); const struct ip6t_entry *e = par->entryinfo; + int err; if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || e->ipv6.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_netns_get(par->net, par->family); + err = nf_ct_netns_get(par->net, par->family); + if (err) + return err; + + if (snet->hook_ref6 == 0) { + err = nf_register_net_hooks(par->net, ipv6_synproxy_ops, + ARRAY_SIZE(ipv6_synproxy_ops)); + if (err) { + nf_ct_netns_put(par->net, par->family); + return err; + } + } + + snet->hook_ref6++; + return err; } static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) { + struct synproxy_net *snet = synproxy_pernet(par->net); + + snet->hook_ref6--; + if (snet->hook_ref6 == 0) + nf_unregister_net_hooks(par->net, ipv6_synproxy_ops, + ARRAY_SIZE(ipv6_synproxy_ops)); nf_ct_netns_put(par->net, par->family); } @@ -458,46 +503,14 @@ static struct xt_target synproxy_tg6_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { - { - .hook = ipv6_synproxy_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, - { - .hook = ipv6_synproxy_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, - }, -}; - static int __init synproxy_tg6_init(void) { - int err; - - err = nf_register_hooks(ipv6_synproxy_ops, - ARRAY_SIZE(ipv6_synproxy_ops)); - if (err < 0) - goto err1; - - err = xt_register_target(&synproxy_tg6_reg); - if (err < 0) - goto err2; - - return 0; - -err2: - nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); -err1: - return err; + return xt_register_target(&synproxy_tg6_reg); } static void __exit synproxy_tg6_exit(void) { xt_unregister_target(&synproxy_tg6_reg); - nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); } module_init(synproxy_tg6_init); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d2c2ccbfbe72..d5f028e33f65 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -221,8 +221,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 888ecd106e5f..4a7ddeddbaab 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -58,8 +58,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index e0be97e636a4..bf3ad3e7b647 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -273,13 +273,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, if (!ct) return NF_ACCEPT; - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; + nat = nfct_nat(ct); switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 051b6a6bfff6..2297c9f073ba 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -30,6 +30,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, const struct net_device *out) { enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; struct in6_addr src; struct nf_conn *ct; struct nf_nat_range newrange; @@ -42,7 +43,9 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; - nfct_nat(ct)->masq_index = out->ifindex; + nat = nf_ct_nat_ext_add(ct); + if (nat) + nat->masq_index = out->ifindex; newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; newrange.min_addr.in6 = src; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index e8d88d82636b..43f91d9b086c 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -246,7 +246,7 @@ nft_fib6_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_fib6_type __read_mostly = { .name = "fib", - .select_ops = &nft_fib6_select_ops, + .select_ops = nft_fib6_select_ops, .policy = nft_fib_policy, .maxattr = NFTA_FIB_MAX, .family = NFPROTO_IPV6, diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a87a6f8a74d8..552d606e57ca 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -126,14 +126,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) } EXPORT_SYMBOL(nf_register_net_hook); -void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +static struct nf_hook_entry * +__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { struct nf_hook_entry __rcu **pp; struct nf_hook_entry *p; pp = nf_hook_entry_head(net, reg); if (WARN_ON_ONCE(!pp)) - return; + return NULL; mutex_lock(&nf_hook_mutex); for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { @@ -145,7 +146,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) mutex_unlock(&nf_hook_mutex); if (!p) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); - return; + return NULL; } #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -154,10 +155,24 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif + + return p; +} + +void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +{ + struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg); + unsigned int nfq; + + if (!p) + return; + synchronize_net(); - nf_queue_nf_hook_drop(net, p); + /* other cpu might still process nfqueue verdict that used reg */ - synchronize_net(); + nfq = nf_queue_nf_hook_drop(net); + if (nfq) + synchronize_net(); kfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -183,10 +198,32 @@ err: EXPORT_SYMBOL(nf_register_net_hooks); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, - unsigned int n) + unsigned int hookcount) { - while (n-- > 0) - nf_unregister_net_hook(net, ®[n]); + struct nf_hook_entry *to_free[16]; + unsigned int i, n, nfq; + + do { + n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free)); + + for (i = 0; i < n; i++) + to_free[i] = __nf_unregister_net_hook(net, ®[i]); + + synchronize_net(); + + /* need 2nd synchronize_net() if nfqueue is used, skb + * can get reinjected right before nf_queue_hook_drop() + */ + nfq = nf_queue_nf_hook_drop(net); + if (nfq) + synchronize_net(); + + for (i = 0; i < n; i++) + kfree(to_free[i]); + + reg += n; + hookcount -= n; + } while (hookcount > 0); } EXPORT_SYMBOL(nf_unregister_net_hooks); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 6f09a99298cd..8ad2b52a0b32 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -232,7 +232,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_is_filled((const struct mtype_elem *)x) && + mtype_is_filled(x) && #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; @@ -248,8 +248,7 @@ mtype_list(const struct ip_set *set, } if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (ip_set_put_extensions(skb, set, x, - mtype_is_filled((const struct mtype_elem *)x))) + if (ip_set_put_extensions(skb, set, x, mtype_is_filled(x))) goto nla_put_failure; ipset_nest_end(skb, nested); } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9bd5b6636181..ba6a5516dc7c 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -503,14 +503,6 @@ __ip_set_put(struct ip_set *set) * a separate reference counter */ static inline void -__ip_set_get_netlink(struct ip_set *set) -{ - write_lock_bh(&ip_set_ref_lock); - set->ref_netlink++; - write_unlock_bh(&ip_set_ref_lock); -} - -static inline void __ip_set_put_netlink(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); @@ -771,7 +763,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), sizeof(*nfmsg), flags); if (!nlh) return NULL; @@ -1916,7 +1908,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned int *)data; + op = data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ @@ -2014,7 +2006,7 @@ static struct nf_sockopt_ops so_set __read_mostly = { .pf = PF_INET, .get_optmin = SO_IP_SET, .get_optmax = SO_IP_SET + 1, - .get = &ip_set_sockfn_get, + .get = ip_set_sockfn_get, .owner = THIS_MODULE, }; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b4a746d0e39b..d2d7bdf1d510 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2200,6 +2200,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { static int __net_init __ip_vs_init(struct net *net) { struct netns_ipvs *ipvs; + int ret; ipvs = net_generic(net, ip_vs_net_id); if (ipvs == NULL) @@ -2231,11 +2232,17 @@ static int __net_init __ip_vs_init(struct net *net) if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; + ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + if (ret < 0) + goto hook_fail; + return 0; /* * Error handling */ +hook_fail: + ip_vs_sync_net_cleanup(ipvs); sync_fail: ip_vs_conn_net_cleanup(ipvs); conn_fail: @@ -2255,6 +2262,7 @@ static void __net_exit __ip_vs_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); @@ -2315,24 +2323,16 @@ static int __init ip_vs_init(void) if (ret < 0) goto cleanup_sub; - ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); - if (ret < 0) { - pr_err("can't register hooks.\n"); - goto cleanup_dev; - } - ret = ip_vs_register_nl_ioctl(); if (ret < 0) { pr_err("can't register netlink/ioctl.\n"); - goto cleanup_hooks; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); return ret; -cleanup_hooks: - nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: @@ -2349,7 +2349,6 @@ exit: static void __exit ip_vs_cleanup(void) { ip_vs_unregister_nl_ioctl(); - nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_conn_cleanup(); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 892da70866d6..668d9643f0cc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1774,13 +1774,13 @@ static struct ctl_table vs_vars[] = { .procname = "sync_version", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_sync_mode, + .proc_handler = proc_do_sync_mode, }, { .procname = "sync_ports", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_do_sync_ports, + .proc_handler = proc_do_sync_ports, }, { .procname = "sync_persist_mode", @@ -2130,8 +2130,8 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Total Incoming Outgoing Incoming Outgoing\n"); - seq_printf(seq, - " Conns Packets Packets Bytes Bytes\n"); + seq_puts(seq, + " Conns Packets Packets Bytes Bytes\n"); ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", @@ -2178,8 +2178,8 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Total Incoming Outgoing Incoming Outgoing\n"); - seq_printf(seq, - "CPU Conns Packets Packets Bytes Bytes\n"); + seq_puts(seq, + "CPU Conns Packets Packets Bytes Bytes\n"); for_each_possible_cpu(i) { struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index d30c327bb578..fb780be76d15 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -260,7 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) { + if (ct && (ct->status & IPS_NAT_MASK)) { + bool mangled; + /* If mangling fails this function will return 0 * which will cause the packet to be dropped. * Mangling can only fail under memory pressure, @@ -268,12 +270,13 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * packet. */ rcu_read_lock(); - ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, - iph->ihl * 4, - start-data, end-start, - buf, buf_len); + mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + iph->ihl * 4, + start - data, + end - start, + buf, buf_len); rcu_read_unlock(); - if (ret) { + if (mangled) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -482,11 +485,8 @@ static struct pernet_operations ip_vs_ftp_ops = { static int __init ip_vs_ftp_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_ftp_ops); /* rcu_barrier() is called by netns on error */ - return rv; + return register_pernet_subsys(&ip_vs_ftp_ops); } /* diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index fc230d99aa3b..6cf3fd81a5ec 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -85,7 +85,7 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); struct nf_conntrack_tuple new_tuple; - if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || + if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) return; @@ -232,7 +232,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, { struct nf_conntrack_expect *exp; - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return; exp = nf_ct_expect_alloc(ct); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 8ae480715cea..ca880a3ad033 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -193,28 +193,6 @@ ip_vs_create_timeout_table(int *table, int size) } -/* - * Set timeout value for state specified by name - */ -int -ip_vs_set_state_timeout(int *table, int num, const char *const *names, - const char *name, int to) -{ - int i; - - if (!table || !name || !to) - return -EINVAL; - - for (i = 0; i < num; i++) { - if (strcmp(names[i], name)) - continue; - table[i] = to * HZ; - return 0; - } - return -ENOENT; -} - - const char * ip_vs_state_name(__u16 proto, int state) { struct ip_vs_protocol *pp = ip_vs_proto_get(proto); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b03c28084f81..0e5b64a75da0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -520,7 +520,7 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) && pkts % sync_period != sysctl_sync_threshold(ipvs)) return 0; - } else if (sync_refresh_period <= 0 && + } else if (!sync_refresh_period && pkts != sysctl_sync_threshold(ipvs)) return 0; @@ -1849,7 +1849,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; - ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL); + ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; ms = ipvs->ms; @@ -1862,7 +1862,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, ms->ipvs = ipvs; } } else { - array = kzalloc(count * sizeof(struct task_struct *), + array = kcalloc(count, sizeof(struct task_struct *), GFP_KERNEL); if (!array) goto out; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4e1a98fcc8c3..2eab1e0400f4 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -775,7 +775,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); @@ -866,7 +866,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); @@ -1338,7 +1338,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); @@ -1429,7 +1429,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 45da11afa785..866916712905 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -55,7 +55,7 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) }; EXPORT_SYMBOL_GPL(seq_print_acct); -static struct nf_ct_ext_type acct_extend __read_mostly = { +static const struct nf_ct_ext_type acct_extend = { .len = sizeof(struct nf_conn_acct), .align = __alignof__(struct nf_conn_acct), .id = NF_CT_EXT_ACCT, diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 57a26cc90c9f..03d2ccffa9fa 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -207,6 +207,8 @@ static int __init nf_conntrack_amanda_init(void) { int ret, i; + NF_CT_HELPER_BUILD_BUG_ON(0); + for (i = 0; i < ARRAY_SIZE(search); i++) { search[i].ts = textsearch_prepare(ts_algo, search[i].string, search[i].len, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3d621b8d7b8a..f9245dbfe435 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 last_bucket; bool exiting; + bool early_drop; long next_gc_run; }; @@ -180,14 +181,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; - -/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used - * for the nfctinfo. We cheat by (ab)using the PER CPU cache line - * alignment to enforce this. - */ -DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); - static unsigned int nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, @@ -706,7 +699,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && - !nfct_nat(ct) && + ((ct->status & IPS_NAT_DONE_MASK) == 0) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { enum ip_conntrack_info oldinfo; @@ -959,10 +952,30 @@ static noinline int early_drop(struct net *net, unsigned int _hash) return false; } +static bool gc_worker_skip_ct(const struct nf_conn *ct) +{ + return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct); +} + +static bool gc_worker_can_early_drop(const struct nf_conn *ct) +{ + const struct nf_conntrack_l4proto *l4proto; + + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) + return true; + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) + return true; + + return false; +} + static void gc_worker(struct work_struct *work) { unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); unsigned int i, goal, buckets = 0, expired_count = 0; + unsigned int nf_conntrack_max95 = 0; struct conntrack_gc_work *gc_work; unsigned int ratio, scanned = 0; unsigned long next_run; @@ -971,6 +984,8 @@ static void gc_worker(struct work_struct *work) goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; i = gc_work->last_bucket; + if (gc_work->early_drop) + nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; do { struct nf_conntrack_tuple_hash *h; @@ -987,6 +1002,8 @@ static void gc_worker(struct work_struct *work) i = 0; hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { + struct net *net; + tmp = nf_ct_tuplehash_to_ctrack(h); scanned++; @@ -995,6 +1012,27 @@ static void gc_worker(struct work_struct *work) expired_count++; continue; } + + if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) + continue; + + net = nf_ct_net(tmp); + if (atomic_read(&net->ct.count) < nf_conntrack_max95) + continue; + + /* need to take reference to avoid possible races */ + if (!atomic_inc_not_zero(&tmp->ct_general.use)) + continue; + + if (gc_worker_skip_ct(tmp)) { + nf_ct_put(tmp); + continue; + } + + if (gc_worker_can_early_drop(tmp)) + nf_ct_kill(tmp); + + nf_ct_put(tmp); } /* could check get_nulls_value() here and restart if ct @@ -1040,6 +1078,7 @@ static void gc_worker(struct work_struct *work) next_run = gc_work->next_gc_run; gc_work->last_bucket = i; + gc_work->early_drop = false; queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); } @@ -1065,6 +1104,8 @@ __nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { if (!early_drop(net, hash)) { + if (!conntrack_gc_work.early_drop) + conntrack_gc_work.early_drop = true; atomic_dec(&net->ct.count); net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); return ERR_PTR(-ENOMEM); @@ -1314,9 +1355,10 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int ret; tmpl = nf_ct_get(skb, &ctinfo); - if (tmpl) { + if (tmpl || ctinfo == IP_CT_UNTRACKED) { /* Previously seen (loopback or untracked)? Ignore. */ - if (!nf_ct_is_template(tmpl)) { + if ((tmpl && !nf_ct_is_template(tmpl)) || + ctinfo == IP_CT_UNTRACKED) { NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } @@ -1629,18 +1671,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -static int untrack_refs(void) -{ - int cnt = 0, cpu; - - for_each_possible_cpu(cpu) { - struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); - - cnt += atomic_read(&ct->ct_general.use) - 1; - } - return cnt; -} - void nf_conntrack_cleanup_start(void) { conntrack_gc_work.exiting = true; @@ -1650,8 +1680,6 @@ void nf_conntrack_cleanup_start(void) void nf_conntrack_cleanup_end(void) { RCU_INIT_POINTER(nf_ct_destroy, NULL); - while (untrack_refs() > 0) - schedule(); cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); @@ -1825,20 +1853,44 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -void nf_ct_untracked_status_or(unsigned long bits) +static unsigned int total_extension_size(void) { - int cpu; + /* remember to add new extensions below */ + BUILD_BUG_ON(NF_CT_EXT_NUM > 9); - for_each_possible_cpu(cpu) - per_cpu(nf_conntrack_untracked, cpu).status |= bits; -} -EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); + return sizeof(struct nf_ct_ext) + + sizeof(struct nf_conn_help) +#if IS_ENABLED(CONFIG_NF_NAT) + + sizeof(struct nf_conn_nat) +#endif + + sizeof(struct nf_conn_seqadj) + + sizeof(struct nf_conn_acct) +#ifdef CONFIG_NF_CONNTRACK_EVENTS + + sizeof(struct nf_conntrack_ecache) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + + sizeof(struct nf_conn_tstamp) +#endif +#ifdef CONFIG_NF_CONNTRACK_TIMEOUT + + sizeof(struct nf_conn_timeout) +#endif +#ifdef CONFIG_NF_CONNTRACK_LABELS + + sizeof(struct nf_conn_labels) +#endif +#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY) + + sizeof(struct nf_conn_synproxy) +#endif + ; +}; int nf_conntrack_init_start(void) { int max_factor = 8; int ret = -ENOMEM; - int i, cpu; + int i; + + /* struct nf_ct_ext uses u8 to store offsets/size */ + BUILD_BUG_ON(total_extension_size() > 255u); seqcount_init(&nf_conntrack_generation); @@ -1921,15 +1973,6 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_proto; - /* Set up fake conntrack: to never be deleted, not in any hashes */ - for_each_possible_cpu(cpu) { - struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); - write_pnet(&ct->ct_net, &init_net); - atomic_set(&ct->ct_general.use, 1); - } - /* - and look it like as a confirmed connection */ - nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); - conntrack_gc_work_init(&conntrack_gc_work); queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); @@ -1977,6 +2020,7 @@ int nf_conntrack_init_net(struct net *net) int ret = -ENOMEM; int cpu; + BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER); atomic_set(&net->ct.count, 0); net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 22fc32143e9c..caac41ad9483 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -195,7 +195,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) events = xchg(&e->cache, 0); - if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) + if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) goto out_unlock; /* We make a copy of the missed event cache without taking @@ -212,7 +212,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) ret = notify->fcn(events | missed, &item); - if (likely(ret >= 0 && !missed)) + if (likely(ret == 0 && !missed)) goto out_unlock; spin_lock_bh(&ct->lock); @@ -347,7 +347,7 @@ static struct ctl_table event_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static struct nf_ct_ext_type event_extend __read_mostly = { +static const struct nf_ct_ext_type event_extend = { .len = sizeof(struct nf_conntrack_ecache), .align = __alignof__(struct nf_conntrack_ecache), .id = NF_CT_EXT_ECACHE, @@ -420,6 +420,9 @@ int nf_conntrack_ecache_init(void) int ret = nf_ct_extend_register(&event_extend); if (ret < 0) pr_err("nf_ct_event: Unable to register event extension.\n"); + + BUILD_BUG_ON(__IPCT_MAX >= 16); /* ctmask, missed use u16 */ + return ret; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index a5ca5e426bae..e03d16ed550d 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -103,6 +103,17 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple, nf_ct_zone_equal_any(i->master, zone); } +bool nf_ct_remove_expect(struct nf_conntrack_expect *exp) +{ + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(nf_ct_remove_expect); + struct nf_conntrack_expect * __nf_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, @@ -211,10 +222,7 @@ void nf_ct_remove_expectations(struct nf_conn *ct) spin_lock_bh(&nf_conntrack_expect_lock); hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } + nf_ct_remove_expect(exp); } spin_unlock_bh(&nf_conntrack_expect_lock); } @@ -255,10 +263,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { spin_lock_bh(&nf_conntrack_expect_lock); - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } + nf_ct_remove_expect(exp); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); @@ -394,10 +399,8 @@ static void evict_oldest_expect(struct nf_conn *master, last = exp; } - if (last && del_timer(&last->timeout)) { - nf_ct_unlink_expect(last); - nf_ct_expect_put(last); - } + if (last) + nf_ct_remove_expect(last); } static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) @@ -419,11 +422,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { - if (del_timer(&i->timeout)) { - nf_ct_unlink_expect(i); - nf_ct_expect_put(i); + if (nf_ct_remove_expect(expect)) break; - } } else if (expect_clash(i, expect)) { ret = -EBUSY; goto out; @@ -549,7 +549,7 @@ static int exp_seq_show(struct seq_file *s, void *v) seq_printf(s, "%ld ", timer_pending(&expect->timeout) ? (long)(expect->timeout.expires - jiffies)/HZ : 0); else - seq_printf(s, "- "); + seq_puts(s, "- "); seq_printf(s, "l3proto = %u proto=%u ", expect->tuple.src.l3num, expect->tuple.dst.protonum); @@ -559,7 +559,7 @@ static int exp_seq_show(struct seq_file *s, void *v) expect->tuple.dst.protonum)); if (expect->flags & NF_CT_EXPECT_PERMANENT) { - seq_printf(s, "PERMANENT"); + seq_puts(s, "PERMANENT"); delim = ","; } if (expect->flags & NF_CT_EXPECT_INACTIVE) { diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 008299b7f78f..6c605e88ebae 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -18,17 +18,14 @@ static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); +#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */ -void __nf_ct_ext_destroy(struct nf_conn *ct) +void nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; struct nf_ct_ext_type *t; - struct nf_ct_ext *ext = ct->ext; for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!__nf_ct_ext_exist(ext, i)) - continue; - rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); @@ -41,54 +38,26 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) rcu_read_unlock(); } } -EXPORT_SYMBOL(__nf_ct_ext_destroy); - -static void * -nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp) -{ - unsigned int off, len; - struct nf_ct_ext_type *t; - size_t alloc_size; - - rcu_read_lock(); - t = rcu_dereference(nf_ct_ext_types[id]); - if (!t) { - rcu_read_unlock(); - return NULL; - } - - off = ALIGN(sizeof(struct nf_ct_ext), t->align); - len = off + t->len + var_alloc_len; - alloc_size = t->alloc_size + var_alloc_len; - rcu_read_unlock(); - - *ext = kzalloc(alloc_size, gfp); - if (!*ext) - return NULL; - - (*ext)->offset[id] = off; - (*ext)->len = len; - - return (void *)(*ext) + off; -} +EXPORT_SYMBOL(nf_ct_ext_destroy); -void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, - size_t var_alloc_len, gfp_t gfp) +void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { + unsigned int newlen, newoff, oldlen, alloc; struct nf_ct_ext *old, *new; - int newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); old = ct->ext; - if (!old) - return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp); - if (__nf_ct_ext_exist(old, id)) - return NULL; + if (old) { + if (__nf_ct_ext_exist(old, id)) + return NULL; + oldlen = old->len; + } else { + oldlen = sizeof(*new); + } rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); @@ -97,15 +66,19 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, return NULL; } - newoff = ALIGN(old->len, t->align); - newlen = newoff + t->len + var_alloc_len; + newoff = ALIGN(oldlen, t->align); + newlen = newoff + t->len; rcu_read_unlock(); - new = __krealloc(old, newlen, gfp); + alloc = max(newlen, NF_CT_EXT_PREALLOC); + new = __krealloc(old, alloc, gfp); if (!new) return NULL; - if (new != old) { + if (!old) { + memset(new->offset, 0, sizeof(new->offset)); + ct->ext = new; + } else if (new != old) { kfree_rcu(old, rcu); rcu_assign_pointer(ct->ext, new); } @@ -115,45 +88,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, memset((void *)new + newoff, 0, newlen - newoff); return (void *)new + newoff; } -EXPORT_SYMBOL(__nf_ct_ext_add_length); - -static void update_alloc_size(struct nf_ct_ext_type *type) -{ - int i, j; - struct nf_ct_ext_type *t1, *t2; - enum nf_ct_ext_id min = 0, max = NF_CT_EXT_NUM - 1; - - /* unnecessary to update all types */ - if ((type->flags & NF_CT_EXT_F_PREALLOC) == 0) { - min = type->id; - max = type->id; - } - - /* This assumes that extended areas in conntrack for the types - whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ - for (i = min; i <= max; i++) { - t1 = rcu_dereference_protected(nf_ct_ext_types[i], - lockdep_is_held(&nf_ct_ext_type_mutex)); - if (!t1) - continue; - - t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + - t1->len; - for (j = 0; j < NF_CT_EXT_NUM; j++) { - t2 = rcu_dereference_protected(nf_ct_ext_types[j], - lockdep_is_held(&nf_ct_ext_type_mutex)); - if (t2 == NULL || t2 == t1 || - (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) - continue; - - t1->alloc_size = ALIGN(t1->alloc_size, t2->align) - + t2->len; - } - } -} +EXPORT_SYMBOL(nf_ct_ext_add); /* This MUST be called in process context. */ -int nf_ct_extend_register(struct nf_ct_ext_type *type) +int nf_ct_extend_register(const struct nf_ct_ext_type *type) { int ret = 0; @@ -163,12 +101,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type) goto out; } - /* This ensures that nf_ct_ext_create() can allocate enough area - before updating alloc_size */ - type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) - + type->len; rcu_assign_pointer(nf_ct_ext_types[type->id], type); - update_alloc_size(type); out: mutex_unlock(&nf_ct_ext_type_mutex); return ret; @@ -176,11 +109,10 @@ out: EXPORT_SYMBOL_GPL(nf_ct_extend_register); /* This MUST be called in process context. */ -void nf_ct_extend_unregister(struct nf_ct_ext_type *type) +void nf_ct_extend_unregister(const struct nf_ct_ext_type *type) { mutex_lock(&nf_ct_ext_type_mutex); RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); - update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); synchronize_rcu(); } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 4aecef4a89fb..f0e9a7511e1a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -577,6 +577,8 @@ static int __init nf_conntrack_ftp_init(void) { int i, ret = 0; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master)); + ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) return -ENOMEM; @@ -589,12 +591,10 @@ static int __init nf_conntrack_ftp_init(void) for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp", FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, sizeof(struct nf_ct_ftp_master), help, - nf_ct_ftp_from_nlattr, THIS_MODULE); + 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp", FTP_PORT, ports[i], ports[i], &ftp_exp_policy, - 0, sizeof(struct nf_ct_ftp_master), help, - nf_ct_ftp_from_nlattr, THIS_MODULE); + 0, help, nf_ct_ftp_from_nlattr, THIS_MODULE); } ret = nf_conntrack_helpers_register(ftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index f65d93639d12..3bcdc718484e 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -637,7 +637,6 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, @@ -1215,7 +1214,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { { .name = "Q.931", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -1800,7 +1798,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1810,7 +1807,6 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1836,6 +1832,8 @@ static int __init nf_conntrack_h323_init(void) { int ret; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master)); + h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4eeb3418366a..4b9dfe3eef62 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -187,8 +187,7 @@ nf_ct_helper_ext_add(struct nf_conn *ct, { struct nf_conn_help *help; - help = nf_ct_ext_add_length(ct, NF_CT_EXT_HELPER, - helper->data_len, gfp); + help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else @@ -392,6 +391,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); + if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) + return -EINVAL; + mutex_lock(&nf_ct_helper_mutex); hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { @@ -455,11 +457,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) if ((rcu_dereference_protected( help->helper, lockdep_is_held(&nf_conntrack_expect_lock) - ) == me || exp->helper == me) && - del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } + ) == me || exp->helper == me)) + nf_ct_remove_expect(exp); } } spin_unlock_bh(&nf_conntrack_expect_lock); @@ -491,7 +490,7 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, - u32 expect_class_max, u32 data_len, + u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), @@ -504,7 +503,6 @@ void nf_ct_helper_init(struct nf_conntrack_helper *helper, helper->tuple.src.u.all = htons(spec_port); helper->expect_policy = exp_pol; helper->expect_class_max = expect_class_max; - helper->data_len = data_len; helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; @@ -544,7 +542,7 @@ void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); -static struct nf_ct_ext_type helper_extend __read_mostly = { +static const struct nf_ct_ext_type helper_extend = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), .id = NF_CT_EXT_HELPER, diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1972a149f958..5523acce9d69 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -243,6 +243,12 @@ static int __init nf_conntrack_irc_init(void) return -EINVAL; } + if (max_dcc_channels > NF_CT_EXPECT_MAX_CNT) { + pr_err("max_dcc_channels must not be more than %u\n", + NF_CT_EXPECT_MAX_CNT); + return -EINVAL; + } + irc_exp_policy.max_expected = max_dcc_channels; irc_exp_policy.timeout = dcc_timeout; @@ -257,7 +263,7 @@ static int __init nf_conntrack_irc_init(void) for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc", IRC_PORT, ports[i], i, &irc_exp_policy, - 0, 0, help, NULL, THIS_MODULE); + 0, help, NULL, THIS_MODULE); } ret = nf_conntrack_helpers_register(&irc[0], ports_c); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index bcab8bde7312..adf219859901 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -82,7 +82,7 @@ void nf_connlabels_put(struct net *net) } EXPORT_SYMBOL_GPL(nf_connlabels_put); -static struct nf_ct_ext_type labels_extend __read_mostly = { +static const struct nf_ct_ext_type labels_extend = { .len = sizeof(struct nf_conn_labels), .align = __alignof__(struct nf_conn_labels), .id = NF_CT_EXT_LABELS, diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 4c8f30a3d6d2..496ce173f0c1 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -58,6 +58,8 @@ static struct nf_conntrack_helper helper __read_mostly = { static int __init nf_conntrack_netbios_ns_init(void) { + NF_CT_HELPER_BUILD_BUG_ON(0); + exp_policy.timeout = timeout; return nf_conntrack_helper_register(&helper); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index aafd25dff8c0..5f6f2f388928 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -467,7 +467,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nlattr *nest_parms; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -627,10 +627,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) unsigned int flags = 0, group; int err; - /* ignore our fake conntrack entry */ - if (nf_ct_is_untracked(ct)) - return 0; - if (events & (1 << IPCT_DESTROY)) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; @@ -652,7 +648,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) if (skb == NULL) goto errout; - type |= NFNL_SUBSYS_CTNETLINK << 8; + type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type); nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1991,7 +1987,8 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_CT_GET_STATS_CPU); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2074,7 +2071,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2180,13 +2177,7 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) { - struct nf_conn *ct; - - ct = nf_ct_get(skb, ctinfo); - if (ct && nf_ct_is_untracked(ct)) - ct = NULL; - - return ct; + return nf_ct_get(skb, ctinfo); } static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) @@ -2585,7 +2576,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2636,7 +2627,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) if (skb == NULL) goto errout; - type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type); nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -3054,6 +3045,10 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, struct nf_conn_help *help; int err; + help = nfct_help(ct); + if (!help) + return ERR_PTR(-EOPNOTSUPP); + if (cda[CTA_EXPECT_CLASS] && helper) { class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS])); if (class > helper->expect_class_max) @@ -3063,26 +3058,11 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, if (!exp) return ERR_PTR(-ENOMEM); - help = nfct_help(ct); - if (!help) { - if (!cda[CTA_EXPECT_TIMEOUT]) { - err = -EINVAL; - goto err_out; - } - exp->timeout.expires = - jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; - - exp->flags = NF_CT_EXPECT_USERSPACE; - if (cda[CTA_EXPECT_FLAGS]) { - exp->flags |= - ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); - } + if (cda[CTA_EXPECT_FLAGS]) { + exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); + exp->flags &= ~NF_CT_EXPECT_USERSPACE; } else { - if (cda[CTA_EXPECT_FLAGS]) { - exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); - exp->flags &= ~NF_CT_EXPECT_USERSPACE; - } else - exp->flags = 0; + exp->flags = 0; } if (cda[CTA_EXPECT_FN]) { const char *name = nla_data(cda[CTA_EXPECT_FN]); @@ -3245,7 +3225,8 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0, event; - event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, + IPCTNL_MSG_EXP_GET_STATS_CPU); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index f60a4755d71e..6959e93063d4 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -263,7 +263,7 @@ out_unexpect_orig: goto out_put_both; } -static inline int +static int pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, @@ -391,7 +391,7 @@ invalid: return NF_ACCEPT; } -static inline int +static int pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, @@ -523,6 +523,14 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, int ret; u_int16_t msg; +#if IS_ENABLED(CONFIG_NF_NAT) + if (!nf_ct_is_confirmed(ct) && (ct->status & IPS_NAT_MASK)) { + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + + if (!nat && !nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC)) + return NF_DROP; + } +#endif /* don't do any tracking before tcp handshake complete */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; @@ -596,7 +604,6 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = { static struct nf_conntrack_helper pptp __read_mostly = { .name = "pptp", .me = THIS_MODULE, - .data_len = sizeof(struct nf_ct_pptp_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -607,6 +614,8 @@ static struct nf_conntrack_helper pptp __read_mostly = { static int __init nf_conntrack_pptp_init(void) { + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_pptp_master)); + return nf_conntrack_helper_register(&pptp); } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 2d6ee1803415..2de6c1fe3261 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -202,7 +202,7 @@ static int kill_l3proto(struct nf_conn *i, void *data) static int kill_l4proto(struct nf_conn *i, void *data) { struct nf_conntrack_l4proto *l4proto; - l4proto = (struct nf_conntrack_l4proto *)data; + l4proto = data; return nf_ct_protonum(i) == l4proto->l4proto && nf_ct_l3num(i) == l4proto->l3proto; } @@ -441,9 +441,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); void nf_ct_l4proto_pernet_unregister_one(struct net *net, struct nf_conntrack_l4proto *l4proto) { - struct nf_proto_net *pn = NULL; + struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); - pn = nf_ct_l4proto_net(net, l4proto); if (pn == NULL) return; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index b2e02dfe7fa8..b553fdd68816 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -609,6 +609,20 @@ out_invalid: return -NF_ACCEPT; } +static bool dccp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.dccp.state) { + case CT_DCCP_CLOSEREQ: + case CT_DCCP_CLOSING: + case CT_DCCP_TIMEWAIT: + return true; + default: + break; + } + + return false; +} + static void dccp_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { @@ -868,6 +882,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .packet = dccp_packet, .get_timeouts = dccp_get_timeouts, .error = dccp_error, + .can_early_drop = dccp_can_early_drop, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -902,6 +917,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .packet = dccp_packet, .get_timeouts = dccp_get_timeouts, .error = dccp_error, + .can_early_drop = dccp_can_early_drop, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 2a7300587c87..13875d599a85 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -535,6 +535,20 @@ out_invalid: return -NF_ACCEPT; } +static bool sctp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.sctp.state) { + case SCTP_CONNTRACK_SHUTDOWN_SENT: + case SCTP_CONNTRACK_SHUTDOWN_RECD: + case SCTP_CONNTRACK_SHUTDOWN_ACK_SENT: + return true; + default: + break; + } + + return false; +} + #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> @@ -781,6 +795,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .get_timeouts = sctp_get_timeouts, .new = sctp_new, .error = sctp_error, + .can_early_drop = sctp_can_early_drop, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, @@ -816,6 +831,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .get_timeouts = sctp_get_timeouts, .new = sctp_new, .error = sctp_error, + .can_early_drop = sctp_can_early_drop, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = sctp_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 85bde77ad967..9758a7dfd83e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -419,10 +419,9 @@ static void tcp_options(const struct sk_buff *skb, && opsize == TCPOLEN_WINDOW) { state->td_scale = *(u_int8_t *)ptr; - if (state->td_scale > 14) { - /* See RFC1323 */ - state->td_scale = 14; - } + if (state->td_scale > TCP_MAX_WSCALE) + state->td_scale = TCP_MAX_WSCALE; + state->flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; } @@ -1172,6 +1171,22 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +static bool tcp_can_early_drop(const struct nf_conn *ct) +{ + switch (ct->proto.tcp.state) { + case TCP_CONNTRACK_FIN_WAIT: + case TCP_CONNTRACK_LAST_ACK: + case TCP_CONNTRACK_TIME_WAIT: + case TCP_CONNTRACK_CLOSE: + case TCP_CONNTRACK_CLOSE_WAIT: + return true; + default: + break; + } + + return false; +} + #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> @@ -1550,6 +1565,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, + .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .nlattr_size = tcp_nlattr_size, @@ -1587,6 +1603,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, + .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, .nlattr_size = tcp_nlattr_size, diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 9dcb9ee9b97d..ae457f39d5ce 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -184,6 +184,8 @@ static int __init nf_conntrack_sane_init(void) { int i, ret = 0; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master)); + sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) return -ENOMEM; @@ -196,13 +198,11 @@ static int __init nf_conntrack_sane_init(void) for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane", SANE_PORT, ports[i], ports[i], - &sane_exp_policy, 0, - sizeof(struct nf_ct_sane_master), help, NULL, + &sane_exp_policy, 0, help, NULL, THIS_MODULE); nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane", SANE_PORT, ports[i], ports[i], - &sane_exp_policy, 0, - sizeof(struct nf_ct_sane_master), help, NULL, + &sane_exp_policy, 0, help, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index ef7063eced7c..a975efd6b8c3 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -231,7 +231,7 @@ s32 nf_ct_seq_offset(const struct nf_conn *ct, } EXPORT_SYMBOL_GPL(nf_ct_seq_offset); -static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = { +static const struct nf_ct_ext_type nf_ct_seqadj_extend = { .len = sizeof(struct nf_conn_seqadj), .align = __alignof__(struct nf_conn_seqadj), .id = NF_CT_EXT_SEQADJ, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0d17894798b5..d38af4274335 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -829,10 +829,8 @@ static void flush_expectations(struct nf_conn *ct, bool media) hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) { if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media) continue; - if (!del_timer(&exp->timeout)) + if (!nf_ct_remove_expect(exp)) continue; - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); if (!media) break; } @@ -1624,29 +1622,27 @@ static int __init nf_conntrack_sip_init(void) { int i, ret; + NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sip_master)); + if (ports_c == 0) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_udp, + SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_tcp, + SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_udp, + SIP_EXPECT_MAX, sip_help_udp, NULL, THIS_MODULE); nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, - SIP_EXPECT_MAX, - sizeof(struct nf_ct_sip_master), sip_help_tcp, + SIP_EXPECT_MAX, sip_help_tcp, NULL, THIS_MODULE); } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2256147dcaad..ccb5cb9043e0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -250,7 +250,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - seq_printf(s, "[UNREPLIED] "); + seq_puts(s, "[UNREPLIED] "); print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto); @@ -261,7 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; if (test_bit(IPS_ASSURED_BIT, &ct->status)) - seq_printf(s, "[ASSURED] "); + seq_puts(s, "[ASSURED] "); if (seq_has_overflowed(s)) goto release; @@ -350,7 +350,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + seq_puts(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index b1227dc6f75e..0ec6779fd5d9 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -113,16 +113,18 @@ static int __init nf_conntrack_tftp_init(void) { int i, ret; + NF_CT_HELPER_BUILD_BUG_ON(0); + if (ports_c == 0) ports[ports_c++] = TFTP_PORT; for (i = 0; i < ports_c; i++) { nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp", TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, 0, tftp_help, NULL, THIS_MODULE); + 0, tftp_help, NULL, THIS_MODULE); nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp", TFTP_PORT, ports[i], i, &tftp_exp_policy, - 0, 0, tftp_help, NULL, THIS_MODULE); + 0, tftp_help, NULL, THIS_MODULE); } ret = nf_conntrack_helpers_register(tftp, ports_c * 2); diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 26e742006c48..46aee65f339b 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook); -static struct nf_ct_ext_type timeout_extend __read_mostly = { +static const struct nf_ct_ext_type timeout_extend = { .len = sizeof(struct nf_conn_timeout), .align = __alignof__(struct nf_conn_timeout), .id = NF_CT_EXT_TIMEOUT, diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index 7a394df0deb7..4c4734b78318 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -33,7 +33,7 @@ static struct ctl_table tstamp_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static struct nf_ct_ext_type tstamp_extend __read_mostly = { +static const struct nf_ct_ext_type tstamp_extend = { .len = sizeof(struct nf_conn_tstamp), .align = __alignof__(struct nf_conn_tstamp), .id = NF_CT_EXT_TSTAMP, diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index c46d214d5323..bfa742da83af 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -14,7 +14,7 @@ /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry **entryp, unsigned int verdict); -void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); +unsigned int nf_queue_nf_hook_drop(struct net *net); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8d85a0598b60..8bb152a7cca4 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -71,7 +71,6 @@ void nf_log_unset(struct net *net, const struct nf_logger *logger) RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); } mutex_unlock(&nf_log_mutex); - synchronize_rcu(); } EXPORT_SYMBOL(nf_log_unset); @@ -376,13 +375,13 @@ static int seq_show(struct seq_file *s, void *v) logger = nft_log_dereference(loggers[*pos][i]); seq_printf(s, "%s", logger->name); if (i == 0 && loggers[*pos][i + 1] != NULL) - seq_printf(s, ","); + seq_puts(s, ","); if (seq_has_overflowed(s)) return -ENOSPC; } - seq_printf(s, ")\n"); + seq_puts(s, ")\n"); if (seq_has_overflowed(s)) return -ENOSPC; diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index eb772380a202..e4d61a7a5258 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -33,7 +33,6 @@ static unsigned int help(struct sk_buff *skb, { char buffer[sizeof("65535")]; u_int16_t port; - unsigned int ret; /* Connection comes from client. */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; @@ -63,14 +62,14 @@ static unsigned int help(struct sk_buff *skb, } sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, - protoff, matchoff, matchlen, - buffer, strlen(buffer)); - if (ret != NF_ACCEPT) { + if (!nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, + protoff, matchoff, matchlen, + buffer, strlen(buffer))) { nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); nf_ct_unexpect_related(exp); + return NF_DROP; } - return ret; + return NF_ACCEPT; } static void __exit nf_nat_amanda_fini(void) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 908ba5abbc0b..b48d6b5aae8a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -71,11 +71,10 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) if (ct == NULL) return; - family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; - rcu_read_lock(); + family = nf_ct_l3num(ct); l3proto = __nf_nat_l3proto_find(family); if (l3proto == NULL) - goto out; + return; dir = CTINFO2DIR(ctinfo); if (dir == IP_CT_DIR_ORIGINAL) @@ -84,8 +83,6 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) statusbit = IPS_SRC_NAT; l3proto->decode_session(skb, ct, dir, statusbit, fl); -out: - rcu_read_unlock(); } int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) @@ -411,12 +408,6 @@ nf_nat_setup_info(struct nf_conn *ct, enum nf_nat_manip_type maniptype) { struct nf_conntrack_tuple curr_tuple, new_tuple; - struct nf_conn_nat *nat; - - /* nat helper or nfctnetlink also setup binding */ - nat = nf_ct_nat_ext_add(ct); - if (nat == NULL) - return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); @@ -549,10 +540,6 @@ struct nf_nat_proto_clean { static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) @@ -563,12 +550,10 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) static int nf_nat_proto_clean(struct nf_conn *ct, void *data) { - struct nf_conn_nat *nat = nfct_nat(ct); - if (nf_nat_proto_remove(ct, data)) return 1; - if (!nat) + if ((ct->status & IPS_SRC_NAT_DONE) == 0) return 0; /* This netns is being destroyed, and conntrack has nat null binding. @@ -716,13 +701,9 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); /* No one using conntrack by the time this called. */ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { - struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - - if (!nat) - return; - - rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + if (ct->status & IPS_SRC_NAT_DONE) + rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -730,7 +711,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .align = __alignof__(struct nf_conn_nat), .destroy = nf_nat_cleanup_conntrack, .id = NF_CT_EXT_NAT, - .flags = NF_CT_EXT_F_PREALLOC, }; #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -820,7 +800,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, /* No NAT information has been passed, allocate the null-binding */ if (attr == NULL) - return __nf_nat_alloc_null_binding(ct, manip); + return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0; err = nfnetlink_parse_nat(attr, ct, &range, l3proto); if (err < 0) @@ -875,9 +855,6 @@ static int __init nf_nat_init(void) nf_ct_helper_expectfn_register(&follow_master_nat); - /* Initialize fake conntrack so that NAT will skip it */ - nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 211661cb2c90..607a373379b4 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -70,15 +70,15 @@ static void mangle_contents(struct sk_buff *skb, } /* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff *skb, unsigned int extra) +static bool enlarge_skb(struct sk_buff *skb, unsigned int extra) { if (skb->len + extra > 65535) - return 0; + return false; if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) - return 0; + return false; - return 1; + return true; } /* Generic function for mangling variable-length address changes inside @@ -89,26 +89,26 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * skb enlargement, ... * * */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len, bool adjust) +bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) { const struct nf_nat_l3proto *l3proto; struct tcphdr *tcph; int oldlen, datalen; if (!skb_make_writable(skb, skb->len)) - return 0; + return false; if (rep_len > match_len && rep_len - match_len > skb_tailroom(skb) && !enlarge_skb(skb, rep_len - match_len)) - return 0; + return false; SKB_LINEAR_ASSERT(skb); @@ -128,7 +128,7 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_ct_seqadj_set(ct, ctinfo, tcph->seq, (int)rep_len - (int)match_len); - return 1; + return true; } EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); @@ -142,7 +142,7 @@ EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); * XXX - This function could be merged with nf_nat_mangle_tcp_packet which * should be fairly easy to do. */ -int +bool nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -157,12 +157,12 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, int datalen, oldlen; if (!skb_make_writable(skb, skb->len)) - return 0; + return false; if (rep_len > match_len && rep_len - match_len > skb_tailroom(skb) && !enlarge_skb(skb, rep_len - match_len)) - return 0; + return false; udph = (void *)skb->data + protoff; @@ -176,13 +176,13 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, /* fix udp checksum if udp checksum was previously calculated */ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) - return 1; + return true; l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check, datalen, oldlen); - return 1; + return true; } EXPORT_SYMBOL(nf_nat_mangle_udp_packet); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 1fb2258c3535..0648cb096bd8 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -37,7 +37,6 @@ static unsigned int help(struct sk_buff *skb, struct nf_conn *ct = exp->master; union nf_inet_addr newaddr; u_int16_t port; - unsigned int ret; /* Reply comes from server. */ newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3; @@ -83,14 +82,14 @@ static unsigned int help(struct sk_buff *skb, pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", buffer, &newaddr.ip, port); - ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, - matchlen, buffer, strlen(buffer)); - if (ret != NF_ACCEPT) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, + matchlen, buffer, strlen(buffer))) { nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); + return NF_DROP; } - return ret; + return NF_ACCEPT; } static void __exit nf_nat_irc_fini(void) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4a7662486f44..043850c9d154 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,15 +96,18 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) +unsigned int nf_queue_nf_hook_drop(struct net *net) { const struct nf_queue_handler *qh; + unsigned int count = 0; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - qh->nf_hook_drop(net, entry); + count = qh->nf_hook_drop(net); rcu_read_unlock(); + + return count; } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 7c6d1fbe38b9..a504e87c6ddf 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -66,8 +66,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW) { opts->wscale = *ptr; - if (opts->wscale > 14) - opts->wscale = 14; + if (opts->wscale > TCP_MAX_WSCALE) + opts->wscale = TCP_MAX_WSCALE; opts->options |= XT_SYNPROXY_OPT_WSCALE; } break; @@ -287,9 +287,9 @@ static int synproxy_cpu_seq_show(struct seq_file *seq, void *v) struct synproxy_stats *stats = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries\t\tsyn_received\t" - "cookie_invalid\tcookie_valid\t" - "cookie_retrans\tconn_reopened\n"); + seq_puts(seq, "entries\t\tsyn_received\t" + "cookie_invalid\tcookie_valid\t" + "cookie_retrans\tconn_reopened\n"); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 907431318637..1c6482d2c4dc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -144,7 +144,7 @@ static int nf_tables_register_hooks(struct net *net, unsigned int hook_nops) { if (table->flags & NFT_TABLE_F_DORMANT || - !(chain->flags & NFT_BASE_CHAIN)) + !nft_is_base_chain(chain)) return 0; return nf_register_net_hooks(net, nft_base_chain(chain)->ops, @@ -157,7 +157,7 @@ static void nf_tables_unregister_hooks(struct net *net, unsigned int hook_nops) { if (table->flags & NFT_TABLE_F_DORMANT || - !(chain->flags & NFT_BASE_CHAIN)) + !nft_is_base_chain(chain)) return; nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); @@ -438,7 +438,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -587,7 +587,7 @@ static void _nf_tables_table_disable(struct net *net, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) continue; if (cnt && i++ == cnt) @@ -608,7 +608,7 @@ static int nf_tables_table_enable(struct net *net, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) continue; err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, @@ -989,7 +989,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1007,7 +1007,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name)) goto nla_put_failure; - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); const struct nf_hook_ops *ops = &basechain->ops[0]; struct nlattr *nest; @@ -1227,7 +1227,7 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) { BUG_ON(chain->use > 0); - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); module_put(basechain->type->owner); @@ -1365,8 +1365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, } if (nla[NFTA_CHAIN_POLICY]) { - if ((chain != NULL && - !(chain->flags & NFT_BASE_CHAIN))) + if (chain != NULL && + !nft_is_base_chain(chain)) return -EOPNOTSUPP; if (chain == NULL && @@ -1397,7 +1397,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_chain_hook hook; struct nf_hook_ops *ops; - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) return -EBUSY; err = nft_chain_parse_hook(net, nla, afi, &hook, @@ -1434,7 +1434,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, } if (nla[NFTA_CHAIN_COUNTERS]) { - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); @@ -1886,10 +1886,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, const struct nft_expr *expr, *next; struct nlattr *list; const struct nft_rule *prule; - int type = event | NFNL_SUBSYS_NFTABLES << 8; + u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); - nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), - flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -1907,7 +1906,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { - prule = list_entry(rule->list.prev, struct nft_rule, list); + prule = list_prev_entry(rule, list); if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(prule->handle), NFTA_RULE_PAD)) @@ -2646,7 +2645,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, u32 portid = ctx->portid; u32 seq = ctx->seq; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) @@ -3398,8 +3397,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (IS_ERR(set)) return PTR_ERR(set); - event = NFT_MSG_NEWSETELEM; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM); portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; @@ -3484,7 +3482,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, struct nlattr *nest; int err; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) @@ -4257,7 +4255,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; - event |= NFNL_SUBSYS_NFTABLES << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); if (nlh == NULL) goto nla_put_failure; @@ -4439,8 +4437,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, err: kfree_skb(skb2); return err; - - return 0; } static void nft_obj_destroy(struct nft_object *obj) @@ -4530,7 +4526,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN; + int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); if (nlh == NULL) @@ -4712,7 +4708,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) if (nft_trans_chain_name(trans)[0]) strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); - if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(trans->ctx.chain)) return; basechain = nft_base_chain(trans->ctx.chain); @@ -5026,7 +5022,7 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, { const struct nft_base_chain *basechain; - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); if (basechain->type->type != type) return -EOPNOTSUPP; @@ -5040,7 +5036,7 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, { struct nft_base_chain *basechain; - if (chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(chain)) { basechain = nft_base_chain(chain); if ((1 << basechain->ops[0].hooknum) & hook_flags) @@ -5350,7 +5346,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, tb[NFTA_VERDICT_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_BASE_CHAIN) + if (nft_is_base_chain(chain)) return -EOPNOTSUPP; chain->use++; @@ -5523,7 +5519,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) { struct nft_rule *rule, *nr; - BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); + BUG_ON(!nft_is_base_chain(ctx->chain)); nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, ctx->afi->nops); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 9e2ae424b640..403432988313 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -128,7 +128,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, list_for_each_entry(table, &afi->tables, list) { ctx.table = table; list_for_each_entry_safe(chain, nr, &table->chains, list) { - if (!(chain->flags & NFT_BASE_CHAIN)) + if (!nft_is_base_chain(chain)) continue; ctx.chain = chain; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 12eb9041dca2..e1b15e7a5793 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -169,7 +169,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; - int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; @@ -198,6 +198,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (!skb) return; + event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE); nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); if (!nlh) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e42f858b91d2..80f5ecf2c3d7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -503,7 +503,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) nfnetlink_rcv_skb_batch(skb, nlh); else - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); + netlink_rcv_skb(skb, nfnetlink_rcv_msg); } #ifdef CONFIG_MODULES diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2837d5fb98bd..9898fb4d0512 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -139,7 +139,7 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, u64 pkts, bytes; u32 old_flags; - event |= NFNL_SUBSYS_ACCT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 5b6c68311566..950bf6eadc65 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -105,7 +105,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(help->data, nla_data(attr), help->helper->data_len); + nla_memcpy(help->data, nla_data(attr), sizeof(help->data)); return 0; } @@ -152,6 +152,9 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) + return -EINVAL; + expect_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); @@ -215,6 +218,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], { struct nf_conntrack_helper *helper; struct nfnl_cthelper *nfcth; + unsigned int size; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) @@ -230,7 +234,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); - helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + if (size > FIELD_SIZEOF(struct nf_conn_help, data)) { + ret = -ENOMEM; + goto err2; + } + helper->flags |= NF_CT_HELPER_F_USERSPACE; memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); @@ -292,6 +301,9 @@ nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, new_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + if (new_policy->max_expected > NF_CT_EXPECT_MAX_CNT) + return -EINVAL; + new_policy->timeout = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); @@ -503,7 +515,7 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, unsigned int flags = portid ? NLM_F_MULTI : 0; int status; - event |= NFNL_SUBSYS_CTHELPER << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 0a3510e7e396..a3e7bb54d96a 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -159,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; - event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -432,7 +432,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index ecd857b75ffe..da9704971a83 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -411,7 +411,7 @@ __build_packet_message(struct nfnl_log_net *log, const unsigned char *hwhdrp; nlh = nlmsg_put(inst->skb, 0, 0, - NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, + nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET), sizeof(struct nfgenmsg), 0); if (!nlh) return -1; @@ -803,7 +803,7 @@ static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl, static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", .type = NF_LOG_TYPE_ULOG, - .logfn = &nfulnl_log_packet, + .logfn = nfulnl_log_packet, .me = THIS_MODULE, }; @@ -1140,10 +1140,10 @@ out: static void __exit nfnetlink_log_fini(void) { - nf_log_unregister(&nfulnl_logger); nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_log_net_ops); + nf_log_unregister(&nfulnl_logger); } MODULE_DESCRIPTION("netfilter userspace logging"); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3be6fef30581..8a0f218b7938 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -447,7 +447,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh = nlmsg_put(skb, 0, 0, - NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET), sizeof(struct nfgenmsg), 0); if (!nlh) { skb_tx_error(entskb); @@ -922,16 +922,10 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) -{ - return rcu_access_pointer(entry->hook) == - (struct nf_hook_entry *)entry_ptr; -} - -static void nfqnl_nf_hook_drop(struct net *net, - const struct nf_hook_entry *hook) +static unsigned int nfqnl_nf_hook_drop(struct net *net) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); + unsigned int instances = 0; int i; rcu_read_lock(); @@ -939,10 +933,14 @@ static void nfqnl_nf_hook_drop(struct net *net, struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; - hlist_for_each_entry_rcu(inst, head, hlist) - nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook); + hlist_for_each_entry_rcu(inst, head, hlist) { + nfqnl_flush(inst, NULL, 0); + instances++; + } } rcu_read_unlock(); + + return instances; } static int @@ -1213,8 +1211,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { }; static const struct nf_queue_handler nfqh = { - .outfn = &nfqnl_enqueue_packet, - .nf_hook_drop = &nfqnl_nf_hook_drop, + .outfn = nfqnl_enqueue_packet, + .nf_hook_drop = nfqnl_nf_hook_drop, }; static int nfqnl_recv_config(struct net *net, struct sock *ctnl, diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index d76d0f36799f..f753ec69f790 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -42,7 +42,8 @@ static int nft_compat_chain_validate_dependency(const char *tablename, { const struct nft_base_chain *basechain; - if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) + if (!tablename || + !nft_is_base_chain(chain)) return 0; basechain = nft_base_chain(chain); @@ -165,7 +166,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, par->entryinfo = entry; par->target = target; par->targinfo = info; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -298,7 +299,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -379,7 +380,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, par->entryinfo = entry; par->match = match; par->matchinfo = info; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -477,7 +478,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, unsigned int hook_mask = 0; int ret; - if (ctx->chain->flags & NFT_BASE_CHAIN) { + if (nft_is_base_chain(ctx->chain)) { const struct nft_base_chain *basechain = nft_base_chain(ctx->chain); const struct nf_hook_ops *ops = &basechain->ops[0]; @@ -503,7 +504,7 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - event |= NFNL_SUBSYS_NFT_COMPAT << 8; + event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 640fe5a5865e..a34ceb38fc55 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -72,12 +72,12 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_STATE: - if (ct == NULL) - state = NF_CT_STATE_INVALID_BIT; - else if (nf_ct_is_untracked(ct)) + if (ct) + state = NF_CT_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) state = NF_CT_STATE_UNTRACKED_BIT; else - state = NF_CT_STATE_BIT(ctinfo); + state = NF_CT_STATE_INVALID_BIT; *dest = state; return; default: @@ -264,7 +264,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr, struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) + if (ct == NULL || nf_ct_is_template(ct)) return; switch (priv->key) { @@ -284,6 +284,22 @@ static void nft_ct_set_eval(const struct nft_expr *expr, NF_CT_LABELS_MAX_SIZE / sizeof(u32)); break; #endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + case NFT_CT_EVENTMASK: { + struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct); + u32 ctmask = regs->data[priv->sreg]; + + if (e) { + if (e->ctmask != ctmask) + e->ctmask = ctmask; + break; + } + + if (ctmask && !nf_ct_is_confirmed(ct)) + nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC); + break; + } +#endif default: break; } @@ -539,6 +555,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, len = sizeof(u16); break; #endif +#ifdef CONFIG_NF_CONNTRACK_EVENTS + case NFT_CT_EVENTMASK: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); + break; +#endif default: return -EOPNOTSUPP; } @@ -702,7 +725,7 @@ nft_ct_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_ct_type __read_mostly = { .name = "ct", - .select_ops = &nft_ct_select_ops, + .select_ops = nft_ct_select_ops, .policy = nft_ct_policy, .maxattr = NFTA_CT_MAX, .owner = THIS_MODULE, @@ -718,12 +741,10 @@ static void nft_notrack_eval(const struct nft_expr *expr, ct = nf_ct_get(pkt->skb, &ctinfo); /* Previously seen (loopback or untracked)? Ignore. */ - if (ct) + if (ct || ctinfo == IP_CT_UNTRACKED) return; - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - nf_ct_set(skb, ct, IP_CT_NEW); + nf_ct_set(skb, ct, IP_CT_UNTRACKED); } static struct nft_expr_type nft_notrack_type; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index d212a85d2f33..1ec49fe5845f 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -232,7 +232,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_exthdr_type __read_mostly = { .name = "exthdr", - .select_ops = &nft_exthdr_select_ops, + .select_ops = nft_exthdr_select_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 52a5079a91a3..24f2f7567ddb 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -228,7 +228,7 @@ nft_hash_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_hash_type __read_mostly = { .name = "hash", - .select_ops = &nft_hash_select_ops, + .select_ops = nft_hash_select_ops, .policy = nft_hash_policy, .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9563ce3c23aa..5a60eb23a7ed 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -467,7 +467,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_meta_type __read_mostly = { .name = "meta", - .select_ops = &nft_meta_select_ops, + .select_ops = nft_meta_select_ops, .policy = nft_meta_policy, .maxattr = NFTA_META_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index a66b36097b8f..5a3a52c71545 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -188,7 +188,7 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) static struct nft_expr_type nft_ng_type __read_mostly = { .name = "numgen", - .select_ops = &nft_ng_select_ops, + .select_ops = nft_ng_select_ops, .policy = nft_ng_policy, .maxattr = NFTA_NG_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index dbb6aaff67ec..98613658d4ac 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -197,7 +197,7 @@ nft_queue_select_ops(const struct nft_ctx *ctx, static struct nft_expr_type nft_queue_type __read_mostly = { .name = "queue", - .select_ops = &nft_queue_select_ops, + .select_ops = nft_queue_select_ops, .policy = nft_queue_policy, .maxattr = NFTA_QUEUE_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 5f652720fc78..8ec086b6b56b 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -352,7 +352,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); + nft_set_elem_destroy(arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index b008db0184b8..3cbe1bcf6a74 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,11 +26,12 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->_nfct != 0) return XT_CONTINUE; - /* special case the untracked ct : we want the percpu object */ - if (!ct) - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); - nf_ct_set(skb, ct, IP_CT_NEW); + if (ct) { + atomic_inc(&ct->ct_general.use); + nf_ct_set(skb, ct, IP_CT_NEW); + } else { + nf_ct_set(skb, ct, IP_CT_UNTRACKED); + } return XT_CONTINUE; } @@ -335,7 +336,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (ct && !nf_ct_is_untracked(ct)) { + if (ct) { help = nfct_help(ct); if (help) module_put(help->helper->me); @@ -412,8 +413,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) if (skb->_nfct != 0) return XT_CONTINUE; - nf_ct_set(skb, nf_ct_untracked_get(), IP_CT_NEW); - nf_conntrack_get(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); return XT_CONTINUE; } diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 02afaf48a729..60e6dbe12460 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -84,7 +84,7 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, struct nf_conntrack_tuple *otuple; struct nf_conntrack_tuple *rtuple; - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return -1; otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 9a9884a39c0e..57ef175dfbfa 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -121,9 +121,6 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - if (nf_ct_is_untracked(ct)) - return false; - if (ct->master) hash = xt_cluster_hash(ct->master, info); else diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 7827128d5a95..23372879e6e3 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -29,7 +29,7 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) bool invert = info->options & XT_CONNLABEL_OP_INVERT; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return invert; labels = nf_ct_labels_find(ct); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 9935d5029b0e..ec377cc6a369 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) u_int32_t newmark; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return XT_CONTINUE; switch (info->mode) { @@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) + if (ct == NULL) return false; return ((ct->mark & info->mask) == info->mark) ^ info->invert; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index c0fb217bc649..39cf1d019240 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -172,12 +172,11 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct) { - if (nf_ct_is_untracked(ct)) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - } else + if (ct) + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2a6dfe8b74d3..762e1874f28b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -119,7 +119,7 @@ static int cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) { if (revision == 1) { - struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; + struct hashlimit_cfg1 *cfg = from; to->mode = cfg->mode; to->avg = cfg->avg; @@ -895,7 +895,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; *pos = ++(*bucket); if (*pos >= htable->cfg.size) { @@ -909,7 +909,7 @@ static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; if (!IS_ERR(bucket)) kfree(bucket); @@ -980,7 +980,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show_v1(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; struct dsthash_ent *ent; if (!hlist_empty(&htable->hash[*bucket])) { @@ -994,7 +994,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) static int dl_seq_show(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; - unsigned int *bucket = (unsigned int *)v; + unsigned int *bucket = v; struct dsthash_ent *ent; if (!hlist_empty(&htable->hash[*bucket])) { diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 0fdc89064488..42540d26c2b8 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -116,7 +116,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL || nf_ct_is_untracked(ct)) { + if (ct == NULL) { match = false; goto out_put_cp; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 1d89a4eaf841..37d581a31cff 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -532,7 +532,7 @@ static int recent_seq_show(struct seq_file *seq, void *v) &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); - seq_printf(seq, "\n"); + seq_putc(seq, '\n'); return 0; } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5746a33789a5..5fbd79194d21 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -28,14 +28,13 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned int statebit; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (!ct) + if (ct) + statebit = XT_STATE_BIT(ctinfo); + else if (ctinfo == IP_CT_UNTRACKED) + statebit = XT_STATE_UNTRACKED; + else statebit = XT_STATE_INVALID; - else { - if (nf_ct_is_untracked(ct)) - statebit = XT_STATE_UNTRACKED; - else - statebit = XT_STATE_BIT(ctinfo); - } + return (sinfo->statemask & statebit); } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4f7c3b5c080b..42a95919df09 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -797,11 +797,6 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, enum nf_nat_manip_type maniptype; int err; - if (nf_ct_is_untracked(ct)) { - /* A NAT action may only be performed on tracked packets. */ - return NF_ACCEPT; - } - /* Add NAT extension if not confirmed yet. */ if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) return NF_ACCEPT; /* Can't NAT. */ diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 3ecf07666df3..ca526c0881bd 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -439,29 +439,39 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } -static void fl_set_key_mpls(struct nlattr **tb, - struct flow_dissector_key_mpls *key_val, - struct flow_dissector_key_mpls *key_mask) +static int fl_set_key_mpls(struct nlattr **tb, + struct flow_dissector_key_mpls *key_val, + struct flow_dissector_key_mpls *key_mask) { if (tb[TCA_FLOWER_KEY_MPLS_TTL]) { key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]); key_mask->mpls_ttl = MPLS_TTL_MASK; } if (tb[TCA_FLOWER_KEY_MPLS_BOS]) { - key_val->mpls_bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]); + u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]); + + if (bos & ~MPLS_BOS_MASK) + return -EINVAL; + key_val->mpls_bos = bos; key_mask->mpls_bos = MPLS_BOS_MASK; } if (tb[TCA_FLOWER_KEY_MPLS_TC]) { - key_val->mpls_tc = - nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]) & MPLS_TC_MASK; + u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]); + + if (tc & ~MPLS_TC_MASK) + return -EINVAL; + key_val->mpls_tc = tc; key_mask->mpls_tc = MPLS_TC_MASK; } if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) { - key_val->mpls_label = - nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]) & - MPLS_LABEL_MASK; + u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]); + + if (label & ~MPLS_LABEL_MASK) + return -EINVAL; + key_val->mpls_label = label; key_mask->mpls_label = MPLS_LABEL_MASK; } + return 0; } static void fl_set_key_vlan(struct nlattr **tb, @@ -622,7 +632,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb, sizeof(key->icmp.code)); } else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) || key->basic.n_proto == htons(ETH_P_MPLS_MC)) { - fl_set_key_mpls(tb, &key->mpls, &mask->mpls); + ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls); + if (ret) + return ret; } else if (key->basic.n_proto == htons(ETH_P_ARP) || key->basic.n_proto == htons(ETH_P_RARP)) { fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP, diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index d4433a47e6c3..4221dc359453 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -14,6 +14,7 @@ #include <linux/perf_event.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> +#include <linux/types.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/syscall.h> @@ -185,12 +186,16 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return 0; } -static int load_maps(struct bpf_map_def *maps, int len, +static int load_maps(struct bpf_map_def *maps, int nr_maps, const char **map_names, fixup_map_cb fixup_map) { int i; - - for (i = 0; i < len / sizeof(struct bpf_map_def); i++) { + /* + * Warning: Using "maps" pointing to ELF data_maps->d_buf as + * an array of struct bpf_map_def is a wrong assumption about + * the ELF maps section format. + */ + for (i = 0; i < nr_maps; i++) { if (fixup_map) fixup_map(&maps[i], map_names[i], i); @@ -269,6 +274,10 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, return 1; } insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + /* + * Warning: Using sizeof(struct bpf_map_def) here is a + * wrong assumption about ELF maps section format + */ insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; } @@ -311,18 +320,18 @@ static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx, map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name); if (!map_name) { printf("cannot get map symbol\n"); - return 1; + return -1; } map_names[i] = strdup(map_name); if (!map_names[i]) { printf("strdup(%s): %s(%d)\n", map_name, strerror(errno), errno); - return 1; + return -1; } } - return 0; + return nr_maps; } static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) @@ -396,11 +405,25 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) } if (data_maps) { - if (get_sorted_map_names(elf, symbols, maps_shndx, strtabidx, - map_names)) + int nr_maps; + int prog_elf_map_sz; + + nr_maps = get_sorted_map_names(elf, symbols, maps_shndx, + strtabidx, map_names); + if (nr_maps < 0) goto done; - if (load_maps(data_maps->d_buf, data_maps->d_size, + /* Deduce map struct size stored in ELF maps section */ + prog_elf_map_sz = data_maps->d_size / nr_maps; + if (prog_elf_map_sz != sizeof(struct bpf_map_def)) { + printf("Error: ELF maps sec wrong size (%d/%lu)," + " old kern.o file?\n", + prog_elf_map_sz, sizeof(struct bpf_map_def)); + ret = 1; + goto done; + } + + if (load_maps(data_maps->d_buf, nr_maps, (const char **)map_names, fixup_map)) goto done; @@ -563,7 +586,7 @@ struct ksym *ksym_search(long key) return &syms[0]; } -int set_link_xdp_fd(int ifindex, int fd, int flags) +int set_link_xdp_fd(int ifindex, int fd, __u32 flags) { struct sockaddr_nl sa; int sock, seq = 0, len, ret = -1; diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 6bfd75ec6a16..05822f83173a 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -47,5 +47,5 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); -int set_link_xdp_fd(int ifindex, int fd, int flags); +int set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index deb05e630d84..378850c70eb8 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -20,11 +20,11 @@ #include "libbpf.h" static int ifindex; -static int flags; +static __u32 xdp_flags; static void int_exit(int sig) { - set_link_xdp_fd(ifindex, -1, flags); + set_link_xdp_fd(ifindex, -1, xdp_flags); exit(0); } @@ -75,7 +75,7 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { case 'S': - flags |= XDP_FLAGS_SKB_MODE; + xdp_flags |= XDP_FLAGS_SKB_MODE; break; default: usage(basename(argv[0])); @@ -103,7 +103,7 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); - if (set_link_xdp_fd(ifindex, prog_fd[0], flags) < 0) { + if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index cb2bda7b5346..92b8bde9337c 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -25,11 +25,12 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; +static __u32 xdp_flags = 0; static void int_exit(int sig) { if (ifindex > -1) - set_link_xdp_fd(ifindex, -1, 0); + set_link_xdp_fd(ifindex, -1, xdp_flags); exit(0); } @@ -143,7 +144,6 @@ int main(int argc, char **argv) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct vip vip = {}; char filename[256]; - int flags = 0; int opt; int i; @@ -204,7 +204,7 @@ int main(int argc, char **argv) kill_after_s = atoi(optarg); break; case 'S': - flags |= XDP_FLAGS_SKB_MODE; + xdp_flags |= XDP_FLAGS_SKB_MODE; break; default: usage(argv[0]); @@ -248,14 +248,14 @@ int main(int argc, char **argv) } } - if (set_link_xdp_fd(ifindex, prog_fd[0], flags) < 0) { + if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } poll_stats(kill_after_s); - set_link_xdp_fd(ifindex, -1, flags); + set_link_xdp_fd(ifindex, -1, xdp_flags); return 0; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d3395c192a24..3773562056da 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1932,16 +1932,22 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "unpriv: obfuscate stack pointer", + "stack pointer arithmetic", .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R2 pointer arithmetic", - .result_unpriv = REJECT, .result = ACCEPT, }, { |